Planet
navi: home | PPS | about | screenshots | download | development | forum

source: downloads/boost_1_33_1/libs/serialization/test/test_utf8_codecvt.cpp @ 12

Last change on this file since 12 was 12, checked in by landauf, 18 years ago

added boost

File size: 6.2 KB
Line 
1/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
2// test_utf8_codecvt.cpp
3
4// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
5// Use, modification and distribution is subject to the Boost Software
6// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
7// http://www.boost.org/LICENSE_1_0.txt)
8
9#include <algorithm>
10#include <fstream>
11#include <iostream>
12#include <iterator>
13#include <locale>
14#include <vector>
15#include <string>
16
17#include <cstddef> // size_t
18#include <boost/config.hpp>
19#if defined(BOOST_NO_STDC_NAMESPACE)
20namespace std{ 
21    using ::size_t; 
22} // namespace std
23#endif
24
25#include <cwchar>
26#ifdef BOOST_NO_STDC_NAMESPACE
27namespace std{ using ::wcslen; }
28#endif
29
30#include <boost/test/test_tools.hpp>
31#include <boost/archive/iterators/istream_iterator.hpp>
32#include <boost/archive/iterators/ostream_iterator.hpp>
33
34#include <boost/archive/add_facet.hpp>
35#include <boost/archive/detail/utf8_codecvt_facet.hpp>
36
// Holder for one matched pair of test tables: a UTF-8 byte sequence and the
// wide characters that sequence is expected to decode to.  The template
// argument selects which table set is used; this file provides the tables
// for the values 2 and 4 and chooses between them with sizeof(wchar_t).
template<std::size_t wchar_size>
struct test_data
{
    // raw UTF-8 bytes, written unchanged to the first test file
    static unsigned char utf8_encoding[];
    // wide characters the bytes above are compared against after decoding
    static wchar_t wchar_encoding[];
};
43
44template<>
45unsigned char test_data<2>::utf8_encoding[] = {
46    0x01,
47    0x7f,
48    0xc2, 0x80,
49    0xdf, 0xbf,
50    0xe0, 0xa0, 0x80,
51    0xe7, 0xbf, 0xbf
52};
53
54template<>
55wchar_t test_data<2>::wchar_encoding[] = {
56    0x0001,
57    0x007f,
58    0x0080,
59    0x07ff,
60    0x0800,
61    0x7fff
62};
63
64template<>
65unsigned char test_data<4>::utf8_encoding[] = {
66    0x01,
67    0x7f,
68    0xc2, 0x80,
69    0xdf, 0xbf,
70    0xe0, 0xa0, 0x80,
71    0xef, 0xbf, 0xbf,
72    0xf0, 0x90, 0x80, 0x80,
73    0xf4, 0x8f, 0xbf, 0xbf,
74    0xf7, 0xbf, 0xbf, 0xbf,
75    0xf8, 0x88, 0x80, 0x80, 0x80,
76    0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
77    0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
78    0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
79};
80
81template<>
82wchar_t test_data<4>::wchar_encoding[] = {
83    0x00000001,
84    0x0000007f,
85    0x00000080,
86    0x000007ff,
87    0x00000800,
88    0x0000ffff,
89    0x00010000,
90    0x0010ffff,
91    0x001fffff,
92    0x00200000,
93    0x03ffffff,
94    0x04000000,
95    0x7fffffff
96};
97
98int
99test_main(int /* argc */, char * /* argv */[]) {
100    std::locale old_loc;
101    std::locale * utf8_locale
102        = boost::archive::add_facet(
103            old_loc, 
104            new boost::archive::detail::utf8_codecvt_facet
105        );
106
107    typedef char utf8_t;
108    typedef test_data<sizeof(wchar_t)> td;
109
110    // Send our test UTF-8 data to file
111    {
112        std::ofstream ofs;
113        ofs.open("test.dat", std::ios::binary);
114        std::copy(
115            td::utf8_encoding,
116            #if ! defined(__BORLANDC__)
117                // borland 5.60 complains about this
118                td::utf8_encoding + sizeof(td::utf8_encoding) / sizeof(unsigned char),
119            #else
120                // so use this instead
121                td::utf8_encoding + 12,
122            #endif
123            boost::archive::iterators::ostream_iterator<utf8_t>(ofs)
124        );
125    }
126
127    // Read the test data back in, converting to UCS-4 on the way in
128    std::vector<wchar_t> from_file;
129    {
130        std::wifstream ifs;
131        ifs.imbue(*utf8_locale);
132        ifs.open("test.dat");
133
134        wchar_t item = 0;
135        // note can't use normal vector from iterator constructor because
136        // dinkumware doesn't have it.
137        for(;;){
138            item = ifs.get();
139            if(item == WEOF)
140                break;
141            //ifs >> item;
142            //if(ifs.eof())
143            //    break;
144            from_file.push_back(item);
145        }
146    }
147
148    // compare the data read back in with the orginal
149    #if ! defined(__BORLANDC__)
150        // borland 5.60 complains about this
151        BOOST_CHECK(from_file.size() == sizeof(td::wchar_encoding)/sizeof(wchar_t));
152    #else
153        // so use this instead
154        BOOST_CHECK(from_file.size() == 6);
155    #endif
156
157    BOOST_CHECK(std::equal(from_file.begin(), from_file.end(), td::wchar_encoding));
158 
159    // Send the UCS4_data back out, converting to UTF-8
160    {
161        std::wofstream ofs;
162        ofs.imbue(*utf8_locale);
163        ofs.open("test2.dat");
164        std::copy(
165            from_file.begin(),
166            from_file.end(),
167            boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
168        );
169    }
170
171    // Make sure that both files are the same
172    {
173        typedef boost::archive::iterators::istream_iterator<utf8_t> is_iter;
174        is_iter end_iter;
175
176        std::ifstream ifs1("test.dat");
177        is_iter it1(ifs1);
178        std::vector<utf8_t> data1;
179        std::copy(it1, end_iter, std::back_inserter(data1));
180
181        std::ifstream ifs2("test2.dat");
182        is_iter it2(ifs2);
183        std::vector<utf8_t> data2;
184        std::copy(it2, end_iter, std::back_inserter(data2));
185
186        BOOST_CHECK(data1 == data2);
187    }
188
189    // some libraries have trouble that only shows up with longer strings
190   
191    wchar_t * test3_data = L"\
192    <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
193    <!DOCTYPE boost_serialization>\
194    <boost_serialization signature=\"serialization::archive\" version=\"3\">\
195    <a class_id=\"0\" tracking_level=\"0\">\
196        <b>1</b>\
197        <f>96953204</f>\
198        <g>177129195</g>\
199        <l>1</l>\
200        <m>5627</m>\
201        <n>23010</n>\
202        <o>7419</o>\
203        <p>16212</p>\
204        <q>4086</q>\
205        <r>2749</r>\
206        <c>-33</c>\
207        <s>124</s>\
208        <t>28</t>\
209        <u>32225</u>\
210        <v>17543</v>\
211        <w>0.84431422</w>\
212        <x>1.0170664757130923</x>\
213        <y>tjbx</y>\
214        <z>cuwjentqpkejp</z>\
215    </a>\
216    </boost_serialization>\
217    ";
218   
219    // Send the UCS4_data back out, converting to UTF-8
220    std::size_t l = std::wcslen(test3_data);
221    {
222        std::wofstream ofs;
223        ofs.imbue(*utf8_locale);
224        ofs.open("test3.dat");
225        std::copy(
226            test3_data,
227            test3_data + l,
228            boost::archive::iterators::ostream_iterator<wchar_t>(ofs)
229        );
230    }
231
232    // Make sure that both files are the same
233    {
234        std::wifstream ifs;
235        ifs.imbue(*utf8_locale);
236        ifs.open("test3.dat");
237        BOOST_CHECK(
238            std::equal(
239                test3_data,
240                test3_data + l,
241                boost::archive::iterators::istream_iterator<wchar_t>(ifs)
242            )
243        );
244    }
245
246    delete utf8_locale;
247    return EXIT_SUCCESS;
248}
Note: See TracBrowser for help on using the repository browser.