Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: downloads/boost_1_33_1/libs/regex/src/icu.cpp @ 20

Last change on this file since 20 was 12, checked in by landauf, 18 years ago
added boost
File size: 19.9 KB

Rev	Line
[12]	1	/*
	2	*
	3	* Copyright (c) 2004
	4	* John Maddock
	5	*
	6	* Use, modification and distribution are subject to the
	7	* Boost Software License, Version 1.0. (See accompanying file
	8	* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	9	*
	10	*/
	11
	12	/*
	13	* LOCATION: see http://www.boost.org for most recent version.
	14	* FILE icu.cpp
	15	* VERSION see <boost/version.hpp>
	16	* DESCRIPTION: Unicode regular expressions on top of the ICU Library.
	17	*/
	18	#define BOOST_REGEX_SOURCE
	19
	20	#include <boost/regex/config.hpp>
	21	#ifdef BOOST_HAS_ICU
	22	#define BOOST_REGEX_ICU_INSTANTIATE
	23	#include <boost/regex/icu.hpp>
	24
	25	namespace boost{
	26
	27	namespace re_detail{
	28
	29	icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
	30	{
	31	// TODO make thread safe!!!! :
	32	typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
	33	itt i(p1), j(p2);
	34	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	35	std::vector< ::UChar> t(i, j);
	36	#else
	37	std::vector< ::UChar> t;
	38	while(i != j)
	39	t.push_back(*i++);
	40	#endif
	41	::uint8_t result[100];
	42	::int32_t len;
	43	if(t.size())
	44	len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
	45	else
	46	len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
	47	if(std::size_t(len) > sizeof(result))
	48	{
	49	scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]);
	50	if(t.size())
	51	len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1);
	52	else
	53	len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len+1);
	54	if((0 == presult[len-1]) && (len > 1))
	55	--len;
	56	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	57	return string_type(presult.get(), presult.get()+len);
	58	#else
	59	string_type sresult;
	60	::uint8_t const* ia = presult.get();
	61	::uint8_t const* ib = presult.get()+len;
	62	while(ia != ib)
	63	sresult.push_back(*ia++);
	64	return sresult;
	65	#endif
	66	}
	67	if((0 == result[len-1]) && (len > 1))
	68	--len;
	69	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	70	return string_type(result, result+len);
	71	#else
	72	string_type sresult;
	73	::uint8_t const* ia = result;
	74	::uint8_t const* ib = result+len;
	75	while(ia != ib)
	76	sresult.push_back(*ia++);
	77	return sresult;
	78	#endif
	79	}
	80
	81	}
	82
	83	icu_regex_traits::size_type icu_regex_traits::length(const char_type* p)
	84	{
	85	size_type result = 0;
	86	while(*p)
	87	{
	88	++p;
	89	++result;
	90	}
	91	return result;
	92	}
	93
	94	//
	95	// define our bitmasks:
	96	//
	97	const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank;
	98	const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space;
	99	const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit;
	100	const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore;
	101	const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode;
	102	const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any;
	103	const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii;
	104
	105	icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
	106	{
	107	static const ::UChar32 prop_name_table[] = {
	108	/* any */ 'a', 'n', 'y',
	109	/* ascii */ 'a', 's', 'c', 'i', 'i',
	110	/* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
	111	/* c* / 'c', '',
	112	/* cc */ 'c', 'c',
	113	/* cf */ 'c', 'f',
	114	/* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	115	/* cn */ 'c', 'n',
	116	/* co */ 'c', 'o',
	117	/* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	118	/* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l',
	119	/* cs */ 'c', 's',
	120	/* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l',
	121	/* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	122	/* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r',
	123	/* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	124	/* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	125	/* format */ 'f', 'o', 'r', 'm', 'a', 't',
	126	/* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	127	/* l* / 'l', '',
	128	/* letter */ 'l', 'e', 't', 't', 'e', 'r',
	129	/* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
	130	/* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	131	/* ll */ 'l', 'l',
	132	/* lm */ 'l', 'm',
	133	/* lo */ 'l', 'o',
	134	/* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	135	/* lt */ 'l', 't',
	136	/* lu */ 'l', 'u',
	137	/* m* / 'm', '',
	138	/* mark */ 'm', 'a', 'r', 'k',
	139	/* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l',
	140	/* mc */ 'm', 'c',
	141	/* me */ 'm', 'e',
	142	/* mn */ 'm', 'n',
	143	/* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
	144	/* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
	145	/* n* / 'n', '',
	146	/* nd */ 'n', 'd',
	147	/* nl */ 'n', 'l',
	148	/* no */ 'n', 'o',
	149	/* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	150	/* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
	151	/* number */ 'n', 'u', 'm', 'b', 'e', 'r',
	152	/* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	153	/* other */ 'o', 't', 'h', 'e', 'r',
	154	/* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
	155	/* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
	156	/* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	157	/* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
	158	/* p* / 'p', '',
	159	/* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	160	/* pc */ 'p', 'c',
	161	/* pd */ 'p', 'd',
	162	/* pe */ 'p', 'e',
	163	/* pf */ 'p', 'f',
	164	/* pi */ 'p', 'i',
	165	/* po */ 'p', 'o',
	166	/* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e',
	167	/* ps */ 'p', 's',
	168	/* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
	169	/* s* / 's', '',
	170	/* sc */ 's', 'c',
	171	/* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	172	/* sk */ 's', 'k',
	173	/* sm */ 's', 'm',
	174	/* so */ 's', 'o',
	175	/* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
	176	/* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
	177	/* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e',
	178	/* symbol */ 's', 'y', 'm', 'b', 'o', 'l',
	179	/* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e',
	180	/* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	181	/* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
	182	/* z* / 'z', '',
	183	/* zl */ 'z', 'l',
	184	/* zp */ 'z', 'p',
	185	/* zs */ 'z', 's',
	186	};
	187
	188	static const re_detail::character_pointer_range< ::UChar32> range_data[] = {
	189	{ prop_name_table+0, prop_name_table+3, }, // any
	190	{ prop_name_table+3, prop_name_table+8, }, // ascii
	191	{ prop_name_table+8, prop_name_table+16, }, // assigned
	192	{ prop_name_table+16, prop_name_table+18, }, // c*
	193	{ prop_name_table+18, prop_name_table+20, }, // cc
	194	{ prop_name_table+20, prop_name_table+22, }, // cf
	195	{ prop_name_table+22, prop_name_table+38, }, // closepunctuation
	196	{ prop_name_table+38, prop_name_table+40, }, // cn
	197	{ prop_name_table+40, prop_name_table+42, }, // co
	198	{ prop_name_table+42, prop_name_table+62, }, // connectorpunctuation
	199	{ prop_name_table+62, prop_name_table+69, }, // control
	200	{ prop_name_table+69, prop_name_table+71, }, // cs
	201	{ prop_name_table+71, prop_name_table+85, }, // currencysymbol
	202	{ prop_name_table+85, prop_name_table+100, }, // dashpunctuation
	203	{ prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber
	204	{ prop_name_table+118, prop_name_table+131, }, // enclosingmark
	205	{ prop_name_table+131, prop_name_table+147, }, // finalpunctuation
	206	{ prop_name_table+147, prop_name_table+153, }, // format
	207	{ prop_name_table+153, prop_name_table+171, }, // initialpunctuation
	208	{ prop_name_table+171, prop_name_table+173, }, // l*
	209	{ prop_name_table+173, prop_name_table+179, }, // letter
	210	{ prop_name_table+179, prop_name_table+191, }, // letternumber
	211	{ prop_name_table+191, prop_name_table+204, }, // lineseparator
	212	{ prop_name_table+204, prop_name_table+206, }, // ll
	213	{ prop_name_table+206, prop_name_table+208, }, // lm
	214	{ prop_name_table+208, prop_name_table+210, }, // lo
	215	{ prop_name_table+210, prop_name_table+225, }, // lowercaseletter
	216	{ prop_name_table+225, prop_name_table+227, }, // lt
	217	{ prop_name_table+227, prop_name_table+229, }, // lu
	218	{ prop_name_table+229, prop_name_table+231, }, // m*
	219	{ prop_name_table+231, prop_name_table+235, }, // mark
	220	{ prop_name_table+235, prop_name_table+245, }, // mathsymbol
	221	{ prop_name_table+245, prop_name_table+247, }, // mc
	222	{ prop_name_table+247, prop_name_table+249, }, // me
	223	{ prop_name_table+249, prop_name_table+251, }, // mn
	224	{ prop_name_table+251, prop_name_table+265, }, // modifierletter
	225	{ prop_name_table+265, prop_name_table+279, }, // modifiersymbol
	226	{ prop_name_table+279, prop_name_table+281, }, // n*
	227	{ prop_name_table+281, prop_name_table+283, }, // nd
	228	{ prop_name_table+283, prop_name_table+285, }, // nl
	229	{ prop_name_table+285, prop_name_table+287, }, // no
	230	{ prop_name_table+287, prop_name_table+301, }, // nonspacingmark
	231	{ prop_name_table+301, prop_name_table+312, }, // notassigned
	232	{ prop_name_table+312, prop_name_table+318, }, // number
	233	{ prop_name_table+318, prop_name_table+333, }, // openpunctuation
	234	{ prop_name_table+333, prop_name_table+338, }, // other
	235	{ prop_name_table+338, prop_name_table+349, }, // otherletter
	236	{ prop_name_table+349, prop_name_table+360, }, // othernumber
	237	{ prop_name_table+360, prop_name_table+376, }, // otherpunctuation
	238	{ prop_name_table+376, prop_name_table+387, }, // othersymbol
	239	{ prop_name_table+387, prop_name_table+389, }, // p*
	240	{ prop_name_table+389, prop_name_table+407, }, // paragraphseparator
	241	{ prop_name_table+407, prop_name_table+409, }, // pc
	242	{ prop_name_table+409, prop_name_table+411, }, // pd
	243	{ prop_name_table+411, prop_name_table+413, }, // pe
	244	{ prop_name_table+413, prop_name_table+415, }, // pf
	245	{ prop_name_table+415, prop_name_table+417, }, // pi
	246	{ prop_name_table+417, prop_name_table+419, }, // po
	247	{ prop_name_table+419, prop_name_table+429, }, // privateuse
	248	{ prop_name_table+429, prop_name_table+431, }, // ps
	249	{ prop_name_table+431, prop_name_table+442, }, // punctuation
	250	{ prop_name_table+442, prop_name_table+444, }, // s*
	251	{ prop_name_table+444, prop_name_table+446, }, // sc
	252	{ prop_name_table+446, prop_name_table+455, }, // separator
	253	{ prop_name_table+455, prop_name_table+457, }, // sk
	254	{ prop_name_table+457, prop_name_table+459, }, // sm
	255	{ prop_name_table+459, prop_name_table+461, }, // so
	256	{ prop_name_table+461, prop_name_table+475, }, // spaceseparator
	257	{ prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark
	258	{ prop_name_table+495, prop_name_table+504, }, // surrogate
	259	{ prop_name_table+504, prop_name_table+510, }, // symbol
	260	{ prop_name_table+510, prop_name_table+519, }, // titlecase
	261	{ prop_name_table+519, prop_name_table+534, }, // titlecaseletter
	262	{ prop_name_table+534, prop_name_table+549, }, // uppercaseletter
	263	{ prop_name_table+549, prop_name_table+551, }, // z*
	264	{ prop_name_table+551, prop_name_table+553, }, // zl
	265	{ prop_name_table+553, prop_name_table+555, }, // zp
	266	{ prop_name_table+555, prop_name_table+557, }, // zs
	267	};
	268
	269	static const icu_regex_traits::char_class_type icu_class_map[] = {
	270	icu_regex_traits::mask_any, // any
	271	icu_regex_traits::mask_ascii, // ascii
	272	(0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
	273	U_GC_C_MASK, // c*
	274	U_GC_CC_MASK, // cc
	275	U_GC_CF_MASK, // cf
	276	U_GC_PE_MASK, // closepunctuation
	277	U_GC_CN_MASK, // cn
	278	U_GC_CO_MASK, // co
	279	U_GC_PC_MASK, // connectorpunctuation
	280	U_GC_CC_MASK, // control
	281	U_GC_CS_MASK, // cs
	282	U_GC_SC_MASK, // currencysymbol
	283	U_GC_PD_MASK, // dashpunctuation
	284	U_GC_ND_MASK, // decimaldigitnumber
	285	U_GC_ME_MASK, // enclosingmark
	286	U_GC_PF_MASK, // finalpunctuation
	287	U_GC_CF_MASK, // format
	288	U_GC_PI_MASK, // initialpunctuation
	289	U_GC_L_MASK, // l*
	290	U_GC_L_MASK, // letter
	291	U_GC_NL_MASK, // letternumber
	292	U_GC_ZL_MASK, // lineseparator
	293	U_GC_LL_MASK, // ll
	294	U_GC_LM_MASK, // lm
	295	U_GC_LO_MASK, // lo
	296	U_GC_LL_MASK, // lowercaseletter
	297	U_GC_LT_MASK, // lt
	298	U_GC_LU_MASK, // lu
	299	U_GC_M_MASK, // m*
	300	U_GC_M_MASK, // mark
	301	U_GC_SM_MASK, // mathsymbol
	302	U_GC_MC_MASK, // mc
	303	U_GC_ME_MASK, // me
	304	U_GC_MN_MASK, // mn
	305	U_GC_LM_MASK, // modifierletter
	306	U_GC_SK_MASK, // modifiersymbol
	307	U_GC_N_MASK, // n*
	308	U_GC_ND_MASK, // nd
	309	U_GC_NL_MASK, // nl
	310	U_GC_NO_MASK, // no
	311	U_GC_MN_MASK, // nonspacingmark
	312	U_GC_CN_MASK, // notassigned
	313	U_GC_N_MASK, // number
	314	U_GC_PS_MASK, // openpunctuation
	315	U_GC_C_MASK, // other
	316	U_GC_LO_MASK, // otherletter
	317	U_GC_NO_MASK, // othernumber
	318	U_GC_PO_MASK, // otherpunctuation
	319	U_GC_SO_MASK, // othersymbol
	320	U_GC_P_MASK, // p*
	321	U_GC_ZP_MASK, // paragraphseparator
	322	U_GC_PC_MASK, // pc
	323	U_GC_PD_MASK, // pd
	324	U_GC_PE_MASK, // pe
	325	U_GC_PF_MASK, // pf
	326	U_GC_PI_MASK, // pi
	327	U_GC_PO_MASK, // po
	328	U_GC_CO_MASK, // privateuse
	329	U_GC_PS_MASK, // ps
	330	U_GC_P_MASK, // punctuation
	331	U_GC_S_MASK, // s*
	332	U_GC_SC_MASK, // sc
	333	U_GC_Z_MASK, // separator
	334	U_GC_SK_MASK, // sk
	335	U_GC_SM_MASK, // sm
	336	U_GC_SO_MASK, // so
	337	U_GC_ZS_MASK, // spaceseparator
	338	U_GC_MC_MASK, // spacingcombiningmark
	339	U_GC_CS_MASK, // surrogate
	340	U_GC_S_MASK, // symbol
	341	U_GC_LT_MASK, // titlecase
	342	U_GC_LT_MASK, // titlecaseletter
	343	U_GC_LU_MASK, // uppercaseletter
	344	U_GC_Z_MASK, // z*
	345	U_GC_ZL_MASK, // zl
	346	U_GC_ZP_MASK, // zp
	347	U_GC_ZS_MASK, // zs
	348	};
	349
	350
	351	static const re_detail::character_pointer_range< ::UChar32>* ranges_begin = range_data;
	352	static const re_detail::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0]));
	353
	354	re_detail::character_pointer_range< ::UChar32> t = { p1, p2, };
	355	const re_detail::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
	356	if((p != ranges_end) && (t == *p))
	357	return icu_class_map[p - ranges_begin];
	358	return 0;
	359	}
	360
	361	icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const
	362	{
	363	static const char_class_type masks[] =
	364	{
	365	0,
	366	U_GC_L_MASK \| U_GC_ND_MASK,
	367	U_GC_L_MASK,
	368	mask_blank,
	369	U_GC_CC_MASK \| U_GC_CF_MASK \| U_GC_ZL_MASK \| U_GC_ZP_MASK,
	370	U_GC_ND_MASK,
	371	U_GC_ND_MASK,
	372	(0x3FFFFFFFu) & ~(U_GC_CC_MASK \| U_GC_CF_MASK \| U_GC_CS_MASK \| U_GC_CN_MASK \| U_GC_Z_MASK),
	373	U_GC_LL_MASK,
	374	U_GC_LL_MASK,
	375	~(U_GC_C_MASK),
	376	U_GC_P_MASK,
	377	char_class_type(U_GC_Z_MASK) \| mask_space,
	378	char_class_type(U_GC_Z_MASK) \| mask_space,
	379	U_GC_LU_MASK,
	380	mask_unicode,
	381	U_GC_LU_MASK,
	382	char_class_type(U_GC_L_MASK \| U_GC_ND_MASK \| U_GC_MN_MASK) \| mask_underscore,
	383	char_class_type(U_GC_L_MASK \| U_GC_ND_MASK \| U_GC_MN_MASK) \| mask_underscore,
	384	char_class_type(U_GC_ND_MASK) \| mask_xdigit,
	385	};
	386
	387	int id = ::boost::re_detail::get_default_class_id(p1, p2);
	388	if(id >= 0)
	389	return masks[id+1];
	390	char_class_type result = lookup_icu_mask(p1, p2);
	391	if(result != 0)
	392	return result;
	393
	394	if(id < 0)
	395	{
	396	string_type s(p1, p2);
	397	string_type::size_type i = 0;
	398	while(i < s.size())
	399	{
	400	s[i] = static_cast<char>((::u_tolower)(s[i]));
	401	if(::u_isspace(s[i]) \|\| (s[i] == '-') \|\| (s[i] == '_'))
	402	s.erase(s.begin()+i, s.begin()+i+1);
	403	else
	404	{
	405	s[i] = static_cast<char>((::u_tolower)(s[i]));
	406	++i;
	407	}
	408	}
	409	if(s.size())
	410	id = ::boost::re_detail::get_default_class_id(&s.begin(), &s.begin() + s.size());
	411	if(id >= 0)
	412	return masks[id+1];
	413	if(s.size())
	414	result = lookup_icu_mask(&s.begin(), &s.begin() + s.size());
	415	if(result != 0)
	416	return result;
	417	}
	418	BOOST_ASSERT(std::size_t(id+1) < sizeof(masks) / sizeof(masks[0]));
	419	return masks[id+1];
	420	}
	421
	422	icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const
	423	{
	424	string_type result;
	425	if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
	426	{
	427	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	428	std::string s(p1, p2);
	429	#else
	430	std::string s;
	431	const char_type* p3 = p1;
	432	while(p3 != p2)
	433	s.append(1, *p3++);
	434	#endif
	435	// Try Unicode name:
	436	UErrorCode err = U_ZERO_ERROR;
	437	UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
	438	if(U_SUCCESS(err))
	439	{
	440	result.push_back(c);
	441	return result;
	442	}
	443	// Try Unicode-extended name:
	444	err = U_ZERO_ERROR;
	445	c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
	446	if(U_SUCCESS(err))
	447	{
	448	result.push_back(c);
	449	return result;
	450	}
	451	// try POSIX name:
	452	s = ::boost::re_detail::lookup_default_collate_name(s);
	453	#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
	454	result.assign(s.begin(), s.end());
	455	#else
	456	result.clear();
	457	std::string::const_iterator si, sj;
	458	si = s.begin();
	459	sj = s.end();
	460	while(si != sj)
	461	result.push_back(*si++);
	462	#endif
	463	}
	464	if(result.empty() && (p2-p1 == 1))
	465	result.push_back(*p1);
	466	return result;
	467	}
	468
	469	bool icu_regex_traits::isctype(char_type c, char_class_type f) const
	470	{
	471	// check for standard catagories first:
	472	char_class_type m = char_class_type(1u << u_charType(c));
	473	if((m & f) != 0)
	474	return true;
	475	// now check for special cases:
	476	if(((f & mask_blank) != 0) && u_isblank(c))
	477	return true;
	478	if(((f & mask_space) != 0) && u_isspace(c))
	479	return true;
	480	if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
	481	return true;
	482	if(((f & mask_unicode) != 0) && (c >= 0x100))
	483	return true;
	484	if(((f & mask_underscore) != 0) && (c == '_'))
	485	return true;
	486	if(((f & mask_any) != 0) && (c <= 0x10FFFF))
	487	return true;
	488	if(((f & mask_ascii) != 0) && (c <= 0x7F))
	489	return true;
	490	return false;
	491	}
	492
	493	}
	494
	495	#endif // BOOST_HAS_ICU

Note: See TracBrowser for help on using the repository browser.

Download in other formats: