| 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> |
|---|
| 2 | <html> |
|---|
| 3 | <head> |
|---|
| 4 | <title>Boost.Regex: Algorithm regex_split (deprecated)</title> |
|---|
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> |
|---|
| 6 | <link rel="stylesheet" type="text/css" href="../../../boost.css"> |
|---|
| 7 | </head> |
|---|
| 8 | <body> |
|---|
| 9 | <P> |
|---|
| 10 | <TABLE id="Table1" cellSpacing="1" cellPadding="1" width="100%" border="0"> |
|---|
| 11 | <TR> |
|---|
| 12 | <td valign="top" width="300"> |
|---|
| 13 | <h3><a href="../../../index.htm"><img height="86" width="277" alt="C++ Boost" src="../../../boost.png" border="0"></a></h3> |
|---|
| 14 | </td> |
|---|
| 15 | <TD width="353"> |
|---|
| 16 | <H1 align="center">Boost.Regex</H1> |
|---|
| 17 | <H2 align="center">Algorithm regex_split (deprecated)</H2> |
|---|
| 18 | </TD> |
|---|
| 19 | <td width="50"> |
|---|
| 20 | <h3><a href="index.html"><img height="45" width="43" alt="Boost.Regex Index" src="uarrow.gif" border="0"></a></h3> |
|---|
| 21 | </td> |
|---|
| 22 | </TR> |
|---|
| 23 | </TABLE> |
|---|
| 24 | </P> |
|---|
| 25 | <HR> |
|---|
| 26 | <p></p> |
|---|
| 27 | <P>The algorithm regex_split has been deprecated in favor of the iterator <A href="regex_token_iterator.html"> |
|---|
| 28 | regex_token_iterator</A> which has a more flexible and powerful interface, |
|---|
| 29 | as well as following the more usual standard library "pull" rather than "push" |
|---|
| 30 | semantics.</P> |
|---|
| 31 | <P>Code which uses regex_split will continue to compile; the following |
|---|
| 32 | documentation is taken from the previous boost.regex version:</P> |
|---|
| 33 | <H3><A name="regex_split"></A>Algorithm regex_split</H3> |
|---|
| 34 | <PRE>#include <<A href="../../../boost/regex.hpp">boost/regex.hpp</A>> </PRE> |
|---|
| 35 | <P>Algorithm regex_split performs a similar operation to the Perl split operation, |
|---|
| 36 | and comes in three overloaded forms: |
|---|
| 37 | </P> |
|---|
| 38 | <PRE><B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2> |
|---|
| 39 | std::size_t regex_split(OutputIterator out, |
|---|
| 40 | std::basic_string<charT, Traits1, Alloc1>& s, |
|---|
| 41 | <B> const</B> basic_regex<charT, Traits2>& e, |
|---|
| 42 | <STRONG> </STRONG>boost::match_flag_type flags, |
|---|
| 43 | std::size_t max_split); |
|---|
| 44 | |
|---|
| 45 | <B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1, <B>class</B> Traits2> |
|---|
| 46 | std::size_t regex_split(OutputIterator out, |
|---|
| 47 | std::basic_string<charT, Traits1, Alloc1>& s, |
|---|
| 48 | <B> const</B> basic_regex<charT, Traits2>& e, |
|---|
| 49 | boost::match_flag_type flags = match_default); |
|---|
| 50 | |
|---|
| 51 | <B>template</B> <<B>class</B> OutputIterator, <B>class</B> charT, <B>class</B> Traits1, <B>class</B> Alloc1> |
|---|
| 52 | std::size_t regex_split(OutputIterator out, |
|---|
| 53 | std::basic_string<charT, Traits1, Alloc1>& s);</PRE> |
|---|
| 54 | <P><STRONG>Effects: </STRONG>Each version of the algorithm takes an |
|---|
| 55 | output-iterator for output, and a string for input. If the expression contains |
|---|
| 56 | no marked sub-expressions, then the algorithm writes one string onto the |
|---|
| 57 | output-iterator for each section of input that does not match the expression. |
|---|
| 58 | If the expression does contain marked sub-expressions, then each time a match |
|---|
| 59 | is found, one string for each marked sub-expression will be written to the |
|---|
| 60 | output-iterator. No more than <I>max_split </I>strings will be written to the |
|---|
| 61 | output-iterator. Before returning, all the input processed will be deleted from |
|---|
| 62 | the string <I>s</I> (if <I>max_split </I>is not reached then all of <I>s</I> will |
|---|
| 63 | be deleted). Returns the number of strings written to the output-iterator. If |
|---|
| 64 | the parameter <I>max_split</I> is not specified then it defaults to UINT_MAX. |
|---|
| 65 | If no expression is specified, then it defaults to "\s+", and splitting occurs |
|---|
| 66 | on whitespace. |
|---|
| 67 | </P> |
|---|
| 68 | <P><STRONG>Throws:</STRONG> <CODE>std::runtime_error</CODE> if the complexity of |
|---|
| 69 | matching the expression against an N character string begins to exceed O(N<SUP>2</SUP>), |
|---|
| 70 | or if the program runs out of stack space while matching the expression (if |
|---|
| 71 | Boost.regex is <A href="configuration.html">configured</A> in recursive mode), |
|---|
| 72 | or if the matcher exhausts its permitted memory allocation (if Boost.regex is <A href="configuration.html"> |
|---|
| 73 | configured</A> in non-recursive mode).</P> |
|---|
| 74 | <P><A href="../example/snippets/regex_split_example_1.cpp">Example</A>: the |
|---|
| 75 | following function will split the input string into a series of tokens, and |
|---|
| 76 | remove each token from the string <I>s</I>: |
|---|
| 77 | </P> |
|---|
| 78 | <PRE><B>unsigned</B> tokenise(std::list<std::string>& l, std::string& s) |
|---|
| 79 | { |
|---|
| 80 | <B> return</B> boost::regex_split(std::back_inserter(l), s); |
|---|
| 81 | }</PRE> |
|---|
| 82 | <P><A href="../example/snippets/regex_split_example_2.cpp">Example</A>: the |
|---|
| 83 | following short program will extract all of the URLs from an HTML file, and |
|---|
| 84 | print them out to <I>cout</I>: |
|---|
| 85 | </P> |
|---|
| 86 | <PRE><FONT color=#008000>#include <list> |
|---|
| 87 | #include <fstream> |
|---|
| 88 | #include <iostream> |
|---|
| 89 | #include <boost/regex.hpp> |
|---|
| 90 | </FONT> |
|---|
| 91 | boost::regex e(<FONT color=#000080>"<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\""</FONT>, |
|---|
| 92 | boost::regbase::normal | boost::regbase::icase); |
|---|
| 93 | |
|---|
| 94 | <B>void</B> load_file(std::string& s, std::istream& is) |
|---|
| 95 | { |
|---|
| 96 | s.erase(); |
|---|
| 97 | <FONT color=#000080>// |
|---|
| 98 | // attempt to grow string buffer to match file size, |
|---|
| 99 | // this doesn't always work... |
|---|
| 100 | </FONT> s.reserve(is.rdbuf()-&gt;in_avail()); |
|---|
| 101 | <B>char</B> c; |
|---|
| 102 | <B>while</B>(is.get(c)) |
|---|
| 103 | { |
|---|
| 104 | <FONT color=#000080>// use logarithmic growth strategy, in case |
|---|
| 105 | // in_avail (above) returned zero: |
|---|
| 106 | </FONT> <B>if</B>(s.capacity() == s.size()) |
|---|
| 107 | s.reserve(s.capacity() * 3); |
|---|
| 108 | s.append(1, c); |
|---|
| 109 | } |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | |
|---|
| 113 | <B>int</B> main(<B>int</B> argc, <B>char</B>** argv) |
|---|
| 114 | { |
|---|
| 115 | std::string s; |
|---|
| 116 | std::list<std::string> l; |
|---|
| 117 | |
|---|
| 118 | <B>for</B>(<B>int</B> i = 1; i < argc; ++i) |
|---|
| 119 | { |
|---|
| 120 | std::cout << <FONT color=#000080>"Findings URL's in "</FONT> << argv[i] << <FONT color=#000080>":"</FONT> << std::endl; |
|---|
| 121 | s.erase(); |
|---|
| 122 | std::ifstream is(argv[i]); |
|---|
| 123 | load_file(s, is); |
|---|
| 124 | boost::regex_split(std::back_inserter(l), s, e); |
|---|
| 125 | <B>while</B>(l.size()) |
|---|
| 126 | { |
|---|
| 127 | s = *(l.begin()); |
|---|
| 128 | l.pop_front(); |
|---|
| 129 | std::cout << s << std::endl; |
|---|
| 130 | } |
|---|
| 131 | } |
|---|
| 132 | <B>return</B> 0; |
|---|
| 133 | }</PRE> |
|---|
| 134 | <HR> |
|---|
| 135 | <p>Revised |
|---|
| 136 | <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> |
|---|
| 137 | 26 June 2004 |
|---|
| 138 | <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p> |
|---|
| 139 | <p><i>© Copyright John Maddock 1998- |
|---|
| 140 | <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2004<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p> |
|---|
| 141 | <P><I>Use, modification and distribution are subject to the Boost Software License, |
|---|
| 142 | Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A> |
|---|
| 143 | or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P> |
|---|
| 144 | </body> |
|---|
| 145 | </html> |
|---|