Planet
navi home PPS about screenshots download development forum

source: downloads/boost_1_34_1/boost/xpressive/regex_token_iterator.hpp @ 33

Last change on this file since 33 was 29, checked in by landauf, 17 years ago

updated boost from 1_33_1 to 1_34_1

File size: 9.0 KB
Line 
1 ///////////////////////////////////////////////////////////////////////////////
2/// \file regex_token_iterator.hpp
3/// Contains the definition of regex_token_iterator, an STL-compatible iterator
4/// for tokenizing a string using a regular expression.
5//
6//  Copyright 2004 Eric Niebler. Distributed under the Boost
7//  Software License, Version 1.0. (See accompanying file
8//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10#ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
11#define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
12
13// MS compatible compilers support #pragma once
14#if defined(_MSC_VER) && (_MSC_VER >= 1020)
15# pragma once
16#endif
17
18#include <vector>
19#include <boost/mpl/assert.hpp>
20#include <boost/type_traits/is_same.hpp>
21#include <boost/type_traits/is_convertible.hpp>
22#include <boost/xpressive/regex_iterator.hpp>
23
24namespace boost { namespace xpressive { namespace detail
25{
26
27//////////////////////////////////////////////////////////////////////////
28// regex_token_iterator_impl
29//
30template<typename BidiIter>
31struct regex_token_iterator_impl
32  : private noncopyable
33{
34    typedef typename iterator_value<BidiIter>::type  char_type;
35
36    regex_token_iterator_impl
37    (
38        BidiIter begin
39      , BidiIter cur
40      , BidiIter end
41      , basic_regex<BidiIter> const *rex
42      , regex_constants::match_flag_type flags = regex_constants::match_default
43      , std::vector<int> subs = std::vector<int>(1, 0)
44      , int n = -2
45      , bool not_null = false
46    )
47      : iter_(begin, cur, end, rex, flags, not_null)
48      , result_()
49      , n_((-2 == n) ? static_cast<int>(subs.size()) - 1 : n)
50      , subs_()
51    {
52        this->subs_.swap(subs);
53    }
54
55    bool next()
56    {
57        if(-1 != this->n_)
58        {
59            BidiIter cur = this->iter_.state_.cur_;
60            if(++this->n_ != static_cast<int>(this->subs_.size()))
61            {
62                this->result_ = (-1 == this->subs_[ this->n_ ])
63                    ? this->iter_.what_.prefix().str()
64                    : this->iter_.what_[ this->subs_[ this->n_ ] ].str();
65                return true;
66            }
67            else if(this->iter_.next())
68            {
69                this->n_ = 0;
70                this->result_ = (-1 == this->subs_[ this->n_ ])
71                    ? this->iter_.what_.prefix().str()
72                    : this->iter_.what_[ this->subs_[ this->n_ ] ].str();
73                return true;
74            }
75            else if(cur != this->iter_.state_.end_ && -1 == this->subs_[ 0 ])
76            {
77                this->n_ = -1;
78                this->result_.assign(cur, this->iter_.state_.end_);
79                return true;
80            }
81        }
82
83        this->n_ = -1;
84        return false;
85    }
86
87    bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const
88    {
89        return this->iter_.equal_to(that.iter_) && this->n_ == that.n_;
90    }
91
92    regex_iterator_impl<BidiIter> iter_;
93    std::basic_string<char_type> result_;
94    int n_;
95    std::vector<int> subs_;
96};
97
/// Returns the mark number denoted by \c i, which for a plain int is the
/// value itself. The array overload of to_vector() calls this for every
/// element it converts.
inline int get_mark_number(int i)
{
    return i;
}
102
/// Wraps a single sub-match index in a one-element vector.
inline std::vector<int> to_vector(int sub_match)
{
    std::vector<int> single;
    single.push_back(sub_match);
    return single;
}
107
/// A vector of int is already in the required form, so it is handed back
/// by reference without copying.
inline std::vector<int> const &to_vector(std::vector<int> const &sub_matches)
{
    std::vector<int> const &unchanged = sub_matches;
    return unchanged;
}
112
113template<typename Int, std::size_t Size>
114inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ])
115{
116    // so that people can specify sub-match indices inline with
117    // string literals, like "\1\2\3", leave off the trailing '\0'
118    std::size_t const size = Size - is_same<Int, char>::value;
119    std::vector<int> vect(size);
120    for(std::size_t i = 0; i < size; ++i)
121    {
122        vect[i] = get_mark_number(sub_matches[i]);
123    }
124    return vect;
125}
126
127template<typename Int>
128inline std::vector<int> to_vector(std::vector<Int> const &sub_matches)
129{
130    BOOST_MPL_ASSERT((is_convertible<Int, int>));
131    return std::vector<int>(sub_matches.begin(), sub_matches.end());
132}
133
134} // namespace detail
135
136//////////////////////////////////////////////////////////////////////////
137// regex_token_iterator
138//
139template<typename BidiIter>
140struct regex_token_iterator
141{
142    typedef basic_regex<BidiIter> regex_type;
143    typedef typename iterator_value<BidiIter>::type char_type;
144    typedef std::basic_string<char_type> value_type;
145    typedef typename iterator_difference<BidiIter>::type difference_type;
146    typedef value_type const *pointer;
147    typedef value_type const &reference;
148    typedef std::forward_iterator_tag iterator_category;
149
150    /// INTERNAL ONLY
151    typedef detail::regex_token_iterator_impl<BidiIter> impl_type_;
152
153    regex_token_iterator()
154      : impl_()
155    {
156    }
157
158    regex_token_iterator
159    (
160        BidiIter begin
161      , BidiIter end
162      , basic_regex<BidiIter> const &rex
163    )
164      : impl_(new impl_type_(begin, begin, end, &rex))
165    {
166        this->next_();
167    }
168
169    template<typename SubMatches>
170    regex_token_iterator
171    (
172        BidiIter begin
173      , BidiIter end
174      , basic_regex<BidiIter> const &rex
175      , SubMatches const &sub_matches
176      , regex_constants::match_flag_type flags = regex_constants::match_default
177    )
178      : impl_(new impl_type_(begin, begin, end, &rex, flags, detail::to_vector(sub_matches)))
179    {
180        this->next_();
181    }
182
183    regex_token_iterator(regex_token_iterator<BidiIter> const &that)
184      : impl_(that.impl_) // COW
185    {
186    }
187
188    regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that)
189    {
190        this->impl_ = that.impl_; // COW
191        return *this;
192    }
193
194    friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
195    {
196        if(!left.impl_ || !right.impl_)
197        {
198            return !left.impl_ && !right.impl_;
199        }
200
201        return left.impl_->equal_to(*right.impl_);
202    }
203
204    friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
205    {
206        return !(left == right);
207    }
208
209    value_type const &operator *() const
210    {
211        return this->impl_->result_;
212    }
213
214    value_type const *operator ->() const
215    {
216        return &this->impl_->result_;
217    }
218
219    /// If N == -1 then sets *this equal to the end of sequence iterator.
220    /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to
221    /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
222    /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is
223    /// not set in flags then sets it. Then locates the next match as if by calling
224    /// regex_search(what[0].second, end, what, *pre, flags), with the following variation:
225    /// in the event that the previous match found was of zero length (what[0].length() == 0)
226    /// then attempts to find a non-zero length match starting at what[0].second, only if that
227    /// fails and provided what[0].second != suffix().second does it look for a (possibly zero
228    /// length) match starting from what[0].second + 1.  If such a match is found then sets N
229    /// equal to zero, and sets result equal to
230    /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
231    /// Otherwise if no further matches were found, then let last_end be the endpoint of the last
232    /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and
233    /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end
234    /// of sequence iterator.
235    regex_token_iterator<BidiIter> &operator ++()
236    {
237        this->fork_(); // un-share the implementation
238        this->next_();
239        return *this;
240    }
241
242    regex_token_iterator<BidiIter> operator ++(int)
243    {
244        regex_token_iterator<BidiIter> tmp(*this);
245        ++*this;
246        return tmp;
247    }
248
249private:
250
251    /// INTERNAL ONLY
252    void fork_()
253    {
254        if(!this->impl_.unique())
255        {
256            shared_ptr<impl_type_> clone
257            (
258                new impl_type_
259                (
260                    this->impl_->iter_.state_.begin_
261                  , this->impl_->iter_.state_.cur_
262                  , this->impl_->iter_.state_.end_
263                  , this->impl_->iter_.rex_
264                  , this->impl_->iter_.flags_
265                  , this->impl_->subs_
266                  , this->impl_->n_
267                  , this->impl_->iter_.not_null_
268                )
269            );
270
271            // only copy the match_results struct if we have to. Note: if the next call
272            // to impl_->next() will return false or call regex_search, we don't need to
273            // copy the match_results struct.
274            if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size()))
275            {
276                clone->iter_.what_ = this->impl_->iter_.what_;
277            }
278
279            this->impl_.swap(clone);
280        }
281    }
282
283    /// INTERNAL ONLY
284    void next_()
285    {
286        BOOST_ASSERT(this->impl_ && this->impl_.unique());
287        if(!this->impl_->next())
288        {
289            this->impl_.reset();
290        }
291    }
292
293    shared_ptr<impl_type_> impl_;
294};
295
296}} // namespace boost::xpressive
297
298#endif
Note: See TracBrowser for help on using the repository browser.