Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
tokenizer.h
1 /***************************************************************************
2  copyright : (C) 2002-2008 by Stefano Barbato
3  email : stefano@codesink.org
4 
5  $Id: tokenizer.h,v 1.18 2008-10-07 11:44:38 tat Exp $
6  ***************************************************************************/
7 #ifndef _MIMETIC_TOKENIZER_H_
8 #define _MIMETIC_TOKENIZER_H_
9 #include <iterator>
10 #include <algorithm>
11 #include <set>
12 #include <string>
13 #include <cstring>
14 
15 namespace mimetic
16 {
17 
/// Delimiter predicate for arbitrary value types.
///
/// Stores the delimiter values in a std::set; operator() reports whether a
/// value is one of them.
template<typename value_type>
struct IsDelim
{
    // These typedefs are what std::unary_function<value_type,bool> used to
    // provide.  That base class was deprecated in C++11 and removed in
    // C++17 (and <functional> was never included here), so they are spelled
    // out directly.
    typedef value_type argument_type;
    typedef bool result_type;

    /// Returns true when \p val is one of the registered delimiters.
    bool operator()(const value_type& val) const
    {
        return m_delims.count(val) != 0;
    }
    /// Adds every element of \p cont to the delimiter set.
    /// Delimiters registered earlier are kept (the set is not cleared).
    template<typename Container>
    void setDelimList(const Container& cont)
    {
        setDelimList(cont.begin(), cont.end());
    }
    /// Adds every element of [bit, eit) to the delimiter set.
    /// Delimiters registered earlier are kept (the set is not cleared).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        m_delims.insert(bit, eit);
    }
    /// Registers a single delimiter.
    void addDelim(const value_type& value)
    {
        m_delims.insert(value);
    }
    /// Unregisters a single delimiter; no effect if it was not registered.
    void removeDelim(const value_type& value)
    {
        m_delims.erase(value);
    }
private:
    std::set<value_type> m_delims;
};

/// Delimiter predicate specialised for char.
///
/// Uses a 256-entry lookup table indexed by the unsigned value of the
/// character instead of a std::set.
template<>
struct IsDelim<char>
{
    typedef char argument_type;
    typedef bool result_type;

    /// Starts with no delimiters, so the predicate is safe to call before
    /// any setDelimList().
    IsDelim()
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
    }
    /// Replaces the delimiter set with the characters of \p delims.
    void setDelimList(const std::string& delims)
    {
        setDelimList(delims.begin(), delims.end());
    }
    /// Replaces the delimiter set with the characters in [bit, eit).
    template<typename Iterator>
    void setDelimList(Iterator bit, Iterator eit)
    {
        std::memset(m_lookup, 0, sizeof(m_lookup));
        // Index through unsigned char: where plain char is signed, bytes
        // >= 0x80 are negative and would otherwise address memory before
        // the table.
        for(; bit != eit; ++bit)
            m_lookup[static_cast<unsigned char>(*bit)] = 1;
    }
    /// Registers a single delimiter (same interface as the generic template).
    void addDelim(const char& value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 1;
    }
    /// Unregisters a single delimiter (same interface as the generic template).
    void removeDelim(const char& value)
    {
        m_lookup[static_cast<unsigned char>(value)] = 0;
    }
    /// Returns true when \p val is one of the registered delimiters.
    /// A char argument is converted to unsigned char by the call itself.
    bool operator()(unsigned char val) const
    {
        return m_lookup[val] != 0;
    }
private:
    char m_lookup[256];
};
72 
73 
74 /// Iterator tokenizer template class
75 template<class Iterator,typename value_type>
77 {
78 public:
79  ItTokenizer(Iterator bit, Iterator eit)
80  : m_bit(bit), m_eit(eit), m_tok_eit(bit)
81  {
82  }
83  void setSource(Iterator bit, Iterator eit)
84  {
85  m_bit = bit;
86  m_eit = eit;
87  m_tok_eit = bit;
88  }
89  template<typename DelimCont>
90  void setDelimList(const DelimCont& cont)
91  {
92  m_delimPred.setDelimList(cont);
93  }
94  template<typename It>
95  void setDelimList(It bit, It eit)
96  {
97  m_delimPred.setDelimList(bit, eit);
98  }
99  template<typename DestCont>
100  bool next(DestCont& dst)
101  {
102  dst.erase(dst.begin(), dst.end());
103  if(m_tok_eit == m_eit)
104  return false;
105  m_tok_eit = std::find_if(m_bit, m_eit, m_delimPred);
106  m_matched = 0; // end of input
107  if(m_tok_eit != m_eit)
108  m_matched = *m_tok_eit; // matched delimiter
109  std::copy(m_bit, m_tok_eit, std::back_inserter<DestCont>(dst));
110  m_bit = (m_tok_eit != m_eit && ++m_tok_eit != m_eit ? m_tok_eit :m_eit);
111  return true;
112  }
113  const value_type& matched() const
114  {
115  return m_matched;
116  }
117  void addDelim(const value_type& value)
118  {
119  m_delimPred.addDelim(value);
120  }
121  void removeDelim(const value_type& value)
122  {
123  m_delimPred.removeDelim(value);
124  }
125 private:
126  Iterator m_bit, m_eit, m_tok_eit;
127  IsDelim<value_type> m_delimPred;
128  value_type m_matched;
129 };
130 
131 
132 /// char container tokenizer template class
133 template<typename Container>
134 struct ContTokenizer: public ItTokenizer<typename Container::const_iterator,typename Container::value_type>
135 {
136  typedef typename Container::value_type value_type;
137  typedef typename Container::iterator iterator;
138  typedef typename Container::const_iterator const_iterator;
139  // i want to be fast here so i don't want to copy "cont"
140  // so "cont" MUST be in scope for all following calls
141  // to next(...).
142  ContTokenizer(const Container* cont)
143  : ItTokenizer<const_iterator, value_type>(cont->begin(), cont->end())
144  {
145  }
146  template<typename DelimCont>
147  ContTokenizer(const Container* cont, const DelimCont& delims)
148  : ItTokenizer<const_iterator,value_type>(cont->begin(), cont->end())
149  {
150  this->setDelimList(delims);
151  }
152  void setSource(const Container* cont)
153  {
154  ItTokenizer<const_iterator,value_type>::setSource(cont->begin(), cont->end());
155  }
156 private:
158  ContTokenizer& operator=(const ContTokenizer&);
159 };
160 
161 /// std::string tokenizer
163 
164 }
165 
166 #endif
167 
Iterator tokenizer template class.
Definition: tokenizer.h:76
ContTokenizer< std::string > StringTokenizer
std::string tokenizer
Definition: tokenizer.h:162
Definition: body.h:17
char container tokenizer template class
Definition: tokenizer.h:134