00001 #ifndef WIBBLE_REGEXP_H
00002 #define WIBBLE_REGEXP_H
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <wibble/exception.h>
00025 #include <sys/types.h>
00026 #include <regex.h>
00027
00028 namespace wibble {
00029 namespace exception {
00030
00032
00033 class Regexp : public wibble::exception::Generic
00034 {
00035 protected:
00036 int m_code;
00037 std::string m_message;
00038
00039 public:
00040 Regexp(const regex_t& re, int code, const std::string& context)
00041 throw ();
00042 ~Regexp() throw () {}
00043
00045 virtual int code() const throw () { return m_code; }
00046
00047 virtual const char* type() const throw () { return "Regexp"; }
00048 virtual std::string desc() const throw () { return m_message; }
00049 };
00050
00051 }
00052
00053 class Regexp
00054 {
00055 protected:
00056 regex_t re;
00057 regmatch_t* pmatch;
00058 int nmatch;
00059 std::string lastMatch;
00060
00061 public:
00062
00063
00064
00065
00066 Regexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp);
00067 ~Regexp() throw ();
00068
00069 bool match(const std::string& str, int flags = 0) throw (wibble::exception::Regexp);
00070
00071
00072
00073
00074
00075 std::string operator[](int idx) throw (wibble::exception::OutOfRange);
00076
00077 size_t matchStart(int idx) throw (wibble::exception::OutOfRange);
00078 size_t matchEnd(int idx) throw (wibble::exception::OutOfRange);
00079 size_t matchLength(int idx) throw (wibble::exception::OutOfRange);
00080 };
00081
00082 class ERegexp : public Regexp
00083 {
00084 public:
00085 ERegexp(const std::string& expr, int match_count = 0, int flags = 0) throw (wibble::exception::Regexp)
00086 : Regexp(expr, match_count, flags | REG_EXTENDED) {}
00087 };
00088
00089 class Tokenizer
00090 {
00091 const std::string& str;
00092 wibble::Regexp re;
00093
00094 public:
00095 class const_iterator
00096 {
00097 Tokenizer& tok;
00098 size_t beg, end;
00099 public:
00100 typedef std::string value_type;
00101 typedef ptrdiff_t difference_type;
00102 typedef value_type *pointer;
00103 typedef value_type &reference;
00104 typedef std::forward_iterator_tag iterator_category;
00105
00106 const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); }
00107 const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {}
00108
00109 const_iterator& operator++();
00110
00111 std::string operator*() const
00112 {
00113 return tok.str.substr(beg, end-beg);
00114 }
00115 bool operator==(const const_iterator& ti) const
00116 {
00117 return beg == ti.beg && end == ti.end;
00118 }
00119 bool operator!=(const const_iterator& ti) const
00120 {
00121 return beg != ti.beg || end != ti.end;
00122 }
00123 };
00124
00125 Tokenizer(const std::string& str, const std::string& re, int flags)
00126 : str(str), re(re, 1, flags) {}
00127
00128 const_iterator begin() { return const_iterator(*this); }
00129 const_iterator end() { return const_iterator(*this, false); }
00130 };
00131
00145 class Splitter
00146 {
00147 wibble::Regexp re;
00148
00149 public:
00154
00155 class const_iterator
00156 {
00157 wibble::Regexp& re;
00158 std::string cur;
00159 std::string next;
00160
00161 public:
00162 typedef std::string value_type;
00163 typedef ptrdiff_t difference_type;
00164 typedef value_type *pointer;
00165 typedef value_type &reference;
00166 typedef std::forward_iterator_tag iterator_category;
00167
00168 const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; }
00169 const_iterator(wibble::Regexp& re) : re(re) {}
00170
00171 const_iterator& operator++();
00172
00173 const std::string& operator*() const
00174 {
00175 return cur;
00176 }
00177 const std::string* operator->() const
00178 {
00179 return &cur;
00180 }
00181 bool operator==(const const_iterator& ti) const
00182 {
00183 return cur == ti.cur && next == ti.next;
00184 }
00185 bool operator!=(const const_iterator& ti) const
00186 {
00187 return cur != ti.cur || next != ti.next;
00188 }
00189 };
00190
00194 Splitter(const std::string& re, int flags)
00195 : re(re, 1, flags) {}
00196
00200 const_iterator begin(const std::string& str) { return const_iterator(re, str); }
00201 const_iterator end() { return const_iterator(re); }
00202 };
00203
00204 }
00205
00206
00207 #endif