/////////////////////////////////////////////////////////////////////////////// // perl2xpr.cpp // A utility for translating a Perl regular expression into an // xpressive static regular expression. // // Copyright 2007 Eric Niebler. Distributed under the Boost // Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include using namespace boost::xpressive; int main(int argc, char *argv[]) { int i = 1; char const *dot = " ~_n "; char const *bos = " bos "; char const *eos = " eos "; for(; i < argc && '-' == *argv[i]; ++i) { switch(argv[i][1]) { case 's': // perl /s modifier dot = " _ "; break; case 'm': // perl /m modifier bos = " bol "; eos = " eol "; break; default: std::cerr << "Unknown option : " << argv[i] << std::endl; return -1; } } if(i == argc) { std::cerr << "Usage:\n perl2xpr [-s] [-m] 're'\n"; return -1; } // Local variables used by the semantic actions below local mark_nbr; local tmp; local > strings; // The rules in the dynamic regex grammar cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem; lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|') ; escape = as_xpr("b") [top(strings) += " _b "] | as_xpr("B") [top(strings) += " ~_b "] | as_xpr("d") [top(strings) += " _d "] | as_xpr("D") [top(strings) += " ~_d "] | as_xpr("s") [top(strings) += " _s "] | as_xpr("S") [top(strings) += " ~_s "] | as_xpr("w") [top(strings) += " _w "] | as_xpr("W") [top(strings) += " ~_w "] | _d [top(strings) += " s" + _ + " "] | _ [top(strings) += " as_xpr('" + _ + "') "] ; group = as_xpr("?:") [top(strings) += " ( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?i:") [top(strings) += " icase( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?>") [top(strings) += " keep( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?=") [top(strings) += " before( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?!") [top(strings) += " ~before( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?<=") [top(strings) += " after( "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | as_xpr("?> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] | nil [top(strings) += " ( s" + as(++mark_nbr) + "= "] >> by_ref(regex) >> as_xpr(')') [top(strings) += " ) "] ; setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "] | "[:" >> !as_xpr('^') [top(strings) += "~"] >> (+_w) [top(strings) += _ ] >> ":]" | ((s1=~as_xpr(']')) >> '-' >> (s2=~as_xpr(']'))) [top(strings) += "range('" + s1 + "','" + s2 + "')"] ; charset = !as_xpr('^') [top(strings) += " ~ "] >> nil [top(strings) += " set[ "] >> (setelem | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]) >> *( nil [ top(strings) += " | " ] >> (setelem | (~as_xpr(']')) [ top(strings) += "'" + _ + "'" ] ) ) >> as_xpr(']') [top(strings) += " ] "] ; atom = (+(lit >> ~before((set='*','+','?','{'))) | lit) [top(strings) += " as_xpr(\"" + _ + "\") "] | as_xpr('.') [top(strings) += dot] | as_xpr('^') [top(strings) += bos] | as_xpr('$') [top(strings) += eos] | '\\' >> escape | '(' >> group | '[' >> charset ; repeat = as_xpr('{') [top(strings) += " repeat<"] >> (+_d) [top(strings) += _] >> !( as_xpr(',') [top(strings) += ","] >> ( (+_d) [top(strings) += _] | nil [top(strings) += "inf"] ) ) >> as_xpr('}') [top(strings) += ">(", tmp += " ) "] ; quant = nil [push(strings, "")] >> atom [tmp = top(strings), pop(strings)] >> !( nil [push(strings, "")] >> ( as_xpr("*") [top(strings) += " * "] | as_xpr("+") [top(strings) += " + "] | as_xpr("?") [top(strings) += " ! "] | repeat ) >> !( as_xpr('?') [ top(strings) = " - " + top(strings) ] ) >> nil [ tmp = top(strings) + tmp, pop(strings) ] ) >> nil [top(strings) += tmp] ; seq = quant >> *( nil [top(strings) += " >> "] >> quant ) ; alts = seq >> *( as_xpr('|') [top(strings) += " | "] >> seq ) ; regex = alts ; strings.get().push(""); if(!regex_match(argv[i], regex)) { std::cerr << "ERROR: unrecognized regular expression" << std::endl; return -1; } else { std::cout << strings.get().top() << std::endl; } return 0; }