/**
  Simple parsers

  Here are some commonly used parsers. Also as an example for how
  to use the combinators.

  Copyright: 2017 Yuxuan Shui
*/
module sdpc.parsers;
import sdpc.combinators,
       sdpc.primitives;
import std.traits,
       std.conv,
       std.meta,
       std.functional,
       std.algorithm;
import std.range : ElementType;
import std.array : array;
import std.experimental.allocator;
import std.experimental.allocator.gc_allocator : GCAllocator;
public:

/**
  Parse error, possibly an aggregate of several alternative errors.

  The three arrays are parallel: entry `k` of each one describes the
  k-th recorded error.

  Params:
      R = type of the input range the failing parser was run on
*/
struct Err(R) {
	/// For each error, the list of strings that were expected (or rejected).
	const(string[])[] e;
	/// For each error, true when the listed strings are the ones NOT allowed.
	const(bool)[] inv;
	/// For each error, the input range at the point where matching failed.
	const(R)[] err_range;
	alias RangeType = R;

	/// Concatenate two errors into one that carries the entries of both.
	Err!R opBinary(string op)(auto ref const(Err!R) o) const if (op == "+") {
		assert(e.length > 0);
		assert(o.e.length > 0);
		return Err!R(e~o.e, inv~o.inv, err_range~o.err_range);
	}

	/// Build an error from already assembled parallel arrays.
	this(const(string[])[] e, const(bool)[] inv, const(R)[] i) {
		this.e = e;
		this.inv = inv;
		this.err_range = i;
	}

	/// Build an error holding a single entry.
	this(const(string[]) e, bool inv, R i) {
		this.e = [e];
		this.inv = [inv];
		this.err_range = [i];
	}

	/// Render the `id`-th entry as a human readable message.
	private string toString1(size_t id) const {
		import std.format : format;
		import std.range : take, join;

		string tmp;
		if (inv[id])
			tmp = "Expecting any character/string other than: "~e[id].join(", ");
		else
			tmp = "Expecting any of the followings: "~e[id].join(", ");

		// Length of the longest expected string; this bounds how much of the
		// offending input is echoed back. An empty expectation list gives 0
		// instead of tripping on an empty range.
		size_t mlen = 0;
		foreach (s; e[id])
			if (s.length > mlen)
				mlen = s.length;

		string got;
		if (err_range[id].empty)
			got = "<EOF>";
		else
			got = err_range[id].take(mlen).to!string;

		// Ranges exposing `row` and `col` get the position appended.
		static if (is(typeof(R.init.row)) && is(typeof(R.init.col)))
			return format("%s, but got %s at %s, %s", tmp, got, err_range[id].row, err_range[id].col);
		else
			return tmp~", but got "~got;
	}

	/// Render every recorded entry; a single entry is printed without a header.
	string toString() const {
		assert(e.length > 0);
		if (e.length == 1)
			return toString1(0);
		string ret = "Encountered following errors (only need to solve one of them): \n";
		foreach(i; 0..e.length)
			ret ~= "\t"~toString1(i)~"\n";
		return ret;
	}
}
/// Match a string, return the matched string
template token(string t) {
auto token(R)(in auto ref R i)
if (isForwardRange!R) {
	import std.algorithm.comparison : equal;
	import std.range : walkLength;
	enum string[] expects = [t];
	// Number of range elements `t` spans. For an auto-decoded string this is
	// the code point count, which is smaller than `t.length` (code units)
	// whenever `t` contains non-ASCII characters.
	enum size_t tlen = t.walkLength;
	static struct Token {
		bool empty;
		string front;
		Err!R err;
		R cont;

		// Try to match `t` at the current position of `cont`. On success
		// `front` is set and `cont` is advanced past the match; on failure
		// `empty` is set and `err` records where matching stopped.
		void popFront() {
			import std.range : take, popFrontExactly;
			auto str = take(cont.save, tlen);
			if (equal(str, t)) {
				front = t;
				empty = false;
				cont.popFrontExactly(tlen);
				return;
			}

			empty = true;
			err = Err!R(expects, false, cont.save);
		}

		this(R i) {
			cont = i;
			popFront;
		}
	}
	return Token(i);
}}

/// Match any character in accept
template ch(alias accept) if (is(ElementType!(typeof(accept)))) {
auto ch(R)(in auto ref R i) {
	static const(string[])[] expects = [accept.map!"[a]".array];
	static bool[] inv = [false];
	alias V = aliasSeqOf!accept;
	struct Ch {
		bool empty;
		ElementType!R front;
		Err!R err;
		R cont;

		this(R i) {
			cont = i;
			popFront;
		}

		// Consume one element of `cont` if it is one of the accepted
		// characters, otherwise flag failure and record the error.
		void popFront() {
			if (cont.empty) {
				empty = true;
				err = Err!R(expects, inv, [cont]);
				return;
			}

			auto u = cont.front;
			o:switch(u) {
				// static foreach magic
				foreach(v; V) {
				case v:
					empty = false;
					front = u;
					cont.popFront;
					break o;
				}
				default:
					empty = true;
					err = Err!R(expects, inv, [cont]);
					break;
			}
		}
	}
	return Ch(i);
}}

/// Match any character except those in reject
version(legacy)
struct not_ch(alias reject) if (is(ElementType!(typeof(reject)))) {
	alias Char = ElementType!(typeof(reject));
	enum string[] e = reject.map!"[a]".array;
	static auto opCall(R, alias Allocator = GCAllocator.instance)(in auto ref R i)
	if (isForwardRange!R && is(typeof(Char.init == ElementType!R.init))) {
		alias RT = Result!(R, Unqual!(ElementType!R), Err!R);
		alias V = aliasSeqOf!reject;
		if (i.empty)
			return RT(Err!R(e, true, i));

		auto u = i.front;
		auto retr = i.save;
		switch(u) {
			// static foreach magic
			foreach(v; V) {
			case v:
				return RT(Err!R(e, true, i));
			}
			default:
				retr.popFront;
				return RT(retr, u);
		}
	}
}

/// Parse a single digit: match one character of `_digits` and map it to its
/// index within `_digits`, as an `int`.
template digit(string _digits) {
	import std.string : indexOf;
	alias digit = pmap!(ch!_digits, ch => cast(int)_digits.indexOf(ch));
}

immutable string lower = "qwertyuiopasdfghjklzxcvbnm";
immutable string upper = "QWERTYUIOPASDFGHJKLZXCVBNM";
immutable string alphabet = lower ~ upper;
immutable string digits = "0123456789";

/**
  Parse a number
  Params:
      accept = digits allowed in the number, i-th character corresponds to digit i
      base = base
*/
template number(string accept = digits, int base = 10) if (accept.length == base) {
	import std.algorithm.iteration;
	alias number = pfold!(digit!accept, (a, b) => a*base+b, 0);
}

///
version(legacy)
unittest {
	auto i = "12354";
	auto rx = number!()(i);
	assert(rx.ok);
	assert(rx.v == 12354);

	i = "ffabc";
	auto rx1 = number!(digits~"abcdef", 16)(i);
	assert(rx1.ok);
	assert(rx1.v == 1047228);
}

/**
  Parse a sequence of characters
  Params:
      accept = an array of acceptable characters
*/
version(legacy)
alias word(alias accept = alphabet) = many!(ch!accept);

/// Parse an identifier, starts with a letter or _, followed by letter, _, or digits
version(legacy)
auto identifier(R)(in auto ref R i)
if (isForwardRange!R) {
	auto ret = ch!(alphabet~"_")(i);
	alias RT = Result!(R, ElementType!R[], Err!R);
	if (!ret.ok)
		return RT(ret.err);
	auto ret2 = word!(alphabet~"_"~digits)(ret.cont);
	ElementType!R[] str = [ret.v];
	if (ret2.ok) {
		str ~= array(ret2.v[]);
		return RT(ret2.cont, str);
	}
	return RT(ret.cont, str);
}

///
version(legacy)
unittest {
	auto i = "_asd1234a";
	auto rx2 = identifier(i);
	assert(rx2.ok);
	assert(!rx2.cont.length);
	assert(rx2.v == "_asd1234a");
}

/// Parse escaped character, \n, \r, \b, \" and \\
version(legacy)
auto parse_escape1(R)(in auto ref R i)
if (isForwardRange!R) {
	alias RT = Result!(R, dchar, Err!R);
	auto r = seq!(
		discard!(token!"\\"),
		ch!"nbr\"\\")(i);
	if (!r.ok)
		return RT(r.err);
	dchar res;
	final switch(r.v[1]) {
	case 'n':
		res = '\n';
		break;
	case 'b':
		res = '\b';
		break;
	case 'r':
		res = '\r';
		break;
	case '"':
		res = '\"';
		break;
	case '\\':
		res = '\\';
		break;
	}
	return RT(r.cont, res);
}

/// Parse a string enclosed by a pair of quotes, and containing escape sequence
version(legacy)
auto parse_string(R)(in auto ref R i)
if (isForwardRange!R) {
	alias RT = Result!(R, dchar[], Err!R);
	auto r = between!(token!"\"",
		many!(choice!(
			parse_escape1,
			not_ch!"\""
		)),
		token!"\"")(i);
	if (!r.ok)
		return RT(r.err);
	return RT(r.cont, array(r.v[]));
}

///
version(legacy)
unittest {
	auto i = "\"asdf\\n\\b\\\"\"";
	auto r = parse_string(i);
	import std.format;
	assert(r.ok);
	assert(r.v == "asdf\n\b\"", format("%s", r.v));
}

/// Skip white spaces
version(legacy)
alias whitespace = pipe!(choice!(token!" ", token!"\n", token!"\t"));
version(legacy)
alias ws(alias func) = pipe!(seq!(func, skip!whitespace), wrap!"a[0]");

version(legacy)
template token_ws(string t) {
	auto token_ws(R)(in auto ref R i)
	if (isForwardRange!R) {
		return i.ws!(token!t);
	}
}

///
version(legacy)
unittest {
	const(char)[] i = " \n\t ";
	auto r = skip!whitespace(i);
	assert(r.ok);
	assert(!r.cont.length);
}