1 /**
2   Simple parsers
3 
  Here are some commonly used parsers. They also serve as examples of how
  to use the combinators.
6 
7   Copyright: 2017 Yuxuan Shui
8 */
9 module sdpc.parsers;
10 import sdpc.combinators,
11        sdpc.primitives;
12 import std.traits,
13        std.conv,
14        std.meta,
15        std.functional,
16        std.algorithm;
17 import std.range : ElementType;
18 import std.array : array;
19 import std.experimental.allocator;
20 import std.experimental.allocator.gc_allocator : GCAllocator;
21 public:
22 
/**
  Parse error holding one or more alternative failures.

  The three arrays are parallel: for index `k`, `e[k]` is the list of
  strings that were expected (or rejected, when `inv[k]` is true) and
  `err_range[k]` is the input range at the point of that failure.
*/
struct Err(R) {
	const(string[])[] e;
	const(bool)[] inv;
	const(R)[] err_range;
	alias RangeType = R;

	/// Concatenate the failures of `this` and `o`; both must hold at least one failure.
	Err!R opBinary(string op)(auto ref const(Err!R) o) const if (op == "+") {
		assert(e.length > 0);
		assert(o.e.length > 0);
		return Err!R(e~o.e, inv~o.inv, err_range~o.err_range);
	}

	/// Build an error from already assembled parallel arrays.
	this(const(string[])[] e, const(bool)[] inv, const(R)[] i) {
		this.e = e;
		this.inv = inv;
		this.err_range = i;
	}

	/// Build an error describing a single failure.
	this(const(string[]) e, bool inv, R i) {
		this.e = [e];
		this.inv = [inv];
		this.err_range = [i];
	}

	/// Format the failure stored at index `id`.
	private string toString1(size_t id) const {
		// `id` and `mlen` are size_t: they index arrays and feed `take`,
		// neither of which accepts a ulong where size_t is 32 bits wide.
		import std.format;
		import std.range : take, join;
		string tmp;
		if (inv[id])
			tmp = "Expecting any character/string other than: "~e[id].join(", ");
		else
			tmp = "Expecting any of the followings: "~e[id].join(", ");

		// Show as many input elements as the longest expectation has
		// characters. A plain loop keeps an empty expectation list valid
		// (it yields 0) instead of failing inside maxElement.
		size_t mlen = 0;
		foreach (s; e[id])
			if (s.length > mlen)
				mlen = s.length;

		string got;
		if (err_range[id].empty)
			got = "<EOF>";
		else
			got = err_range[id].take(mlen).to!string;

		// Append the position only when the range type exposes row/col
		static if (is(typeof(R.init.row)) && is(typeof(R.init.col)))
			return format("%s, but got %s at %s, %s", tmp, got, err_range[id].row, err_range[id].col);
		else
			return tmp~", but got "~got;
	}

	/**
	  Human readable description of the error.

	  A single failure is printed on its own; several failures are
	  listed one per line under a common heading.
	*/
	string toString() const {
		assert(e.length > 0);
		if (e.length == 1)
			return toString1(0);
		string ret = "Encountered following errors (only need to solve one of them): \n";
		foreach(i; 0..e.length)
			ret ~= "\t"~toString1(i)~"\n";
		return ret;
	}
}
74 
/**
  Match the literal string `t` at the front of the input.

  The returned value exposes `empty`, `front`, `err` and `cont`:
  on a match `empty` is false, `front` is `t` and `cont` is the input
  past the match; otherwise `empty` is true and `err` records that `t`
  was expected at `cont`.
*/
template token(string t) {
auto token(R)(in auto ref R i)
if (isForwardRange!R) {
	import std.algorithm.comparison;
	import std.range : walkLength;
	enum string[] expects = [t];
	// `take`, `equal` and `popFrontExactly` all count range elements. For
	// an auto-decoded string those are code points, so the window has to
	// be sized in elements of `t`; `t.length` (code units) is too large
	// for any non-ASCII token and made such tokens fail to match.
	enum size_t tlen = t.walkLength;
	static struct Token {
		bool empty;
		string front;
		Err!R err;
		R cont;

		/// Try to match `t` at the current position of `cont`.
		void popFront() {
			import std.range : take, popFrontExactly;
			auto str = take(cont.save, tlen);
			if (equal(str, t)) {
				front = t;
				empty = false;
				cont.popFrontExactly(tlen);
				return;
			}

			empty = true;
			err = Err!R(expects, false, cont.save);
		}

		/// Start at `i` and attempt the first match immediately.
		this(R i) {
			cont = i;
			popFront;
		}
	}
	return Token(i);
}}
108 
/**
  Match one element of the input against the set `accept`.

  The returned value exposes `empty`, `front`, `err` and `cont`:
  when the next input element is in `accept`, `front` holds it and
  `cont` is advanced past it; otherwise (including on empty input)
  `empty` is true and `err` is filled in.
*/
template ch(alias accept) if (is(ElementType!(typeof(accept)))) {
auto ch(R)(in auto ref R i) {
	static const(string[])[] expects = [accept.map!"[a]".array];
	static bool[] inv = [false];
	alias Accepted = aliasSeqOf!accept;
	struct Ch {
		bool empty;
		ElementType!R front;
		Err!R err;
		R cont;

		this(R i) {
			cont = i;
			popFront;
		}

		void popFront() {
			bool matched = false;
			if (!cont.empty) {
				auto c = cont.front;
				// The foreach over a compile-time sequence is unrolled,
				// producing one `case` label per accepted element.
				sw:switch (c) {
					foreach (v; Accepted) {
					case v:
						matched = true;
						break sw;
					}
					default:
						break;
				}
				if (matched) {
					empty = false;
					front = c;
					cont.popFront;
					return;
				}
			}

			// Empty input or an element outside `accept`
			empty = true;
			err = Err!R(expects, inv, [cont]);
		}
	}
	return Ch(i);
}}
152 
/// Match one element of the input provided it is NOT listed in `reject`
version(legacy)
struct not_ch(alias reject) if (is(ElementType!(typeof(reject)))) {
	alias Char = ElementType!(typeof(reject));
	enum string[] e = reject.map!"[a]".array;
	static auto opCall(R, alias Allocator = GCAllocator.instance)(in auto ref R i)
	if (isForwardRange!R && is(typeof(Char.init == ElementType!R.init))) {
		alias RT = Result!(R, Unqual!(ElementType!R), Err!R);
		alias Rejected = aliasSeqOf!reject;

		// Nothing left to consume
		if (i.empty)
			return RT(Err!R(e, true, i));

		auto head = i.front;
		switch (head) {
			// unrolled: one `case` per rejected element
			foreach (bad; Rejected) {
			case bad:
				return RT(Err!R(e, true, i));
			}
			default:
				break;
		}

		// Accepted: hand back the element and the input past it
		auto rest = i.save;
		rest.popFront;
		return RT(rest, head);
	}
}
179 
/**
  Digit parser built from `ch!_digits` and `pmap`.

  The mapping function handed to `pmap` converts a character into its
  index within `_digits`, cast to `int`.
*/
template digit(string _digits) {
	import std.string : indexOf;
	alias digit = pmap!(
		ch!_digits,
		c => cast(int)_digits.indexOf(c)
	);
}
185 
/// Lower-case ASCII letters
immutable lower = "qwertyuiopasdfghjklzxcvbnm";
/// Upper-case ASCII letters, in the same order as `lower`
immutable upper = "QWERTYUIOPASDFGHJKLZXCVBNM";
/// All ASCII letters: `lower` followed by `upper`
immutable alphabet = lower ~ upper;
/// Decimal digits; the character at index i is the digit i
immutable digits = "0123456789";
190 
/**
  Parse a number
  Params:
	accept = the digit characters; the i-th character stands for digit i,
	         and there must be exactly `base` of them
	base = numeric base
*/
template number(string accept = digits, int base = 10) if (accept.length == base) {
	import std.algorithm.iteration;
	// Fold step: shift the running value one position and add the new digit
	alias number = pfold!(
		digit!accept,
		(acc, d) => acc*base + d,
		0
	);
}
201 
///
version(legacy)
unittest {
	// Decimal with the default digit set
	auto decimalInput = "12354";
	auto dec = number!()(decimalInput);
	assert(dec.ok);
	assert(dec.v == 12354);

	// Hexadecimal with an explicit digit set
	auto hexInput = "ffabc";
	auto hex = number!(digits~"abcdef", 16)(hexInput);
	assert(hex.ok);
	assert(hex.v == 0xffabc);
}
215 
/**
  Parser for a run of characters, defined as `many` applied to `ch!accept`
  Params:
	accept = an array of acceptable characters
*/
version(legacy)
template word(alias accept = alphabet) {
	alias word = many!(ch!accept);
}
223 
/**
  Parse an identifier: a letter or `_`, optionally followed by any
  number of letters, `_` or digits.
*/
version(legacy)
auto identifier(R)(in auto ref R i)
if (isForwardRange!R) {
	alias RT = Result!(R, ElementType!R[], Err!R);

	// The first character is mandatory
	auto head = ch!(alphabet~"_")(i);
	if (!head.ok)
		return RT(head.err);

	ElementType!R[] name = [head.v];

	// The remainder is optional; without it the identifier is one character
	auto tail = word!(alphabet~"_"~digits)(head.cont);
	if (!tail.ok)
		return RT(head.cont, name);

	name ~= array(tail.v[]);
	return RT(tail.cont, name);
}
240 
///
version(legacy)
unittest {
	auto input = "_asd1234a";
	auto parsed = identifier(input);
	assert(parsed.ok);
	// the whole input is consumed
	assert(parsed.cont.length == 0);
	assert(parsed.v == "_asd1234a");
}
250 
/// Parse one escape sequence: \n, \r, \b, \" or \\
version(legacy)
auto parse_escape1(R)(in auto ref R i)
if (isForwardRange!R) {
	alias RT = Result!(R, dchar, Err!R);
	auto parsed = seq!(
		discard!(token!"\\"),
		ch!"nbr\"\\")(i);
	if (!parsed.ok)
		return RT(parsed.err);

	// Translate the character after the backslash
	dchar decoded;
	final switch (parsed.v[1]) {
	case 'n':  decoded = '\n'; break;
	case 'b':  decoded = '\b'; break;
	case 'r':  decoded = '\r'; break;
	case '"':  decoded = '\"'; break;
	case '\\': decoded = '\\'; break;
	}
	return RT(parsed.cont, decoded);
}
281 
/**
  Parse a string enclosed in double quotes; the body may contain the
  escape sequences understood by `parse_escape1`.
*/
version(legacy)
auto parse_string(R)(in auto ref R i)
if (isForwardRange!R) {
	alias RT = Result!(R, dchar[], Err!R);
	alias quote = token!"\"";
	// Body: escape sequences or any character other than the closing quote
	alias content = many!(choice!(
		parse_escape1,
		not_ch!"\""
	));

	auto parsed = between!(quote, content, quote)(i);
	if (!parsed.ok)
		return RT(parsed.err);
	return RT(parsed.cont, array(parsed.v[]));
}
297 
///
version(legacy)
unittest {
	import std.format;
	auto quoted = "\"asdf\\n\\b\\\"\"";
	auto parsed = parse_string(quoted);
	assert(parsed.ok);
	// escapes are decoded and the surrounding quotes dropped
	assert(parsed.v == "asdf\n\b\"", format("%s", parsed.v));
}
307 
/// Match one white space character: space, newline or tab
version(legacy)
alias whitespace = pipe!(
	choice!(
		token!" ",
		token!"\n",
		token!"\t"
	)
);

/// Run `func`, then `skip!whitespace`, and keep element 0 of the sequence result
version(legacy)
template ws(alias func) {
	alias ws = pipe!(seq!(func, skip!whitespace), wrap!"a[0]");
}
313 
/// `token!t` wrapped in `ws`
version(legacy)
template token_ws(string t) {
	auto token_ws(R)(in auto ref R i)
	if (isForwardRange!R) {
		return ws!(token!t)(i);
	}
}
321 
///
version(legacy)
unittest {
	const(char)[] blanks = " \n\t    ";
	auto skipped = skip!whitespace(blanks);
	assert(skipped.ok);
	// every blank has been consumed
	assert(skipped.cont.length == 0);
}