1: <?php
2: // Parco
3: // Copyright (c) 2015 Niels Sonnich Poulsen (http://nielssp.dk)
4: // Licensed under the MIT license.
5: // See the LICENSE file or http://opensource.org/licenses/MIT for more information.
6: namespace Parco\Combinator;
7:
8: use Parco\Parser;
9: use Parco\FuncParser;
10: use Parco\Success;
11: use Parco\Failure;
12: use Parco\Positional;
13: use Parco\Result;
14:
15: /**
16: * A collection of generic parser combinators.
17: *
18: * A user of this trait should define an input sequence type by implementing
19: * the {@see atEnd}, {@see head}, {@see tail}, and {@see show} methods.
20: */
21: trait Parsers
22: {
23:
24: /**
25: * @var Parser[]
26: */
27: private $parserCache = array();
28:
29: /**
30: * Whether the given input sequence is empty.
31: *
32: * @param mixed $input
33: * The input sequence.
34: * @return bool True if empty, false otherwise.
35: */
36: abstract protected function atEnd($input);
37:
38: /**
39: * Get the first element of the given input sequence.
40: *
41: * @param mixed $input
42: * The input sequence
43: * @return mixed The first element of the input sequence.
44: */
45: abstract protected function head($input);
46:
47: /**
48: * Remove the first element of the given input sequence and advance the
49: * position counter.
50: *
51: * @param mixed $input
52: * The input sequence.
53: * @param array $pos
54: * Current position as a 2-element array consisting of a line
55: * number and a column number. See {@see Positional}.
56: * @return array A two-element array consisting of the remaining input
57: * sequence and the position of the first element in the remaining
58: * input sequence. If the remaining input sequence is empty, the
59: * position returned should be `array(-1, -1)`. See
60: * {@see Positional}.
61: */
62: abstract protected function tail($input, array $pos);
63:
64: /**
65: * Convert an input sequence element to a string for use in failure
66: * messages.
67: *
68: * @param mixed $element
69: * An input sequence element.
70: * @return string String representation of element.
71: */
72: abstract protected function show($element);
73:
74: /**
75: * Lazily fetch a parser.
76: *
77: * @param string $name
78: * Name of parser method. Method must have zero parameters.
79: * @return Parser A lazy parser.
80: */
81: public function __get($name)
82: {
83: return new FuncParser(function ($input, array $pos) use ($name) {
84: if (! isset($this->parserCache[$name])) {
85: $this->parserCache[$name] = $this->$name();
86: }
87: return $this->parserCache[$name]->parse($input, $pos);
88: });
89: }
90:
91: /**
92: * A parser that accepts only the given element.
93: *
94: * `elem($e)` is a parser that succeeds if the first element in the input
95: * is equal to `$e`.
96: *
97: * @param mixed $e
98: * An element.
99: * @param bool $strict
100: * If true, `===` will be used for comparison instead of `==`.
101: * @return Parser An element parser.
102: */
103: public function elem($e, $strict = true)
104: {
105: return new FuncParser(function ($input, array $pos) use ($e, $strict) {
106: if ($this->atEnd($input)) {
107: return new Failure(
108: 'unexpected end of input, expected ' . $this->show($e),
109: $pos,
110: $input,
111: $pos
112: );
113: }
114: $head = $this->head($input);
115: $eq = $strict ? $head === $e : $head == $e;
116: if (! $eq) {
117: return new Failure(
118: 'unexpected ' . $this->show($head) . ', expected ' . $this->show($e),
119: $pos,
120: $input,
121: $pos
122: );
123: }
124: list($input, $nextPos) = $this->tail($input, $pos);
125: return new Success($e, $pos, $input, $nextPos);
126: });
127: }
128:
129: /**
130: * A parser that accepts input using a predicate.
131: *
132: * `acceptIf($f)` is a parser that succeeds if `$f($x) = true`, where `$x`
133: * is the first element in the input. The second parameter is optional and
134: * can be used to customize the failure message.
135: *
136: * @param callable $predicate
137: * A predicate function.
138: * @param callable|null $failure
139: * A function that converts a failing input to a failure message.
140: * @return Parser A parser.
141: */
142: public function acceptIf(callable $predicate, $failure = null)
143: {
144: return new FuncParser(function ($input, array $pos) use ($predicate, $failure) {
145: if ($this->atEnd($input)) {
146: return new Failure('unexpected end of input', $pos, $input, $pos);
147: }
148: $head = $this->head($input);
149: if (! call_user_func($predicate, $head)) {
150: if (isset($failure)) {
151: $message = call_user_func($failure, $head);
152: } else {
153: $message = 'unexpected ' . $this->show($head);
154: }
155: return new Failure($message, $pos, $input, $pos);
156: }
157: list($input, $nextPos) = $this->tail($input, $pos);
158: return new Success($head, $pos, $input, $nextPos);
159: });
160: }
161:
162: /**
163: * A parser that parses the entire input.
164: *
165: * `phrase($p)` is a parser that succeeds if `$p` succeeds and no input
166: * remains.
167: *
168: * @param Parser $p
169: * A parser.
170: * @return Parser A phrase parser.
171: */
172: public function phrase(Parser $p)
173: {
174: return new FuncParser(function ($input, array $pos) use ($p) {
175: $r = $p->parse($input, $pos);
176: if (! $r->successful) {
177: return $r;
178: }
179: if (! $this->atEnd($r->nextInput)) {
180: return new Failure(
181: 'unexpected ' . $this->show($this->head($r->nextInput)) . ', expected end of input',
182: $r->nextPos,
183: $r->nextInput,
184: $r->nextPos
185: );
186: }
187: return $r;
188: });
189: }
190:
191: /**
192: * Add position to result of parser.
193: *
194: * `positioned($p)` adds the position of the first input to the result of
195: * `$p`. The result must implement {@see Positional}.
196: *
197: * @param Parser $p
198: * A parser.
199: * @return Parser A positioned parser.
200: */
201: public function positioned(Parser $p)
202: {
203: return $p->positioned();
204: }
205:
206: /**
207: * Optional parser.
208: *
209: * `opt($p)` is a parser that always succeeds and returns `$x` if `$p`
210: * returns `$x` and `null` if `$p` fails.
211: *
212: * @param Parser $p
213: * A parser.
214: * @return Parser An optional parser.
215: */
216: public function opt(Parser $p)
217: {
218: return new FuncParser(function ($input, array $pos) use ($p) {
219: $r = $p->parse($input, $pos);
220: if ($r->successful) {
221: return $r;
222: }
223: return new Success(null, $pos, $input, $pos);
224: });
225: }
226:
227: /**
228: * Negating parser.
229: *
230: * `not($p)` is a parser that fails if `$p` succeeds and succeeds if `$p`
231: * fails. It never consumes any input.
232: *
233: * @param Parser $p
234: * A parser.
235: * @return Parser A negating parser.
236: */
237: public function not(Parser $p)
238: {
239: return new FuncParser(function ($input, array $pos) use ($p) {
240: $r = $p->parse($input, $pos);
241: if ($r->successful) {
242: return new Failure(null, $pos, $input, $pos);
243: }
244: return new Success(null, $pos, $input, $pos);
245: });
246: }
247:
248: /**
249: * Repetition parser.
250: *
251: * `rep($p)` is a parser that repeatedly uses `$p` to parse the input until
252: * `$p` fails. The result is an array of all results.
253: *
254: * @param Parser $p
255: * A parser.
256: * @return Parser A repetition parser.
257: */
258: public function rep(Parser $p)
259: {
260: return new FuncParser(function ($input, array $pos) use ($p) {
261: $list = array();
262: $nextPos = $pos;
263: while (true) {
264: $r = $p->parse($input, $nextPos);
265: if (! $r->successful) {
266: break;
267: }
268: $list[] = $r->result;
269: $input = $r->nextInput;
270: $nextPos = $r->nextPos;
271: }
272: return new Success($list, $pos, $input, $nextPos);
273: });
274: }
275:
276: /**
277: * Interleaved repetition parser.
278: *
279: * `repsep($p, $sep)` is a parser that repeatedly uses `$p` interleaved with
280: * `$sep` to parse the input until `$p` fails. The result is an array of all
281: * results of `$p`.
282: *
283: * @param Parser $p
284: * A parser.
285: * @param Parser $sep
286: * A parser that parses the elements that separate the elements
287: * parsed by `$p`.
288: * @return Parser A repetition parser.
289: */
290: public function repsep(Parser $p, Parser $sep)
291: {
292: return new FuncParser(function ($input, array $pos) use ($p, $sep) {
293: $list = array();
294: $r = $p->parse($input, $pos);
295: if (! $r->successful) {
296: return new Success($list, $pos, $input, $pos);
297: }
298: $list[] = $r->result;
299: $input = $r->nextInput;
300: $nextPos = $r->nextPos;
301: while (true) {
302: $s = $sep->parse($input, $nextPos);
303: if (! $s->successful) {
304: break;
305: }
306: $r = $p->parse($s->nextInput, $s->nextPos);
307: if (! $r->successful) {
308: return $r;
309: }
310: $list[] = $r->result;
311: $input = $r->nextInput;
312: $nextPos = $r->nextPos;
313: }
314: return new Success($list, $pos, $input, $nextPos);
315: });
316: }
317:
318: /**
319: * Non-empty repetition parser.
320: *
321: * `rep1($p)` is a parser that repeatedly uses `$p` to parse the input until
322: * `$p` fails. It fails if the first use of `$p` fails. The result is an
323: * array of all results.
324: *
325: * @param Parser $p
326: * A parser.
327: * @return Parser A repetition parser.
328: */
329: public function rep1(Parser $p)
330: {
331: return new FuncParser(function ($input, array $pos) use ($p) {
332: $list = array();
333: $nextPos = $pos;
334: do {
335: $r = $p->parse($input, $nextPos);
336: if (! $r->successful) {
337: if (! count($list)) {
338: return $r;
339: }
340: break;
341: }
342: $list[] = $r->result;
343: $input = $r->nextInput;
344: $nextPos = $r->nextPos;
345: } while (true);
346: return new Success($list, $pos, $input, $nextPos);
347: });
348: }
349:
350: /**
351: * Non-empty interleaved repetition parser.
352: *
353: * `repsep($p, $sep)` is a parser that repeatedly uses `$p` interleaved with
354: * `$sep` to parse the input until `$p` fails. It fails if the first use of
355: * `$p` fails. The result is an array of all results of `$p`.
356: *
357: * @param Parser $p
358: * A parser.
359: * @param Parser $sep
360: * A parser that parses the elements that separate the elements
361: * parsed by `$p`.
362: * @return Parser A repetition parser.
363: */
364: public function rep1sep(Parser $p, Parser $sep)
365: {
366: return new FuncParser(function ($input, array $pos) use ($p, $sep) {
367: $list = array();
368: $r = $p->parse($input, $pos);
369: if (! $r->successful) {
370: return $r;
371: }
372: $list[] = $r->result;
373: $input = $r->nextInput;
374: $nextPos = $r->nextPos;
375: while (true) {
376: $s = $sep->parse($input, $nextPos);
377: if (! $s->successful) {
378: break;
379: }
380: $r = $p->parse($s->nextInput, $s->nextPos);
381: if (! $r->successful) {
382: return $r;
383: }
384: $list[] = $r->result;
385: $input = $r->nextInput;
386: $nextPos = $r->nextPos;
387: }
388: return new Success($list, $pos, $input, $nextPos);
389: });
390: }
391:
392: /**
393: * N-repetitions parser.
394: *
395: * `repN($n, $p)` is a parser that uses `$p` to parse the input exactly `$n`
396: * times. It fails if any of the uses of `$p` fails. The result is an array
397: * of all results.
398: *
399: * @param int $num
400: * Number of repetitions.
401: * @param Parser $p
402: * A parser.
403: * @return Parser A repetition parser.
404: */
405: public function repN($num, Parser $p)
406: {
407: return new FuncParser(function ($input, array $pos) use ($num, $p) {
408: $list = array();
409: $nextPos = $pos;
410: for ($i = 0; $i < $num; $i++) {
411: $r = $p->parse($input, $nextPos);
412: if (! $r->successful) {
413: return $r;
414: }
415: $list[] = $r->result;
416: $input = $r->nextInput;
417: $nextPos = $r->nextPos;
418: }
419: return new Success($list, $pos, $input, $nextPos);
420: });
421: }
422:
423: /**
424: * Sequential composition of two or more parsers.
425: *
426: * `seq($p, $q)` is a parser that uses `$p` on the input followed by `$q`
427: * on the remaining input. The parser fails if either $p or $q fails. The
428: * result is an array of all results.
429: *
430: * Additional parameters are accepted such that:
431: * `seq($p, $q, $r) = seq($p, seq($q, $r))`.
432: *
433: * @param Parser $p
434: * First parser.
435: * @param Parser $q
436: * Second parser.
437: * @param Parser $r,...
438: * Any additional parsers.
439: * @return Parser A sequential composition of the input parsers.
440: */
441: public function seq(Parser $p, Parser $q)
442: {
443: $parsers = func_get_args();
444: return new FuncParser(function ($input, array $pos) use ($parsers) {
445: $list = array();
446: $nextPos = $pos;
447: foreach ($parsers as $p) {
448: $r = $p->parse($input, $nextPos);
449: if (! $r->successful) {
450: return $r;
451: }
452: $list[] = $r->result;
453: $input = $r->nextInput;
454: $nextPos = $r->nextPos;
455: }
456: return new Success($list, $pos, $input, $nextPos);
457: });
458: }
459:
460: /**
461: * Alternative composition of two or more parsers.
462: *
463: * `alt($p, $q)` is a parser that uses `$p` on the input and if `$p` fails
464: * uses `$q` on the same input. The parser fails if both $p or $q fail. The
465: * result is the result of the first parser that succeeded.
466: *
467: * An arbitrary number of additional parameters are accepted such that:
468: * `alt($p, $q, $r) = alt($p, alt($q, $r))`.
469: *
470: * @param Parser $p
471: * First parser.
472: * @param Parser $q
473: * Second parser.
474: * @param Parser $r,...
475: * Any additional parsers.
476: * @return Parser An alternative composition of the input parsers.
477: */
478: public function alt(Parser $p, Parser $q)
479: {
480: $parsers = func_get_args();
481: return new FuncParser(function ($input, array $pos) use ($parsers) {
482: $longest = null;
483: foreach ($parsers as $p) {
484: $r = $p->parse($input, $pos);
485: if ($r->successful) {
486: return $r;
487: } else {
488: if (! isset($longest) or Result::comparePositions($r->nextPos, $longest->nextPos) > 0) {
489: $longest = $r;
490: }
491: }
492: $input = $r->nextInput;
493: $pos = $r->nextPos;
494: }
495: return $longest;
496: });
497: }
498:
499: /**
500: * A parser that always succeeds.
501: *
502: * @param mixed $result
503: * Parse result.
504: * @return Parser A parser.
505: */
506: public function success($result)
507: {
508: return new FuncParser(function ($input, array $pos) use ($result) {
509: return new Success($result, $pos, $input, $pos);
510: });
511: }
512:
513: /**
514: * A parser that always fails.
515: *
516: * @param string $message
517: * Failure message.
518: * @return Parser A parser.
519: */
520: public function failure($message)
521: {
522: return new FuncParser(function ($input, array $pos) use ($message) {
523: return new Failure($message, $pos, $input, $pos);
524: });
525: }
526:
527: /**
528: * A parser for left-associative chaining.
529: *
530: * @param Parser $p
531: * A parser.
532: * @param Parser $sep
533: * A parser that parses the elements that separate the elements
534: * parsed by `$p` and returns a left-associative function that
535: * combines two elements returned by `$p`.
536: */
537: public function chainl(Parser $p, Parser $sep)
538: {
539: return new FuncParser(function ($input, array $pos) use ($p, $sep) {
540: $r = $p->parse($input, $pos);
541: if (! $r->successful) {
542: return $r;
543: }
544: $leftOperand = $r->result;
545: $input = $r->nextInput;
546: $nextPos = $r->nextPos;
547: while (true) {
548: $s = $sep->parse($input, $nextPos);
549: if (! $s->successful) {
550: break;
551: }
552: $r = $p->parse($s->nextInput, $s->nextPos);
553: if (! $r->successful) {
554: break;
555: }
556: $f = $s->result;
557: $rightOperand = $r->result;
558: $leftOperand = call_user_func($f, $leftOperand, $rightOperand);
559: $input = $r->nextInput;
560: $nextPos = $r->nextPos;
561: }
562: return new Success($leftOperand, $pos, $input, $nextPos);
563: });
564: }
565:
566: /**
567: * A parser for right-associative chaining.
568: *
569: * @param Parser $p
570: * A parser.
571: * @param Parser $sep
572: * A parser that parses the elements that separate the elements
573: * parsed by `$p` and returns a right-associative function that
574: * combines two elements returned by `$p`.
575: */
576: public function chainr(Parser $p, Parser $sep)
577: {
578: return new FuncParser(function ($input, array $pos) use ($p, $sep) {
579: $r = $p->parse($input, $pos);
580: if (! $r->successful) {
581: return $r;
582: }
583: $ops = array(array($r->result, null));
584: $input = $r->nextInput;
585: $nextPos = $r->nextPos;
586: while (true) {
587: $s = $sep->parse($input, $nextPos);
588: if (! $s->successful) {
589: break;
590: }
591: $r = $p->parse($s->nextInput, $s->nextPos);
592: if (! $r->successful) {
593: break;
594: }
595: $f = $s->result;
596: $ops[] = array($r->result, $f);
597: $input = $r->nextInput;
598: $nextPos = $r->nextPos;
599: }
600: $length = count($ops);
601: $rightOperand = $ops[$length - 1][0];
602: for ($i = $length - 2; $i >= 0; $i--) {
603: $f = $ops[$i + 1][1];
604: $leftOperand = $ops[$i][0];
605: $rightOpreand = call_user_func($f, $leftOperand, $rightOperand);
606: }
607: return new Success($rightOperand, $pos, $input, $nextPos);
608: });
609: }
610: }
611: