1: <?php
2:
3:
4:
5:
6: namespace Parco\Combinator;
7:
8: use Parco\Parser;
9: use Parco\FuncParser;
10: use Parco\Success;
11: use Parco\Failure;
12: use Parco\Match;
13:
14: 15: 16: 17: 18: 19:
20: trait RegexParsers
21: {
22: use Parsers;
23:
24: 25: 26: 27: 28: 29:
30: protected $skipWhitespace = true;
31:
32: 33: 34:
/**
 * Tell whether the input sequence has been exhausted.
 *
 * @param array $input Remaining input elements.
 * @return bool True when no elements are left.
 */
protected function atEnd($input)
{
    $remaining = count($input);
    return $remaining === 0;
}
39:
40: 41: 42:
/**
 * Return the first element of the input without consuming it.
 *
 * The caller is expected to have checked that the input is not empty;
 * no check is made here.
 *
 * @param array $input Remaining input elements.
 * @return mixed The element stored at index 0.
 */
protected function head($input)
{
    $first = $input[0];
    return $first;
}
47:
48: 49: 50:
/**
 * Drop the first element of the input and compute the position that follows it.
 *
 * Positions are array(line, column). When nothing is left after the first
 * element, the position becomes array(-1, -1). Otherwise a "\n" element
 * moves to column 1 of the next line and any other element advances the
 * column by one.
 *
 * @param array $input Remaining input elements (must not be empty).
 * @param array $pos   Position of the first element, as array(line, column).
 * @return array array(remaining input, position of the next element).
 */
protected function tail($input, array $pos)
{
    $consumed = $input[0];
    $rest = array_slice($input, 1);

    // Nothing left: signal end of input with the sentinel position.
    if (! isset($rest[0])) {
        return array($rest, array(-1, -1));
    }

    if ($consumed === "\n") {
        $pos[0]++;
        $pos[1] = 1;
        return array($rest, $pos);
    }

    $pos[1]++;
    return array($rest, $pos);
}
65:
66: 67: 68:
/**
 * Format an input element for use in error messages.
 *
 * @param mixed $element Element to display; it is converted to a string.
 * @return string The element wrapped in double quotes.
 */
protected function show($element)
{
    return "\"{$element}\"";
}
73:
74: 75: 76: 77: 78: 79: 80: 81: 82:
/**
 * Run a parser against a UTF-8 string.
 *
 * The string is split into an array of Unicode code points and handed to
 * the parser together with the start position array(1, 1) (line 1,
 * column 1).
 *
 * @param Parser $p      Parser to run.
 * @param string $string UTF-8 encoded input.
 * @return mixed Whatever $p->parse() returns.
 * @throws \InvalidArgumentException If the string cannot be split into
 *                                   characters (e.g. it is not valid UTF-8).
 */
public function parse(Parser $p, $string)
{
    $input = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
    // preg_split() yields false when the subject is not valid UTF-8. Passing
    // that on would make every downstream count()/index operation misbehave,
    // so fail loudly here instead.
    if ($input === false) {
        throw new \InvalidArgumentException(
            'Unable to split input into characters; the input must be valid UTF-8'
        );
    }
    return $p->parse($input, array(1, 1));
}
88:
89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100:
/**
 * Run a parser against a string after wrapping it with phrase().
 *
 * When whitespace skipping is enabled, the parser is first combined with
 * the whitespace parser through seqL() so that whitespace after the parsed
 * content is consumed as well. phrase() is defined outside this block;
 * presumably it requires the whole input to be consumed (TODO confirm).
 *
 * @param Parser $p      Parser to run.
 * @param string $string Input string.
 * @return mixed Whatever parse() returns for the wrapped parser.
 */
public function parseAll(Parser $p, $string)
{
    $complete = $this->skipWhitespace
        ? $p->seqL($this->whitespace())
        : $p;
    return $this->parse($this->phrase($complete), $string);
}
109:
110: 111: 112: 113: 114: 115: 116: 117: 118:
/**
 * Return the parser that consumes leading whitespace.
 *
 * The parser is created once and stored in $this->parserCache under the
 * key '@ws' (the cache property is declared outside this block). It skips
 * line feed, tab, vertical tab, form feed, carriage return and space, and
 * always succeeds with a null result. A line feed moves the position to
 * column 1 of the next line; the other characters advance the column by
 * one. If the input is exhausted, the next position is array(-1, -1).
 *
 * @return FuncParser The cached whitespace parser.
 */
public function whitespace()
{
    if (isset($this->parserCache['@ws'])) {
        return $this->parserCache['@ws'];
    }

    $this->parserCache['@ws'] = new FuncParser(function ($input, array $pos) {
        $cursor = $pos;
        for ($i = 0; isset($input[$i]); $i++) {
            switch ($input[$i]) {
                case "\x0A":
                    $cursor[0]++;
                    $cursor[1] = 1;
                    break;
                case "\x09":
                case "\x0B":
                case "\x0C":
                case "\x0D":
                case "\x20":
                    $cursor[1]++;
                    break;
                default:
                    // First non-whitespace element: everything from here on
                    // is handed to the next parser.
                    $rest = array_slice($input, $i);
                    if (! count($rest)) {
                        $cursor = array(-1, -1);
                    }
                    return new Success(null, $pos, $rest, $cursor);
            }
        }
        // Ran off the end: the input was empty or whitespace only.
        return new Success(null, $pos, array(), array(-1, -1));
    });

    return $this->parserCache['@ws'];
}
154:
155: 156: 157: 158: 159: 160: 161:
/**
 * Wrap a parser so that whitespace skipping is disabled while it runs.
 *
 * The previous value of $this->skipWhitespace is restored afterwards,
 * including when the wrapped parser throws.
 *
 * @param Parser $p Parser to run with whitespace skipping turned off.
 * @return FuncParser Parser returning whatever $p->parse() returns.
 */
public function noSkip(Parser $p)
{
    return new FuncParser(function ($input, array $pos) use ($p) {
        $skip = $this->skipWhitespace;
        $this->skipWhitespace = false;
        $error = null;
        $r = null;
        // Catch-and-rethrow rather than "finally" so the code still parses on
        // PHP 5.4. The \Throwable clause covers PHP 7 errors and is simply
        // never matched on PHP 5.
        try {
            $r = $p->parse($input, $pos);
        } catch (\Exception $e) {
            $error = $e;
        } catch (\Throwable $e) {
            $error = $e;
        }
        // Restore the flag on every exit path; otherwise an exception would
        // leave whitespace skipping disabled for all later parsing.
        $this->skipWhitespace = $skip;
        if ($error !== null) {
            throw $error;
        }
        return $r;
    });
}
172:
173: 174: 175: 176: 177: 178: 179: 180: 181: 182:
/**
 * Create a parser that accepts exactly one given character.
 *
 * Leading whitespace is skipped first when $this->skipWhitespace is set.
 * The parser fails at end of input or when the next element is not
 * identical (===) to $c; otherwise it succeeds with $c as its result.
 *
 * @param string $c The expected character.
 * @return FuncParser Parser producing a Success holding $c, or a Failure.
 */
public function char($c)
{
    return new FuncParser(function ($input, array $pos) use ($c) {
        if ($this->skipWhitespace) {
            $skipped = $this->whitespace()->parse($input, $pos);
            $input = $skipped->nextInput;
            $pos = $skipped->nextPos;
        }

        if (! $this->atEnd($input)) {
            $actual = $this->head($input);
            if ($actual === $c) {
                list($rest, $after) = $this->tail($input, $pos);
                return new Success($c, $pos, $rest, $after);
            }
            $message = 'unexpected ' . $this->show($actual) . ', expected ' . $this->show($c);
            return new Failure($message, $pos, $input, $pos);
        }

        $message = 'unexpected end of input, expected ' . $this->show($c);
        return new Failure($message, $pos, $input, $pos);
    });
}
212:
213: 214: 215: 216: 217: 218: 219: 220: 221: 222:
/**
 * Create a parser that accepts an exact sequence of characters.
 *
 * Leading whitespace is skipped first when $this->skipWhitespace is set.
 * The expected string is compared character by character (Unicode code
 * points, matching how parse() splits its input), so literals containing
 * multibyte UTF-8 characters work. On success the result is $s and the
 * position is advanced per character: "\n" moves to column 1 of the next
 * line, anything else advances the column. When the input is exhausted
 * afterwards the next position is array(-1, -1).
 *
 * @param string $s The expected string.
 * @return FuncParser Parser producing a Success holding $s, or a Failure.
 */
public function string($s)
{
    // Split the literal once, up front. The input is an array of code
    // points, so indexing $s by byte (strlen / $s[$i]) would never match a
    // multibyte character and would slice off the wrong number of elements.
    $text = (string) $s;
    $expected = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($expected === false) {
        // Not valid UTF-8: fall back to the previous byte-wise behaviour.
        $expected = $text === '' ? array() : str_split($text);
    }
    $length = count($expected);

    return new FuncParser(function ($input, array $pos) use ($s, $expected, $length) {
        if ($this->skipWhitespace) {
            $r = $this->whitespace()->parse($input, $pos);
            $input = $r->nextInput;
            $pos = $r->nextPos;
        }
        $nextPos = $pos;
        for ($i = 0; $i < $length; $i++) {
            if (! isset($input[$i])) {
                return new Failure(
                    'unexpected end of input, expected ' . $this->show($expected[$i]),
                    $pos,
                    $input,
                    $pos
                );
            }
            if ($input[$i] !== $expected[$i]) {
                return new Failure(
                    'unexpected ' . $this->show($input[$i]) . ', expected ' . $this->show($expected[$i]),
                    $pos,
                    $input,
                    $pos
                );
            }
            if ($input[$i] === "\n") {
                $nextPos[0]++;
                $nextPos[1] = 1;
            } else {
                $nextPos[1]++;
            }
        }
        $input = array_slice($input, $length);
        if (! count($input)) {
            $nextPos = array(-1, -1);
        }
        return new Success($s, $pos, $input, $nextPos);
    });
}
264:
265: 266: 267: 268: 269: 270: 271: 272: 273:
/**
 * Create a parser that accepts input matching a regular expression.
 *
 * Leading whitespace is skipped first when $this->skipWhitespace is set.
 * The remaining input is joined into a string and matched with
 * preg_match(); the match must start at offset 0 of that string. On
 * success the result is a Match built from the preg_match() captures
 * (captured with PREG_OFFSET_CAPTURE, so each entry is
 * array(text, byte offset)).
 *
 * The number of consumed input elements and the resulting position are
 * computed from the code points of the matched text, so multibyte
 * characters and newlines inside the match are handled correctly.
 *
 * @param string $regex A complete PCRE pattern, including delimiters.
 * @return FuncParser Parser producing a Match, or a Failure.
 */
public function regex($regex)
{
    return new FuncParser(function ($input, array $pos) use ($regex) {
        if ($this->skipWhitespace) {
            $r = $this->whitespace()->parse($input, $pos);
            $input = $r->nextInput;
            $pos = $r->nextPos;
        }
        $ret = preg_match($regex, implode('', $input), $matches, PREG_OFFSET_CAPTURE);
        if ($ret !== 1 || $matches[0][1] !== 0) {
            if (! count($input)) {
                return new Failure('unexpected end of input', $pos, $input, $pos);
            }
            return new Failure('unexpected ' . $this->show($input[0]), $pos, $input, $pos);
        }

        // The input array holds one code point per element, so the matched
        // text must be measured in code points, not bytes (strlen).
        $text = $matches[0][0];
        $consumed = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($consumed === false) {
            // The match is not valid UTF-8 (possible when the pattern lacks
            // the "u" modifier): fall back to the previous byte-wise count.
            $consumed = $text === '' ? array() : str_split($text);
        }

        // Advance the position character by character so that newlines in
        // the match start a new line, consistent with the other parsers.
        $nextPos = $pos;
        foreach ($consumed as $char) {
            if ($char === "\n") {
                $nextPos[0]++;
                $nextPos[1] = 1;
            } else {
                $nextPos[1]++;
            }
        }

        $input = array_slice($input, count($consumed));
        if (! count($input)) {
            $nextPos = array(-1, -1);
        }
        return new Match($matches, $pos, $input, $nextPos);
    });
}
299:
300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310:
/**
 * Create a parser that yields a single capture group of another parser's result.
 *
 * The wrapped parser is run first; a failed result is returned unchanged.
 * Otherwise the result's group($i) becomes the new result value and, when
 * the result's offset($i) is not null, that offset is added to the column
 * of the reported start position. The remaining input and next position
 * are taken from the wrapped parser's result.
 *
 * NOTE(review): the wrapped result must provide group() and offset();
 * presumably it is the Match produced by regex() — confirm. The offset is
 * used as-is, so if it is a byte offset the column may be off for
 * multibyte text.
 *
 * @param int|string $i Capture group index or name passed to group()/offset().
 * @param Parser     $p Parser whose result exposes capture groups.
 * @return FuncParser Parser producing a Success with the group, or $p's failure.
 */
public function group($i, Parser $p)
{
    return new FuncParser(function ($input, array $pos) use ($i, $p) {
        $r = $p->parse($input, $pos);
        if (! $r->successful) {
            return $r;
        }
        $group = $r->group($i);
        $offset = $r->offset($i);
        if (isset($offset)) {
            $pos[1] += $offset;
        }
        // The next position always comes from the wrapped result; the
        // previous local $nextPos assignment was never read and is gone.
        return new Success($group, $pos, $r->nextInput, $r->nextPos);
    });
}
329: }
330: