1 package net.avcompris.commons3.yaml;
2
3 import static com.google.common.base.Preconditions.checkNotNull;
4 import static com.google.common.collect.Lists.newArrayList;
5 import static org.apache.commons.lang3.StringUtils.isBlank;
6
7 import java.io.IOException;
8 import java.io.Reader;
9 import java.util.List;
10
11 import javax.annotation.Nullable;
12
13 import org.apache.commons.lang3.NotImplementedException;
14
15 final class YamlReader {
16
17 private final Reader reader;
18
19 private int currentLineNumber = 1;
20 private int currentColumnNumber = 1;
21
22 private int currentTokenLineNumber = 1;
23 private int currentTokenColumnNumber = 1;
24
25 public int getCurrentLineNumber() {
26
27 return currentLineNumber;
28 }
29
30 public int getCurrentColumnNumber() {
31
32 return currentColumnNumber;
33 }
34
35 public int getCurrentTokenLineNumber() {
36
37 return currentTokenLineNumber;
38 }
39
40 public int getCurrentTokenColumnNumber() {
41
42 return currentTokenColumnNumber;
43 }
44
45 public YamlReader(
46 final Reader reader
47 ) {
48
49 this.reader = checkNotNull(reader, "reader");
50 }
51
52 private final List<Character> nextC = newArrayList();
53
54 private int read() throws IOException {
55
56 if (!nextC.isEmpty()) {
57
58 final int c = nextC.get(0);
59
60 nextC.remove(0);
61
62 if (c == -1) {
63
64 isEof = true;
65 }
66
67 return c;
68
69 } else {
70
71 final int n = reader.read();
72
73 if (n == -1) {
74
75 isEof = true;
76 }
77
78 return n;
79 }
80 }
81
82 private Token nextToken = null;
83
84 public static final class Token {
85
86 public static final Token OPEN_CURLY_BRACKET = new Token("{", false);
87 public static final Token CLOSE_CURLY_BRACKET = new Token("}", false);
88 public static final Token OPEN_SQUARE_BRACKET = new Token("[", false);
89 public static final Token CLOSE_SQUARE_BRACKET = new Token("]", false);
90 public static final Token HYPHEN = new Token("-", false);
91 public static final Token COLON = new Token(":", false);
92 public static final Token COMMA = new Token(",", false);
93 public static final Token LINEBREAK = new Token("\n", false);
94
95 public static final Token get(
96 final char c
97 ) {
98
99 if (c == ':') {
100
101 return COLON;
102
103 } else if (c == ',') {
104
105 return COMMA;
106
107 } else if (c == '}') {
108
109 return CLOSE_CURLY_BRACKET;
110
111 } else if (c == ']') {
112
113 return CLOSE_SQUARE_BRACKET;
114
115 } else {
116
117 throw new IllegalArgumentException("c: " + c + " (" + ((int) c) + ")");
118 }
119 }
120
121 public final String s;
122 public final boolean isQuoted;
123
124 private Token(
125 final String s,
126 final boolean isQuoted
127 ) {
128
129 this.s = checkNotNull(s, "s");
130 this.isQuoted = isQuoted;
131 }
132
133 @Override
134 public String toString() {
135
136 if (this == OPEN_CURLY_BRACKET) {
137
138 return "OPEN_CURLY_BRACKET[{]";
139
140 } else if (this == CLOSE_CURLY_BRACKET) {
141
142 return "CLOSE_CURLY_BRACKET[}]";
143
144 } else if (this == OPEN_SQUARE_BRACKET) {
145
146 return "OPEN_SQUARE_BRACKET<[>";
147
148 } else if (this == CLOSE_SQUARE_BRACKET) {
149
150 return "CLOSE_SQUARE_BRACKET<]>";
151
152 } else if (this == COLON) {
153
154 return "COLON[:]";
155
156 } else if (this == COMMA) {
157
158 return "COMMA[,]";
159
160 } else if (this == HYPHEN) {
161
162 return "HYPHEN[-]";
163
164 } else if (this == LINEBREAK) {
165
166 return "LINEBREAK";
167
168 } else {
169
170 return s;
171 }
172 }
173
174 @Override
175 public boolean equals(
176 @Nullable final Object o
177 ) {
178
179 if (o == null || !(o instanceof Token)) {
180
181 return false;
182 }
183
184 return s.equals(((Token) o).s);
185 }
186
187 @Override
188 public int hashCode() {
189
190 return s.hashCode();
191 }
192 }
193
194 @Nullable
195 public Token readNextToken(
196 final boolean breakAtComma
197 ) throws IOException {
198
199 if (nextToken == null) {
200
201 return readToken(breakAtComma);
202
203 } else {
204
205 final Token token = nextToken;
206
207 nextToken = null;
208
209 return token;
210 }
211 }
212
213 private boolean isEof = false;
214
215 private int currentIndent = -1;
216
217 public int readIndent() throws IOException {
218
219 if (!isEof) {
220
221 if (currentColumnNumber != 1) {
222
223 return currentIndent;
224 }
225
226
227
228 }
229
230 int indent = 0;
231
232 loop: while (true) {
233
234 final int n = read();
235
236 if (n == -1) {
237
238 currentIndent = -1;
239
240 return currentIndent;
241 }
242
243 final char c = (char) n;
244
245 if (c == '\t') {
246
247 throw new YamlReaderException(
248 currentLineNumber,
249 currentColumnNumber,
250 "Illegal char: " + c + " (" + ((int) c) + ")");
251 }
252
253 ++currentColumnNumber;
254
255 if (c == ' ') {
256
257 ++indent;
258
259 continue;
260
261 } else if (c == '\n') {
262
263 ++currentLineNumber;
264 currentColumnNumber = 1;
265
266 indent = 0;
267
268 continue;
269
270 } else if (c == '\r') {
271
272 throw new NotImplementedException("");
273
274 } else if (c == '#') {
275
276 while (true) {
277
278 final int n2 = read();
279
280 if (n2 == -1) {
281
282 return -1;
283 }
284
285 final char c2 = (char) n2;
286
287 if (c2 == '\n') {
288
289 ++currentLineNumber;
290 currentColumnNumber = 1;
291
292 indent = 0;
293
294 continue loop;
295
296 } else if (c == '\r') {
297
298 throw new NotImplementedException("");
299 }
300 }
301
302 } else {
303
304 nextC.add(c);
305
306 currentIndent = indent;
307
308 return currentIndent;
309 }
310 }
311 }
312
313 private StringBuilder reservedToken = null;
314
315 private void reserveToken(
316 final char c
317 ) {
318
319 if (reservedToken == null) {
320
321 reservedToken = new StringBuilder();
322 }
323
324 reservedToken.append(c);
325 }
326
327 @Nullable
328 private Token readToken(
329 final boolean breakAtComma
330 ) throws IOException {
331
332 reservedToken = null;
333
334 final StringBuilder sb = new StringBuilder();
335
336 currentTokenLineNumber = currentLineNumber;
337 currentTokenColumnNumber = currentColumnNumber;
338
339 while (true) {
340
341 final int n = read();
342
343 if (n == -1) {
344
345 if (sb.length() == 0) {
346
347 return null;
348
349 } else {
350
351 return new Token(sb.toString(), false);
352 }
353 }
354
355 final char c = (char) n;
356
357 if (c == '\t') {
358
359 throw new YamlReaderException(
360 currentLineNumber,
361 currentColumnNumber,
362 "Illegal char: " + c + " (" + ((int) c) + ")");
363 }
364
365 ++currentColumnNumber;
366
367 if (c == '\n') {
368
369 ++currentLineNumber;
370 currentColumnNumber = 1;
371
372 if (sb.length() == 0) {
373
374 currentTokenColumnNumber = currentColumnNumber;
375
376 return Token.LINEBREAK;
377
378 } else {
379
380 nextToken = Token.LINEBREAK;
381
382 break;
383 }
384
385 } else if (c == '\r') {
386
387 throw new NotImplementedException("");
388
389 } else if (c == '{') {
390
391 if (sb.length() == 0) {
392
393 return Token.OPEN_CURLY_BRACKET;
394
395 } else {
396
397 sb.append(c);
398 }
399
400 } else if (c == '-') {
401
402 if (sb.length() == 0) {
403
404 return Token.HYPHEN;
405 }
406
407 if (reservedToken != null) {
408
409 sb.append(reservedToken.toString());
410
411 reservedToken = null;
412 }
413
414 sb.append(c);
415
416 } else if (c == '[') {
417
418 if (sb.length() == 0) {
419
420 return Token.OPEN_SQUARE_BRACKET;
421
422
423
424
425
426
427
428 }
429
430 sb.append(c);
431
432 } else if (c == '#' && sb.isEmpty()) {
433
434 while (true) {
435
436 final int n2 = read();
437
438 if (n2 == -1) {
439
440 return null;
441 }
442
443 final char c2 = (char) n2;
444
445 if (c2 == '\n') {
446
447 ++currentLineNumber;
448 currentColumnNumber = 1;
449
450 if (sb.length() == 0) {
451
452 currentTokenColumnNumber = currentColumnNumber;
453
454 return Token.LINEBREAK;
455
456
457
458 } else {
459
460 return new Token(sb.toString(), false);
461 }
462
463 } else if (c2 == '\r') {
464
465 throw new NotImplementedException("");
466 }
467 }
468
469 } else if (c == ':') {
470
471 if (sb.length() == 0) {
472
473 return Token.get(c);
474 }
475
476 final int n2 = read();
477
478 if (n2 == -1) {
479 throw new NotImplementedException("n2 == -1");
480 }
481
482 final char nextC = (char) n2;
483
484 if (nextC == ' ' || nextC == '\n' || nextC == '\r' || nextC == '{') {
485
486 this.nextC.add(c);
487 this.nextC.add(nextC);
488
489 return new Token(sb.toString(), false);
490
491 } else {
492
493
494
495 sb.append(c).append(nextC);
496 }
497
498 } else if (c == ',' && breakAtComma) {
499
500 if (sb.length() == 0) {
501
502 return Token.get(c);
503
504 } else {
505
506 nextC.add(c);
507
508 return new Token(sb.toString(), false);
509
510
511
512
513
514
515
516
517
518 }
519
520 } else if (c == '}' || c == ']') {
521
522 if (sb.length() == 0) {
523
524 return Token.get(c);
525
526 } else if (previousIs(' ')) {
527
528 nextC.add(c);
529
530 return new Token(sb.toString(), false);
531
532 } else {
533
534 if (reservedToken != null) {
535
536 sb.append(reservedToken.toString());
537
538 reservedToken = null;
539 }
540
541 sb.append(c);
542 }
543
544 } else if (c == ' ') {
545
546 if (sb.length() == 0) {
547
548 currentTokenColumnNumber = currentColumnNumber;
549
550 continue;
551
552 } else {
553
554 reserveToken(' ');
555
556 continue;
557 }
558
559 } else if (c == '\'' && sb.length() == 0) {
560
561 final int currentLineNumber0 = currentLineNumber;
562 final int currentColumnNumber0 = currentColumnNumber;
563
564 while (true) {
565
566 final int n2 = read();
567
568 if (n2 == -1) {
569
570 throw new YamlReaderException(
571 currentLineNumber0,
572 currentColumnNumber0,
573 "Unterminated quoted string");
574 }
575
576 ++currentColumnNumber;
577
578 final char c2 = (char) n2;
579
580 if (c2 == '\'') {
581
582 boolean quoted = true;
583
584 final StringBuilder trailing = new StringBuilder();
585
586 while (true) {
587
588 final int n3 = read();
589
590 if (n3 == ' ') {
591
592 if (!isBlank(trailing)) {
593
594 if (quoted) {
595 sb.insert(0, '\'');
596 sb.append('\'');
597 quoted = false;
598 }
599
600 sb.append(trailing);
601
602 trailing.setLength(0);
603 }
604 }
605
606 if (n3 == -1 || n3 == '\n' || n3 == '\r' || n3 == '}') {
607
608 if (n3 != -1) {
609
610 nextC.add((char) n3);
611 }
612
613 if (!isBlank(trailing)) {
614
615 if (quoted) {
616 sb.insert(0, '\'');
617 sb.append('\'');
618 quoted = false;
619 }
620
621 sb.append(trailing);
622 }
623
624 break;
625 }
626
627 trailing.append((char) n3);
628 }
629
630 return new Token(sb.toString(), quoted);
631
632 } else if (c2 == '\\') {
633
634 final int n3 = read();
635
636 if (n3 == -1) {
637
638 throw new YamlReaderException(
639 currentLineNumber0,
640 currentColumnNumber0,
641 "Unterminated quoted string");
642 }
643
644 final char c3 = (char) n3;
645
646 if (c3 == '\'') {
647
648 sb.append('\'');
649
650 } else if (c3 == '\\') {
651
652 sb.append('\\');
653
654 } else {
655
656 throw new YamlReaderException(
657 currentLineNumber,
658 currentColumnNumber,
659 "Illegal escape code: \"" + c2 + c3 + "\"");
660 }
661
662 ++currentColumnNumber;
663
664 continue;
665 }
666
667 sb.append(c2);
668 }
669
670 } else if (c == '"' && sb.length() == 0) {
671
672 final int currentLineNumber0 = currentLineNumber;
673 final int currentColumnNumber0 = currentColumnNumber;
674
675 while (true) {
676
677 final int n2 = read();
678
679 if (n2 == -1) {
680
681 throw new YamlReaderException(
682 currentLineNumber0,
683 currentColumnNumber0,
684 "Unterminated quoted string");
685 }
686
687 ++currentColumnNumber;
688
689 final char c2 = (char) n2;
690
691 if (c2 == '"') {
692
693 return new Token(sb.toString(), true);
694
695 } else if (c2 == '\\') {
696
697 final int n3 = read();
698
699 if (n3 == -1) {
700
701 throw new YamlReaderException(
702 currentLineNumber0,
703 currentColumnNumber0,
704 "Unterminated quoted string");
705 }
706
707 final char c3 = (char) n3;
708
709 if (c3 == '"') {
710
711 sb.append('"');
712
713 } else if (c3 == '\\') {
714
715 sb.append('\\');
716
717 } else {
718
719 throw new YamlReaderException(
720 currentLineNumber,
721 currentColumnNumber,
722 "Illegal escape code: \"" + c2 + c3 + "\"");
723 }
724
725 ++currentColumnNumber;
726
727 continue;
728 }
729
730 sb.append(c2);
731 }
732
733 } else {
734
735 if (reservedToken != null) {
736
737 sb.append(reservedToken.toString());
738
739 reservedToken = null;
740 }
741
742 sb.append(c);
743 }
744 }
745
746 return new Token(sb.toString(), false);
747 }
748
749 private boolean previousIs(
750 final char c
751 ) {
752
753 if (reservedToken != null && !reservedToken.isEmpty()) {
754
755 final int l = reservedToken.length();
756
757 return reservedToken.substring(l - 1, l).charAt(0) == c;
758 }
759
760 return nextC != null && !nextC.isEmpty() && nextC.get(nextC.size() - 1) == c;
761 }
762
763 @Nullable
764 public Token peekNextToken(
765 final boolean inArray
766 ) throws IOException {
767
768 if (nextToken != null) {
769
770 return nextToken;
771
772 } else {
773
774 nextToken = readToken(inArray);
775
776 return nextToken;
777 }
778 }
779 }