View Javadoc
1   package net.avcompris.commons3.yaml;
2   
3   import static com.google.common.base.Preconditions.checkNotNull;
4   import static com.google.common.collect.Lists.newArrayList;
5   import static org.apache.commons.lang3.StringUtils.isBlank;
6   
7   import java.io.IOException;
8   import java.io.Reader;
9   import java.util.List;
10  
11  import javax.annotation.Nullable;
12  
13  import org.apache.commons.lang3.NotImplementedException;
14  
15  final class YamlReader {
16  
17  	private final Reader reader;
18  
19  	private int currentLineNumber = 1;
20  	private int currentColumnNumber = 1;
21  
22  	private int currentTokenLineNumber = 1;
23  	private int currentTokenColumnNumber = 1;
24  
25  	public int getCurrentLineNumber() {
26  
27  		return currentLineNumber;
28  	}
29  
30  	public int getCurrentColumnNumber() {
31  
32  		return currentColumnNumber;
33  	}
34  
35  	public int getCurrentTokenLineNumber() {
36  
37  		return currentTokenLineNumber;
38  	}
39  
40  	public int getCurrentTokenColumnNumber() {
41  
42  		return currentTokenColumnNumber;
43  	}
44  
45  	public YamlReader(
46  		final Reader reader
47  	) {
48  
49  		this.reader = checkNotNull(reader, "reader");
50  	}
51  
52  	private final List<Character> nextC = newArrayList();
53  
54  	private int read() throws IOException {
55  
56  		if (!nextC.isEmpty()) {
57  
58  			final int c = nextC.get(0);
59  
60  			nextC.remove(0);
61  
62  			if (c == -1) {
63  
64  				isEof = true;
65  			}
66  
67  			return c;
68  
69  		} else {
70  
71  			final int n = reader.read();
72  
73  			if (n == -1) {
74  
75  				isEof = true;
76  			}
77  
78  			return n;
79  		}
80  	}
81  
82  	private Token nextToken = null;
83  
84  	public static final class Token {
85  
86  		public static final Token OPEN_CURLY_BRACKET = new Token("{", false);
87  		public static final Token CLOSE_CURLY_BRACKET = new Token("}", false);
88  		public static final Token OPEN_SQUARE_BRACKET = new Token("[", false);
89  		public static final Token CLOSE_SQUARE_BRACKET = new Token("]", false);
90  		public static final Token HYPHEN = new Token("-", false);
91  		public static final Token COLON = new Token(":", false);
92  		public static final Token COMMA = new Token(",", false);
93  		public static final Token LINEBREAK = new Token("\n", false);
94  
95  		public static final Token get(
96  			final char c
97  		) {
98  
99  			if (c == ':') {
100 
101 				return COLON;
102 
103 			} else if (c == ',') {
104 
105 				return COMMA;
106 
107 			} else if (c == '}') {
108 
109 				return CLOSE_CURLY_BRACKET;
110 
111 			} else if (c == ']') {
112 
113 				return CLOSE_SQUARE_BRACKET;
114 
115 			} else {
116 
117 				throw new IllegalArgumentException("c: " + c + " (" + ((int) c) + ")");
118 			}
119 		}
120 
121 		public final String s;
122 		public final boolean isQuoted;
123 
124 		private Token(
125 			final String s,
126 			final boolean isQuoted
127 		) {
128 
129 			this.s = checkNotNull(s, "s");
130 			this.isQuoted = isQuoted;
131 		}
132 
133 		@Override
134 		public String toString() {
135 
136 			if (this == OPEN_CURLY_BRACKET) {
137 
138 				return "OPEN_CURLY_BRACKET[{]";
139 
140 			} else if (this == CLOSE_CURLY_BRACKET) {
141 
142 				return "CLOSE_CURLY_BRACKET[}]";
143 
144 			} else if (this == OPEN_SQUARE_BRACKET) {
145 
146 				return "OPEN_SQUARE_BRACKET<[>";
147 
148 			} else if (this == CLOSE_SQUARE_BRACKET) {
149 
150 				return "CLOSE_SQUARE_BRACKET<]>";
151 
152 			} else if (this == COLON) {
153 
154 				return "COLON[:]";
155 
156 			} else if (this == COMMA) {
157 
158 				return "COMMA[,]";
159 
160 			} else if (this == HYPHEN) {
161 
162 				return "HYPHEN[-]";
163 
164 			} else if (this == LINEBREAK) {
165 
166 				return "LINEBREAK";
167 
168 			} else {
169 
170 				return s;
171 			}
172 		}
173 
174 		@Override
175 		public boolean equals(
176 			@Nullable final Object o
177 		) {
178 
179 			if (o == null || !(o instanceof Token)) {
180 
181 				return false;
182 			}
183 
184 			return s.equals(((Token) o).s);
185 		}
186 
187 		@Override
188 		public int hashCode() {
189 
190 			return s.hashCode();
191 		}
192 	}
193 
194 	@Nullable
195 	public Token readNextToken(
196 		final boolean breakAtComma
197 	) throws IOException {
198 
199 		if (nextToken == null) {
200 
201 			return readToken(breakAtComma);
202 
203 		} else {
204 
205 			final Token token = nextToken;
206 
207 			nextToken = null;
208 
209 			return token;
210 		}
211 	}
212 
213 	private boolean isEof = false;
214 
215 	private int currentIndent = -1;
216 
217 	public int readIndent() throws IOException {
218 
219 		if (!isEof) {
220 
221 			if (currentColumnNumber != 1) {
222 
223 				return currentIndent;
224 			}
225 
226 			// throw new IllegalStateException("currentColumnNumber: " + currentColumnNumber
227 			// + " != 1");
228 		}
229 
230 		int indent = 0;
231 
232 		loop: while (true) {
233 
234 			final int n = read();
235 
236 			if (n == -1) {
237 
238 				currentIndent = -1;
239 
240 				return currentIndent;
241 			}
242 
243 			final char c = (char) n;
244 
245 			if (c == '\t') {
246 
247 				throw new YamlReaderException( //
248 						currentLineNumber, //
249 						currentColumnNumber, //
250 						"Illegal char: " + c + " (" + ((int) c) + ")");
251 			}
252 
253 			++currentColumnNumber;
254 
255 			if (c == ' ') {
256 
257 				++indent;
258 
259 				continue;
260 
261 			} else if (c == '\n') {
262 
263 				++currentLineNumber;
264 				currentColumnNumber = 1;
265 
266 				indent = 0;
267 
268 				continue;
269 
270 			} else if (c == '\r') {
271 
272 				throw new NotImplementedException("");
273 
274 			} else if (c == '#') {
275 
276 				while (true) {
277 
278 					final int n2 = read();
279 
280 					if (n2 == -1) {
281 
282 						return -1;
283 					}
284 
285 					final char c2 = (char) n2;
286 
287 					if (c2 == '\n') {
288 
289 						++currentLineNumber;
290 						currentColumnNumber = 1;
291 
292 						indent = 0;
293 
294 						continue loop;
295 
296 					} else if (c == '\r') {
297 
298 						throw new NotImplementedException("");
299 					}
300 				}
301 
302 			} else {
303 
304 				nextC.add(c);
305 
306 				currentIndent = indent;
307 
308 				return currentIndent;
309 			}
310 		}
311 	}
312 
313 	private StringBuilder reservedToken = null;
314 
315 	private void reserveToken(
316 		final char c
317 	) {
318 
319 		if (reservedToken == null) {
320 
321 			reservedToken = new StringBuilder();
322 		}
323 
324 		reservedToken.append(c);
325 	}
326 
327 	@Nullable
328 	private Token readToken(
329 		final boolean breakAtComma
330 	) throws IOException {
331 
332 		reservedToken = null;
333 
334 		final StringBuilder sb = new StringBuilder();
335 
336 		currentTokenLineNumber = currentLineNumber;
337 		currentTokenColumnNumber = currentColumnNumber;
338 
339 		while (true) {
340 
341 			final int n = read();
342 
343 			if (n == -1) {
344 
345 				if (sb.length() == 0) {
346 
347 					return null;
348 
349 				} else {
350 
351 					return new Token(sb.toString(), false);
352 				}
353 			}
354 
355 			final char c = (char) n;
356 
357 			if (c == '\t') {
358 
359 				throw new YamlReaderException( //
360 						currentLineNumber, //
361 						currentColumnNumber, //
362 						"Illegal char: " + c + " (" + ((int) c) + ")");
363 			}
364 
365 			++currentColumnNumber;
366 
367 			if (c == '\n') {
368 
369 				++currentLineNumber;
370 				currentColumnNumber = 1;
371 
372 				if (sb.length() == 0) {
373 
374 					currentTokenColumnNumber = currentColumnNumber;
375 
376 					return Token.LINEBREAK;
377 
378 				} else {
379 
380 					nextToken = Token.LINEBREAK;
381 
382 					break;
383 				}
384 
385 			} else if (c == '\r') {
386 
387 				throw new NotImplementedException("");
388 
389 			} else if (c == '{') {
390 
391 				if (sb.length() == 0) {
392 
393 					return Token.OPEN_CURLY_BRACKET;
394 
395 				} else {
396 
397 					sb.append(c);
398 				}
399 
400 			} else if (c == '-') {
401 
402 				if (sb.length() == 0) {
403 
404 					return Token.HYPHEN;
405 				}
406 
407 				if (reservedToken != null) {
408 
409 					sb.append(reservedToken.toString());
410 
411 					reservedToken = null;
412 				}
413 
414 				sb.append(c);
415 
416 			} else if (c == '[') {
417 
418 				if (sb.length() == 0) {
419 
420 					return Token.OPEN_SQUARE_BRACKET;
421 
422 					// } else {
423 					//
424 					// throw new YamlReaderException( //
425 					// currentLineNumber, //
426 					// currentColumnNumber, //
427 					// "Illegal opening bracket \"[\" after: \"" + sb + "\"");
428 				}
429 
430 				sb.append(c);
431 
432 			} else if (c == '#' && sb.isEmpty()) {
433 
434 				while (true) {
435 
436 					final int n2 = read();
437 
438 					if (n2 == -1) {
439 
440 						return null;
441 					}
442 
443 					final char c2 = (char) n2;
444 
445 					if (c2 == '\n') {
446 
447 						++currentLineNumber;
448 						currentColumnNumber = 1;
449 
450 						if (sb.length() == 0) {
451 
452 							currentTokenColumnNumber = currentColumnNumber;
453 
454 							return Token.LINEBREAK;
455 
456 							// continue loop;
457 
458 						} else {
459 
460 							return new Token(sb.toString(), false);
461 						}
462 
463 					} else if (c2 == '\r') {
464 
465 						throw new NotImplementedException("");
466 					}
467 				}
468 
469 			} else if (c == ':') {
470 
471 				if (sb.length() == 0) {
472 
473 					return Token.get(c);
474 				}
475 
476 				final int n2 = read();
477 
478 				if (n2 == -1) {
479 					throw new NotImplementedException("n2 == -1");
480 				}
481 
482 				final char nextC = (char) n2;
483 
484 				if (nextC == ' ' || nextC == '\n' || nextC == '\r' || nextC == '{') {
485 
486 					this.nextC.add(c);
487 					this.nextC.add(nextC);
488 
489 					return new Token(sb.toString(), false);
490 
491 				} else {
492 
493 					// this.nextC = null;
494 
495 					sb.append(c).append(nextC);
496 				}
497 
498 			} else if (c == ',' && breakAtComma) {
499 
500 				if (sb.length() == 0) {
501 
502 					return Token.get(c);
503 
504 				} else {
505 
506 					nextC.add(c);
507 
508 					return new Token(sb.toString(), false);
509 
510 					// if (reservedToken != null) {
511 
512 					// sb.append(reservedToken.toString());
513 
514 					// reservedToken = null;
515 					// }
516 
517 					// sb.append(c);
518 				}
519 
520 			} else if (c == '}' || c == ']') {
521 
522 				if (sb.length() == 0) {
523 
524 					return Token.get(c);
525 
526 				} else if (previousIs(' ')) {
527 
528 					nextC.add(c);
529 
530 					return new Token(sb.toString(), false);
531 
532 				} else {
533 
534 					if (reservedToken != null) {
535 
536 						sb.append(reservedToken.toString());
537 
538 						reservedToken = null;
539 					}
540 
541 					sb.append(c);
542 				}
543 
544 			} else if (c == ' ') {
545 
546 				if (sb.length() == 0) {
547 
548 					currentTokenColumnNumber = currentColumnNumber;
549 
550 					continue;
551 
552 				} else {
553 
554 					reserveToken(' ');
555 
556 					continue;
557 				}
558 
559 			} else if (c == '\'' && sb.length() == 0) {
560 
561 				final int currentLineNumber0 = currentLineNumber;
562 				final int currentColumnNumber0 = currentColumnNumber;
563 
564 				while (true) {
565 
566 					final int n2 = read();
567 
568 					if (n2 == -1) {
569 
570 						throw new YamlReaderException( //
571 								currentLineNumber0, //
572 								currentColumnNumber0, //
573 								"Unterminated quoted string");
574 					}
575 
576 					++currentColumnNumber;
577 
578 					final char c2 = (char) n2;
579 
580 					if (c2 == '\'') {
581 
582 						boolean quoted = true;
583 
584 						final StringBuilder trailing = new StringBuilder();
585 
586 						while (true) {
587 
588 							final int n3 = read();
589 
590 							if (n3 == ' ') {
591 
592 								if (!isBlank(trailing)) {
593 
594 									if (quoted) {
595 										sb.insert(0, '\'');
596 										sb.append('\'');
597 										quoted = false;
598 									}
599 
600 									sb.append(trailing);
601 
602 									trailing.setLength(0);
603 								}
604 							}
605 
606 							if (n3 == -1 || n3 == '\n' || n3 == '\r' || n3 == '}') {
607 
608 								if (n3 != -1) {
609 
610 									nextC.add((char) n3);
611 								}
612 
613 								if (!isBlank(trailing)) {
614 
615 									if (quoted) {
616 										sb.insert(0, '\'');
617 										sb.append('\'');
618 										quoted = false;
619 									}
620 
621 									sb.append(trailing);
622 								}
623 
624 								break;
625 							}
626 
627 							trailing.append((char) n3);
628 						}
629 
630 						return new Token(sb.toString(), quoted);
631 
632 					} else if (c2 == '\\') {
633 
634 						final int n3 = read();
635 
636 						if (n3 == -1) {
637 
638 							throw new YamlReaderException( //
639 									currentLineNumber0, //
640 									currentColumnNumber0, //
641 									"Unterminated quoted string");
642 						}
643 
644 						final char c3 = (char) n3;
645 
646 						if (c3 == '\'') {
647 
648 							sb.append('\'');
649 
650 						} else if (c3 == '\\') {
651 
652 							sb.append('\\');
653 
654 						} else {
655 
656 							throw new YamlReaderException( //
657 									currentLineNumber, //
658 									currentColumnNumber, //
659 									"Illegal escape code: \"" + c2 + c3 + "\"");
660 						}
661 
662 						++currentColumnNumber;
663 
664 						continue;
665 					}
666 
667 					sb.append(c2);
668 				}
669 
670 			} else if (c == '"' && sb.length() == 0) {
671 
672 				final int currentLineNumber0 = currentLineNumber;
673 				final int currentColumnNumber0 = currentColumnNumber;
674 
675 				while (true) {
676 
677 					final int n2 = read();
678 
679 					if (n2 == -1) {
680 
681 						throw new YamlReaderException( //
682 								currentLineNumber0, //
683 								currentColumnNumber0, //
684 								"Unterminated quoted string");
685 					}
686 
687 					++currentColumnNumber;
688 
689 					final char c2 = (char) n2;
690 
691 					if (c2 == '"') {
692 
693 						return new Token(sb.toString(), true);
694 
695 					} else if (c2 == '\\') {
696 
697 						final int n3 = read();
698 
699 						if (n3 == -1) {
700 
701 							throw new YamlReaderException( //
702 									currentLineNumber0, //
703 									currentColumnNumber0, //
704 									"Unterminated quoted string");
705 						}
706 
707 						final char c3 = (char) n3;
708 
709 						if (c3 == '"') {
710 
711 							sb.append('"');
712 
713 						} else if (c3 == '\\') {
714 
715 							sb.append('\\');
716 
717 						} else {
718 
719 							throw new YamlReaderException( //
720 									currentLineNumber, //
721 									currentColumnNumber, //
722 									"Illegal escape code: \"" + c2 + c3 + "\"");
723 						}
724 
725 						++currentColumnNumber;
726 
727 						continue;
728 					}
729 
730 					sb.append(c2);
731 				}
732 
733 			} else {
734 
735 				if (reservedToken != null) {
736 
737 					sb.append(reservedToken.toString());
738 
739 					reservedToken = null;
740 				}
741 
742 				sb.append(c);
743 			}
744 		}
745 
746 		return new Token(sb.toString(), false);
747 	}
748 
749 	private boolean previousIs(
750 		final char c
751 	) {
752 
753 		if (reservedToken != null && !reservedToken.isEmpty()) {
754 
755 			final int l = reservedToken.length();
756 
757 			return reservedToken.substring(l - 1, l).charAt(0) == c;
758 		}
759 
760 		return nextC != null && !nextC.isEmpty() && nextC.get(nextC.size() - 1) == c;
761 	}
762 
763 	@Nullable
764 	public Token peekNextToken(
765 		final boolean inArray
766 	) throws IOException {
767 
768 		if (nextToken != null) {
769 
770 			return nextToken;
771 
772 		} else {
773 
774 			nextToken = readToken(inArray);
775 
776 			return nextToken;
777 		}
778 	}
779 }