JsDocTokenStream.java
/*
* Copyright 2009 The Closure Compiler Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.javascript.jscomp.parsing;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.javascript.rhino.TokenUtil;
/**
* This class implements the scanner for JsDoc strings.
*
* It is heavily based on Rhino's TokenStream.
*
*/
class JsDocTokenStream {
/*
* For chars - because we need something out-of-range
* to check. (And checking EOF by exception is annoying.)
* Note distinction from EOF token type!
*/
private static final int EOF_CHAR = -1;
JsDocTokenStream(String sourceString) {
this(sourceString, 0);
}
JsDocTokenStream(String sourceString, int lineno) {
this(sourceString, lineno, 0);
}
JsDocTokenStream(String sourceString, int lineno, int initCharno) {
checkNotNull(sourceString);
this.lineno = lineno;
this.sourceString = sourceString;
this.sourceEnd = sourceString.length();
this.sourceCursor = this.cursor = 0;
this.initLineno = lineno;
this.initCharno = initCharno;
}
/**
* Tokenizes JSDoc comments.
*/
@SuppressWarnings("fallthrough")
final JsDocToken getJsDocToken() {
int c;
stringBufferTop = 0;
for (;;) {
// eat white spaces
for (;;) {
charno = -1;
c = getChar();
if (c == EOF_CHAR) {
return JsDocToken.EOF;
} else if (c == '\n') {
return JsDocToken.EOL;
} else if (!TokenUtil.isJSSpace(c)) {
break;
}
}
switch (c) {
// annotation, e.g. @type or @constructor
case '@':
do {
c = getChar();
if (isAlpha(c)) {
addToString(c);
} else {
ungetChar(c);
this.string = getStringFromBuffer();
stringBufferTop = 0;
return JsDocToken.ANNOTATION;
}
} while (true);
case '*':
if (matchChar('/')) {
return JsDocToken.EOC;
} else {
return JsDocToken.STAR;
}
case ',':
return JsDocToken.COMMA;
case '>':
return JsDocToken.RIGHT_ANGLE;
case '(':
return JsDocToken.LEFT_PAREN;
case ')':
return JsDocToken.RIGHT_PAREN;
case '{':
return JsDocToken.LEFT_CURLY;
case '}':
return JsDocToken.RIGHT_CURLY;
case '[':
return JsDocToken.LEFT_SQUARE;
case ']':
return JsDocToken.RIGHT_SQUARE;
case '?':
return JsDocToken.QMARK;
case '!':
return JsDocToken.BANG;
case ':':
return JsDocToken.COLON;
case '=':
return JsDocToken.EQUALS;
case '|':
return JsDocToken.PIPE;
case '<':
return JsDocToken.LEFT_ANGLE;
case '.':
c = getChar();
if (c == '<') {
return JsDocToken.LEFT_ANGLE;
} else {
if (c == '.') {
c = getChar();
if (c == '.') {
return JsDocToken.ELLIPSIS;
} else {
addToString('.');
}
}
// we may backtrack across line boundary
ungetBuffer[ungetCursor++] = c;
c = '.';
}
// fall through
default: {
// recognize a JsDoc string but discard last . if it is followed by
// a non-JsDoc comment char, e.g. Array.<
int c1 = c;
addToString(c);
int c2 = getChar();
if (!isJSDocString(c2)) {
ungetChar(c2);
this.string = getStringFromBuffer();
stringBufferTop = 0;
return JsDocToken.STRING;
} else {
do {
c1 = c2;
c2 = getChar();
if (c1 == '.' && c2 == '<') {
ungetChar(c2);
ungetChar(c1);
this.string = getStringFromBuffer();
stringBufferTop = 0;
return JsDocToken.STRING;
} else {
if (isJSDocString(c2)) {
addToString(c1);
} else {
ungetChar(c2);
addToString(c1);
this.string = getStringFromBuffer();
stringBufferTop = 0;
return JsDocToken.STRING;
}
}
} while (true);
}
}
}
}
}
/**
* Gets the remaining JSDoc line without the {@link JsDocToken#EOL},
* {@link JsDocToken#EOF} or {@link JsDocToken#EOC}.
*/
@SuppressWarnings("fallthrough")
String getRemainingJSDocLine() {
int c;
for (;;) {
c = getChar();
switch (c) {
case '*':
if (peekChar() != '/') {
addToString(c);
break;
}
// fall through
case EOF_CHAR:
case '\n':
ungetChar(c);
this.string = getStringFromBuffer();
stringBufferTop = 0;
return this.string;
default:
addToString(c);
break;
}
}
}
final int getLineno() { return lineno; }
final int getCharno() {
return lineno == initLineno ? initCharno + charno : charno;
}
final String getString() { return string; }
private String getStringFromBuffer() {
String s = new String(stringBuffer, 0, stringBufferTop);
return s.intern();
}
private void addToString(int c) {
int n = stringBufferTop;
if (n == stringBuffer.length) {
char[] tmp = new char[stringBuffer.length * 2];
System.arraycopy(stringBuffer, 0, tmp, 0, n);
stringBuffer = tmp;
}
stringBuffer[n] = (char) c;
stringBufferTop = n + 1;
}
void ungetChar(int c) {
// can not unread past across line boundary
assert(!(ungetCursor != 0 && ungetBuffer[ungetCursor - 1] == '\n'));
ungetBuffer[ungetCursor++] = c;
cursor--;
}
private boolean matchChar(int test) {
int c = getCharIgnoreLineEnd();
if (c == test) {
return true;
} else {
ungetCharIgnoreLineEnd(c);
return false;
}
}
private static boolean isAlpha(int c) {
// Use 'Z' < 'a'
if (c <= 'Z') {
return 'A' <= c;
} else {
return 'a' <= c && c <= 'z';
}
}
private static boolean isJSDocString(int c) {
switch (c) {
case '@':
case '*':
case ',':
case '<':
case '>':
case ':':
case '(':
case ')':
case '{':
case '}':
case '[':
case ']':
case '?':
case '!':
case '|':
case '=':
case EOF_CHAR:
case '\n':
return false;
default:
return !TokenUtil.isJSSpace(c);
}
}
/**
* Allows the JSDocParser to update the character offset
* so that getCharno() returns a valid character position.
*/
void update() {
charno = getOffset();
}
private int peekChar() {
int c = getChar();
ungetChar(c);
return c;
}
protected int getChar() {
if (ungetCursor != 0) {
cursor++;
--ungetCursor;
if (charno == -1) {
charno = getOffset();
}
return ungetBuffer[ungetCursor];
}
for (;;) {
int c;
if (sourceCursor == sourceEnd) {
if (charno == -1) {
charno = getOffset();
}
return EOF_CHAR;
}
cursor++;
c = sourceString.charAt(sourceCursor++);
if (lineEndChar >= 0) {
if (lineEndChar == '\r' && c == '\n') {
lineEndChar = '\n';
continue;
}
lineEndChar = -1;
lineStart = sourceCursor - 1;
lineno++;
}
if (c <= 127) {
if (c == '\n' || c == '\r') {
lineEndChar = c;
c = '\n';
}
} else {
if (TokenUtil.isJSFormatChar(c)) {
continue;
}
if (isJSLineTerminator(c)) {
lineEndChar = c;
c = '\n';
}
}
if (charno == -1) {
charno = getOffset();
}
return c;
}
}
private int getCharIgnoreLineEnd() {
if (ungetCursor != 0) {
cursor++;
--ungetCursor;
if (charno == -1) {
charno = getOffset();
}
return ungetBuffer[ungetCursor];
}
for (;;) {
int c;
if (sourceCursor == sourceEnd) {
if (charno == -1) {
charno = getOffset();
}
return EOF_CHAR;
}
cursor++;
c = sourceString.charAt(sourceCursor++);
if (c <= 127) {
if (c == '\n' || c == '\r') {
lineEndChar = c;
c = '\n';
}
} else {
if (TokenUtil.isJSFormatChar(c)) {
continue;
}
if (isJSLineTerminator(c)) {
lineEndChar = c;
c = '\n';
}
}
if (charno == -1) {
charno = getOffset();
}
return c;
}
}
private static boolean isJSLineTerminator(int c) {
// Optimization for faster check for eol character:
// they do not have 0xDFD0 bits set
if ((c & 0xDFD0) != 0) {
return false;
}
return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
}
private void ungetCharIgnoreLineEnd(int c) {
ungetBuffer[ungetCursor++] = c;
cursor--;
}
/**
* Returns the offset into the current line.
*/
final int getOffset() {
return sourceCursor - lineStart - ungetCursor - 1;
}
// Set this to an initial non-null value so that the Parser has
// something to retrieve even if an error has occurred and no
// string is found. Fosters one class of error, but saves lots of
// code.
private String string = "";
private char[] stringBuffer = new char[128];
private int stringBufferTop;
// Room to backtrace from to < on failed match of the last - in <!--
private final int[] ungetBuffer = new int[3];
private int ungetCursor;
private int lineStart = 0;
private int lineEndChar = -1;
int lineno;
private int charno = -1;
private final int initCharno;
private final int initLineno;
private final String sourceString;
private final int sourceEnd;
// sourceCursor is an index into a small buffer that keeps a
// sliding window of the source stream.
int sourceCursor;
// cursor is a monotonically increasing index into the original
// source stream, tracking exactly how far scanning has progressed.
// Its value is the index of the next character to be scanned.
int cursor;
}