Commit 70ab400c authored by Johannes Roith
Browse files

Initial import of the old Java 1.5 scanner.

parent 71b8de1e
package de.in.tum.www2.java;
import java.io.Reader;
import java.io.IOException;
import java.io.InputStream;
import de.in.tum.www2.cup.IErrorReporter;
import de.in.tum.www2.cup.ErrorManager;
import java_cup.runtime.ComplexSymbolFactory;
import java_cup.runtime.ComplexSymbolFactory.ComplexSymbol;
import de.in.tum.www2.java.internal.onefivelexer.Lexer;
/**
 * Placeholder for a Java token scanner intended to wrap the bundled
 * {@link Lexer}.
 *
 * <p>The class currently declares no members: the draft implementation is
 * kept below as commented-out code and is not compiled.
 */
public class JavaScanner
{
// TODO: The Java scanner should be able to check against a declaration class,
// similar to the CUP one, that marks the CUP variables (after the colon) and
// the derived symbols such as blahxleft and blahxright in a special color.
// -> However, this is context-sensitive and should only happen in the
// correct local scope.
//
// Also, in an action block, the RESULT variable should be colored the same.
// Likewise, all fields declared in the special global "action code" block
// should be marked (differently). REQUIRES JAVA PARSER!
//
// All other variables must necessarily be local variables and can be colored
// appropriately (in the local block). REQUIRES JAVA PARSER!
//
// The syntax highlighter does not need to care whether functions exist and
// can just make them italic, like Eclipse does.
//
// Finally, there are types. These are not colored in Eclipse anyway.
//
// Draft implementation (disabled).
// NOTE(review): the draft passes a Reader to a three-argument Lexer
// constructor, while the three-argument constructor visible in this commit
// takes an InputStream and is itself a TODO stub - reconcile before enabling.
/*
private Lexer lexer;
public JavaScanner(IErrorReporter er, Reader r) {
ComplexSymbolFactory factory = new ComplexSymbolFactory();
ErrorManager errMan = new ErrorManager(er);
this.lexer = new Lexer(errMan, factory, r);
}
public JavaSymbol next_token() throws IOException {
ComplexSymbol symbol = (ComplexSymbol) lexer.next_token();
return new JavaSymbol(symbol);
}
*/
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Literal token that carries a {@code boolean} value. */
class BooleanLiteral extends Literal {
    /** The boxed literal value handed to the parser. */
    Boolean val;

    /**
     * Creates the literal.
     *
     * @param b the value of the literal
     */
    BooleanLiteral(boolean b) {
        // Boolean.valueOf returns the shared TRUE/FALSE instances; the
        // Boolean(boolean) constructor is deprecated and allocates needlessly.
        this.val = Boolean.valueOf(b);
    }

    /** Returns a {@code BOOLEAN_LITERAL} parser symbol whose value is the boxed boolean. */
    Symbol token() {
        return new Symbol(Sym.BOOLEAN_LITERAL, val);
    }

    /** Returns a debugging representation of the form {@code BooleanLiteral <value>}. */
    public String toString() {
        return "BooleanLiteral <"+val.toString()+">";
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Literal token that carries a single {@code char} value. */
class CharacterLiteral extends Literal {
    /** The boxed literal value handed to the parser. */
    Character val;

    /**
     * Creates the literal.
     *
     * @param c the character denoted by the literal
     */
    CharacterLiteral(char c) {
        // Character.valueOf replaces the deprecated Character(char) constructor
        // and may reuse cached instances.
        this.val = Character.valueOf(c);
    }

    /** Returns a {@code CHARACTER_LITERAL} parser symbol whose value is the boxed character. */
    Symbol token() {
        return new Symbol(Sym.CHARACTER_LITERAL, val);
    }

    /**
     * Returns a debugging representation; the character is passed through
     * {@code Token.escape} before being embedded.
     */
    public String toString() {
        return "CharacterLiteral <"+Token.escape(val.toString())+">";
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
/**
 * Base class for comment input elements. Accumulates the text of a comment
 * line by line, with each line's leading indentation and leading run of
 * {@code '*'} characters removed.
 */
abstract class Comment extends InputElement {
    /** Text collected so far; only touched through this instance, so no synchronization is needed. */
    private final StringBuilder comment = new StringBuilder();

    /** Returns all text appended so far. */
    String getComment() {
        return comment.toString();
    }

    /**
     * Appends one line of comment text, dropping leading white space and then
     * any leading stars.
     *
     * @param more the line to append; the caller supplies it '\n' terminated
     */
    void appendLine(String more) { // 'more' is '\n' terminated.
        final int length = more.length();
        int i = 0;
        // skip leading white space.
        while (i < length && isIndentation(more.charAt(i)))
            i++;
        // skip any leading stars.
        while (i < length && more.charAt(i) == '*')
            i++;
        // the rest of the string belongs to the comment.
        if (i < length)
            comment.append(more, i, length);
    }

    /**
     * Tells whether {@code c} is horizontal white space that may be stripped
     * from the front of a comment line.
     *
     * <p>{@link Character#isSpaceChar(char)} alone is false for TAB (and form
     * feed), so tab-indented lines would keep their indentation and leading
     * stars; {@link Character#isWhitespace(char)} covers those. Line
     * terminators are deliberately not treated as indentation, so that a
     * blank line still contributes its terminator to the comment text.
     */
    private static boolean isIndentation(char c) {
        if (c == '\n' || c == '\r')
            return false;
        return Character.isWhitespace(c) || Character.isSpaceChar(c);
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
/**
 * Marker subclass of {@link Comment} for documentation comments; it adds no
 * state or behavior of its own.
 */
class DocumentationComment extends Comment {

    /** Creates an empty documentation comment. */
    DocumentationComment() {
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Numeric literal token that carries a {@code double} value. */
class DoubleLiteral extends NumericLiteral {
    /**
     * Creates the literal.
     *
     * @param d the value of the literal
     */
    DoubleLiteral(double d) {
        // Double.valueOf replaces the deprecated Double(double) constructor.
        this.val = Double.valueOf(d);
    }

    /** Returns a {@code FLOATING_POINT_LITERAL} parser symbol carrying the boxed value. */
    Symbol token() {
        return new Symbol(Sym.FLOATING_POINT_LITERAL, val);
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Token that marks the end of the input. */
class EOF extends Token {

    /** Creates the end-of-input token. */
    EOF() {
    }

    /** Returns a value-less parser symbol of kind {@code Sym.EOF}. */
    Symbol token() {
        Symbol endOfInput = new Symbol(Sym.EOF);
        return endOfInput;
    }

    /** Returns the fixed debugging name of this token. */
    public String toString() {
        return "EOF";
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
/** A comment whose whole text is supplied as one line at construction time. */
class EndOfLineComment extends Comment {

    /**
     * Creates the comment and records its text through {@code appendLine}.
     *
     * @param comment the text of the comment line
     */
    EndOfLineComment(String comment) {
        appendLine(comment);
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java.io.Reader;
import java.io.FilterReader;
import java.io.IOException;
/**
 * Reader filter that replaces Java unicode escapes with the character they
 * denote. An escape is a backslash, followed by one or more 'u' characters,
 * followed by exactly four hexadecimal digits.
 *
 * <p>A backslash only starts an escape when it is not itself preceded by an
 * unpaired backslash, so the second backslash of a doubled backslash is
 * passed through untouched.
 *
 * <p>Only single-character and array reads are translated; mark/reset is
 * reported as unsupported.
 */
public class EscapedUnicodeReader extends FilterReader {
    /** One character already taken from the wrapped reader but not yet returned, or -1 if none. */
    int pushback=-1;
    /** False only right after a lone backslash was returned; the next backslash then cannot start an escape. */
    boolean isEvenSlash = true;

    /**
     * Wraps the given reader.
     *
     * @param in the reader that supplies the raw, still escaped, characters
     */
    public EscapedUnicodeReader(Reader in) {
        super(in);
    }

    /**
     * Reads one character, translating a unicode escape if one starts here.
     *
     * @return the character read, or -1 at the end of the stream
     * @throws IOException if the wrapped reader fails
     * @throws Error if backslash-u is not followed by four ASCII hex digits
     *         (kept as {@code Error} for compatibility with existing callers)
     */
    @Override
    public int read() throws IOException {
        int r = (pushback == -1) ? in.read() : pushback;
        pushback = -1;

        if (r != '\\') {
            isEvenSlash = true;
            return r;
        }
        // found a backslash;
        if (!isEvenSlash) { // Only even slashes are eligible unicode escapes.
            isEvenSlash = true;
            return r;
        }
        // Check for the trailing u.
        pushback = in.read();
        if (pushback != 'u') {
            isEvenSlash = false;
            return '\\';
        }
        // OK, we've found backslash-u.
        // Reset pushback and snarf up all trailing u's.
        pushback = -1;
        do {
            r = in.read();
        } while (r == 'u');

        // r now holds the first of the four required hex digits. Reading
        // exactly three more (and nothing beyond them) means no lookahead
        // character has to be pushed back afterwards.
        int val = 0;
        for (int i = 0; i < 4; i++) {
            if (i > 0)
                r = in.read();
            int d = hexValue(r);
            if (d < 0)
                throw new Error("Invalid unicode escape character.");
            val = (val << 4) | d;
        }
        isEvenSlash = true;
        return val;
    }

    /**
     * Returns the value of an ASCII hexadecimal digit, or -1 for anything else
     * (including the end-of-stream marker).
     *
     * <p>{@code Character.digit} is deliberately not used: it also accepts
     * non-ASCII digits and letters such as the fullwidth forms, which are not
     * valid in a unicode escape.
     */
    private static int hexValue(int c) {
        if (c >= '0' && c <= '9')
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
        return -1;
    }

    /**
     * Fills part of an array by repeated single-character reads.
     *
     * @param cbuf destination buffer
     * @param off  index of the first element to write
     * @param len  maximum number of characters to read
     * @return the number of characters stored, or -1 if the stream was already
     *         at its end before any character was read
     * @throws IOException if the wrapped reader fails
     */
    // synthesize array read from single-character read.
    @Override
    public int read(char cbuf[], int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            int c = read();
            if (c == -1)
                return (i == 0) ? -1 : i; // end of stream reached.
            cbuf[i + off] = (char) c;
        }
        return len;
    }

    /** Mark/reset is not supported by this filter. */
    @Override
    public boolean markSupported() {
        return false;
    }

    /**
     * Reports whether a read can proceed without blocking: always true while
     * a pushed-back character is pending, otherwise whatever the wrapped
     * reader reports.
     */
    @Override
    public boolean ready() throws IOException {
        if (pushback != -1)
            return true;
        return in.ready();
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
/** FIFO class. This helps implement the lookahead we need for JSR-14.
* Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
* This program is released under the terms of the GPL; see the file
* COPYING for more details. There is NO WARRANTY on this code.
*/
/**
 * Growable ring buffer of parser symbols that refills itself from a
 * {@link Getter} whenever more symbols are requested than it currently holds.
 *
 * <p>One slot of the backing array is always left unused, so that
 * {@code start == end} unambiguously means "empty".
 */
class FIFO {
    /** Ring storage; {@code start} indexes the oldest symbol, {@code end} the next free slot. */
    java_cup.runtime.Symbol[] backing = new java_cup.runtime.Symbol[10];
    int start=0, end=0;
    /** Source of further symbols, consulted on demand by {@code get} and {@code peek}. */
    final Getter getter;

    /**
     * Creates an empty queue.
     *
     * @param getter supplier asked for a symbol whenever the queue runs short
     */
    FIFO(Getter getter) {
        this.getter = getter;
    }

    /** Returns true when no symbols are buffered. */
    public boolean isEmpty() {
        return end == start;
    }

    /** Returns true when storing one more symbol would make {@code end} catch up with {@code start}. */
    private boolean isFull() {
        if (start == end + 1)
            return true;
        return start == 0 && end == backing.length - 1;
    }

    /** Returns the number of buffered symbols. */
    private int size() {
        int unwrappedEnd = end;
        if (unwrappedEnd < start)
            unwrappedEnd += backing.length; // end has wrapped around
        return unwrappedEnd - start;
    }

    /**
     * Appends a symbol at the tail, growing the storage first if it is full.
     *
     * @param o the symbol to enqueue
     */
    public void put(java_cup.runtime.Symbol o) {
        if (isFull())
            grow();
        ASSERT(!isFull());
        backing[end] = o;
        end++;
        if (end == backing.length)
            end = 0;
        ASSERT(!isEmpty());
    }

    /**
     * Doubles the storage and moves the contents so that the oldest symbol
     * sits at index 0. Only called when the buffer is full.
     */
    private void grow() {
        final int oldLength = backing.length;
        final int tailCount = oldLength - start; // slots from start to the array end
        java_cup.runtime.Symbol[] larger =
            new java_cup.runtime.Symbol[oldLength * 2];
        System.arraycopy(backing, start, larger, 0, tailCount);
        System.arraycopy(backing, 0, larger, tailCount, start);
        backing = larger;
        start = 0;
        end = oldLength - 1;
    }

    /**
     * Removes and returns the oldest symbol, first fetching one from the
     * getter if the queue is empty.
     *
     * @throws java.io.IOException if the getter fails
     */
    public java_cup.runtime.Symbol get() throws java.io.IOException {
        if (isEmpty())
            put(getter.next());
        ASSERT(!isEmpty());
        java_cup.runtime.Symbol oldest = backing[start];
        start++;
        if (start == backing.length)
            start = 0;
        ASSERT(!isFull());
        return oldest;
    }

    /**
     * Returns, without removing it, the symbol {@code i} positions behind the
     * head, fetching from the getter until that many symbols are buffered.
     *
     * @param i zero-based distance from the head
     * @throws java.io.IOException if the getter fails
     */
    public java_cup.runtime.Symbol peek(int i) throws java.io.IOException {
        while (size() <= i)
            put(getter.next());
        int slot = start + i;
        if (slot >= backing.length)
            slot -= backing.length;
        ASSERT(0 <= slot && slot < backing.length);
        return backing[slot];
    }

    /** Supplier of symbols used to refill the queue. */
    abstract static class Getter {
        abstract java_cup.runtime.Symbol next()
            throws java.io.IOException;
    }

    /** Throws a bare {@link RuntimeException} when the condition does not hold. */
    private static void ASSERT(boolean b) {
        if (b)
            return;
        throw new RuntimeException();
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Numeric literal token that carries a {@code float} value. */
class FloatLiteral extends NumericLiteral {
    /**
     * Creates the literal.
     *
     * @param f the value of the literal
     */
    FloatLiteral(float f) {
        // Float.valueOf replaces the deprecated Float(float) constructor.
        this.val = Float.valueOf(f);
    }

    /** Returns a {@code FLOATING_POINT_LITERAL} parser symbol carrying the boxed value. */
    Symbol token() {
        return new Symbol(Sym.FLOATING_POINT_LITERAL, val);
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Token for an identifier; keeps the identifier's text. */
public class Identifier extends Token {
    /** The text of the identifier. */
    String identifier;

    /**
     * Creates the token.
     *
     * @param identifier the text of the identifier
     */
    public Identifier(String identifier) {
        this.identifier = identifier;
    }

    /** Returns a debugging representation of the form {@code Identifier <text>}. */
    public String toString() {
        StringBuffer text = new StringBuffer("Identifier <");
        text.append(identifier);
        text.append(">");
        return text.toString();
    }

    /* Ben Walter <bwalter@mit.edu> correctly pointed out that
     * the first released version of this grammar/lexer did not
     * return the string value of the identifier in the parser token.
     * Should be fixed now. ;-) <cananian@alumni.princeton.edu>
     */
    /** Returns an {@code IDENTIFIER} parser symbol whose value is the identifier text. */
    Symbol token() {
        Symbol result = new Symbol(Sym.IDENTIFIER, identifier);
        return result;
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
/**
 * Common supertype of everything the lexer can produce from the input;
 * it declares no members of its own.
 */
abstract class InputElement {
}
package de.in.tum.www2.java.internal.onefivelexer;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/** Numeric literal token that carries an {@code int} value. */
class IntegerLiteral extends NumericLiteral {
    /**
     * Creates the literal.
     *
     * @param i the value of the literal
     */
    IntegerLiteral(int i) {
        // Integer.valueOf replaces the deprecated Integer(int) constructor and
        // reuses cached instances for small values.
        this.val = Integer.valueOf(i);
    }

    /** Returns an {@code INTEGER_LITERAL} parser symbol carrying the boxed value. */
    Symbol token() {
        return new Symbol(Sym.INTEGER_LITERAL, val);
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java.util.Hashtable;
import java_cup.runtime.Symbol;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/**
 * Token for a reserved word. The keyword text is translated to its parser
 * symbol code through a static lookup table.
 */
class Keyword extends Token {
    /** The keyword text exactly as it appeared in the input. */
    String keyword;

    /**
     * Creates the token.
     *
     * @param s the keyword text; expected to be one of the entries of the lookup table
     */
    Keyword(String s) { keyword = s; }

    /**
     * Returns a value-less parser symbol whose kind is the code registered for
     * this keyword.
     *
     * @throws IllegalStateException if the text is not a registered keyword
     *         (previously this surfaced as a bare NullPointerException)
     */
    Symbol token() {
        Integer code = key_table.get(keyword);
        if (code == null)
            throw new IllegalStateException("Not a known keyword: " + keyword);
        return new Symbol(code.intValue());
    }

    /** Returns a debugging representation of the form {@code Keyword <text>}. */
    public String toString() { return "Keyword <"+keyword+">"; }

    /** Keyword text to symbol code; filled once by the static initializer and only read afterwards. */
    static private final Hashtable<String, Integer> key_table =
        new Hashtable<String, Integer>();
    static {
        key_table.put("abstract", Integer.valueOf(Sym.ABSTRACT));
        key_table.put("assert", Integer.valueOf(Sym.ASSERT));
        key_table.put("boolean", Integer.valueOf(Sym.BOOLEAN));
        key_table.put("break", Integer.valueOf(Sym.BREAK));
        key_table.put("byte", Integer.valueOf(Sym.BYTE));
        key_table.put("case", Integer.valueOf(Sym.CASE));
        key_table.put("catch", Integer.valueOf(Sym.CATCH));
        key_table.put("char", Integer.valueOf(Sym.CHAR));
        key_table.put("class", Integer.valueOf(Sym.CLASS));
        key_table.put("const", Integer.valueOf(Sym.CONST));
        key_table.put("continue", Integer.valueOf(Sym.CONTINUE));
        key_table.put("default", Integer.valueOf(Sym.DEFAULT));
        key_table.put("do", Integer.valueOf(Sym.DO));
        key_table.put("double", Integer.valueOf(Sym.DOUBLE));
        key_table.put("else", Integer.valueOf(Sym.ELSE));
        key_table.put("enum", Integer.valueOf(Sym.ENUM));
        key_table.put("extends", Integer.valueOf(Sym.EXTENDS));
        key_table.put("final", Integer.valueOf(Sym.FINAL));
        key_table.put("finally", Integer.valueOf(Sym.FINALLY));
        key_table.put("float", Integer.valueOf(Sym.FLOAT));
        key_table.put("for", Integer.valueOf(Sym.FOR));
        key_table.put("goto", Integer.valueOf(Sym.GOTO));
        key_table.put("if", Integer.valueOf(Sym.IF));
        key_table.put("implements", Integer.valueOf(Sym.IMPLEMENTS));
        key_table.put("import", Integer.valueOf(Sym.IMPORT));
        key_table.put("instanceof", Integer.valueOf(Sym.INSTANCEOF));
        key_table.put("int", Integer.valueOf(Sym.INT));
        key_table.put("interface", Integer.valueOf(Sym.INTERFACE));
        key_table.put("long", Integer.valueOf(Sym.LONG));
        key_table.put("native", Integer.valueOf(Sym.NATIVE));
        key_table.put("new", Integer.valueOf(Sym.NEW));
        key_table.put("package", Integer.valueOf(Sym.PACKAGE));
        key_table.put("private", Integer.valueOf(Sym.PRIVATE));
        key_table.put("protected", Integer.valueOf(Sym.PROTECTED));
        key_table.put("public", Integer.valueOf(Sym.PUBLIC));
        key_table.put("return", Integer.valueOf(Sym.RETURN));
        key_table.put("short", Integer.valueOf(Sym.SHORT));
        key_table.put("static", Integer.valueOf(Sym.STATIC));
        key_table.put("strictfp", Integer.valueOf(Sym.STRICTFP));
        key_table.put("super", Integer.valueOf(Sym.SUPER));
        key_table.put("switch", Integer.valueOf(Sym.SWITCH));
        key_table.put("synchronized", Integer.valueOf(Sym.SYNCHRONIZED));
        key_table.put("this", Integer.valueOf(Sym.THIS));
        key_table.put("throw", Integer.valueOf(Sym.THROW));
        key_table.put("throws", Integer.valueOf(Sym.THROWS));
        key_table.put("transient", Integer.valueOf(Sym.TRANSIENT));
        key_table.put("try", Integer.valueOf(Sym.TRY));
        key_table.put("void", Integer.valueOf(Sym.VOID));
        key_table.put("volatile", Integer.valueOf(Sym.VOLATILE));
        key_table.put("while", Integer.valueOf(Sym.WHILE));
    }
}
package de.in.tum.www2.java.internal.onefivelexer;
import java.io.InputStream;
import java.io.Reader;
import java.io.LineNumberReader;
import java_cup.runtime.ComplexSymbolFactory;
import de.in.tum.www2.cup.ErrorManager;
import de.in.tum.www2.java.internal.onefivelexer.Sym;
/* Java lexer.
* Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
* This program is released under the terms of the GPL; see the file
* COPYING for more details. There is NO WARRANTY on this code.
*/
public class Lexer implements java_cup.runtime.Scanner {
// Source of input lines; the two-argument constructor wraps the caller's
// reader in an EscapedUnicodeReader and then in a LineNumberReader.
LineNumberReader reader;
// Language-level switches: each is true when the requested Java minor
// version is at least 2, 4 or 5 respectively.
boolean isJava12;
boolean isJava14;
boolean isJava15;
// Text of the line currently being scanned; getInputElement() reports EOF
// when this is null.
String line = null;
// Index into 'line' of the next character to examine.
int line_pos = 1;
// Zero until the first line is requested; getInputElement() calls
// nextLine() while this is still 0.
int line_num = 0;
// Line bookkeeping used to compute token positions (lineL.head + line_pos).
// NOTE(review): LineList is declared elsewhere; its exact meaning is not
// visible from this part of the file.
LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Creates a lexer for the given input using Java minor version 2.
 *
 * @param reader source of the characters to tokenize
 */
public Lexer(Reader reader) {
this(reader, 2); // by default, use a Java 1.2-compatible lexer.
}
/**
 * Creates a lexer for the given input and Java language level.
 *
 * @param reader             source of the characters to tokenize; unicode
 *                           escapes are translated before line splitting
 * @param java_minor_version the minor version of the Java language to
 *                           accept; values of at least 2, 4 and 5 switch on
 *                           the corresponding {@code isJava1x} flags
 */
public Lexer(Reader reader, int java_minor_version) {
    EscapedUnicodeReader unescaped = new EscapedUnicodeReader(reader);
    this.reader = new LineNumberReader(unescaped);

    isJava12 = (java_minor_version >= 2);
    isJava14 = (java_minor_version >= 4);
    isJava15 = (java_minor_version >= 5);
}
/**
 * Unimplemented constructor stub: none of the arguments is used and no
 * field is assigned, so {@code reader} stays {@code null} and the version
 * flags stay {@code false}.
 *
 * NOTE(review): a lexer built this way presumably cannot read any input
 * until this is implemented - confirm before calling it.
 *
 * @param errMan  currently ignored
 * @param factory currently ignored
 * @param is      currently ignored
 */
public Lexer(ErrorManager errMan, ComplexSymbolFactory factory, InputStream is) {
// TODO!
}
/**
 * Returns the next token for the parser. Tokens already buffered in the
 * lookahead queue are handed out first; without a queue the token is
 * scanned directly. The returned token is also remembered in {@code last}.
 *
 * @throws java.io.IOException if reading the input fails
 */
public java_cup.runtime.Symbol next_token() throws java.io.IOException {
    final java_cup.runtime.Symbol sym;
    if (lookahead != null) {
        sym = lookahead.get();
    } else {
        sym = _nextToken();
    }
    /* Old "smart lexer" hack to parse JSR-14 syntax. New, better, grammar
     * makes this unnecessary. (Credit to Eric Blake for its discovery.)
     *
    if (isJava15 && sym.sym==Sym.LT && shouldBePLT())
      sym.sym=Sym.PLT;
     */
    last = sym;
    return sym;
}
/**
 * Looks ahead in the token stream to decide whether an LT that has just
 * been scanned should be treated as a PLT. Its only use visible here is in
 * the disabled hack inside {@code next_token}.
 *
 * <p>The answer is true only if the previous token was an identifier and
 * the upcoming tokens have the shape
 * {@code IDENTIFIER (DOT IDENTIFIER)* (LBRACK RBRACK)*} followed by one of
 * LT, GT, COMMA, EXTENDS or IMPLEMENTS. The lookahead queue is created on
 * first use.
 *
 * @throws java.io.IOException if scanning the lookahead tokens fails
 */
private boolean shouldBePLT() throws java.io.IOException {
    // look ahead to see if this LT should be changed to a PLT
    boolean followsIdentifier = (last != null) && (last.sym == Sym.IDENTIFIER);
    if (!followsIdentifier)
        return false;

    if (lookahead == null) {
        lookahead = new FIFO(new FIFO.Getter() {
            java_cup.runtime.Symbol next() throws java.io.IOException {
                return _nextToken();
            }
        });
    }

    int pos = 0;
    // skip past IDENTIFIER (DOT IDENTIFIER)*
    if (lookahead.peek(pos).sym != Sym.IDENTIFIER)
        return false;
    pos++;
    while (lookahead.peek(pos).sym == Sym.DOT) {
        if (lookahead.peek(pos + 1).sym != Sym.IDENTIFIER)
            return false;
        pos += 2;
    }
    // skip past (LBRACK RBRACK)*
    while (lookahead.peek(pos).sym == Sym.LBRACK) {
        if (lookahead.peek(pos + 1).sym != Sym.RBRACK)
            return false;
        pos += 2;
    }
    // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
    final int follower = lookahead.peek(pos).sym;
    return follower == Sym.LT
        || follower == Sym.GT
        || follower == Sym.COMMA
        || follower == Sym.EXTENDS
        || follower == Sym.IMPLEMENTS;
}
// Token most recently returned by next_token(); null before the first call.
private java_cup.runtime.Symbol last = null;
// Lookahead queue; stays null until shouldBePLT() creates it, after which
// next_token() draws its tokens from the queue.
private FIFO lookahead = null;
/**
 * Scans forward to the next real token, discarding every input element
 * that is not a {@code Token}. The text of each documentation comment
 * passed on the way is stored in {@code comment}.
 *
 * @return the token's parser symbol with {@code left} and {@code right}
 *         set to its start and end positions
 * @throws java.io.IOException if reading the input fails
 */
public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
    /* tokens are:
     *  Identifiers/Keywords/true/false/null (start with java letter)
     *  numeric literal (start with number)
     *  character literal (start with single quote)
     *  string (start with double quote)
     *  separator (parens, braces, brackets, semicolon, comma, period)
     *  operator (equals, plus, minus, etc)
     *  whitespace
     *  comment (start with slash)
     */
    InputElement element;
    int startpos;
    for (;;) {
        // position is taken before each element, so it ends up at the token itself
        startpos = lineL.head + line_pos;
        element = getInputElement();
        if (element instanceof DocumentationComment)
            comment = ((Comment) element).getComment();
        if (element instanceof Token)
            break;
    }
    final int endpos = lineL.head + line_pos - 1;
    //System.out.println(element.toString()); // uncomment to debug lexer.
    java_cup.runtime.Symbol sym = ((Token) element).token();
    // fix up left/right positions.
    sym.left = startpos;
    sym.right = endpos;
    // return token.
    return sym;
}
/**
 * Debugging aid: reads one input element and prints it to standard output.
 *
 * @return false once the element read is the end-of-file marker, true otherwise
 * @throws java.io.IOException if reading the input fails
 */
public boolean debug_lex() throws java.io.IOException {
    InputElement element = getInputElement();
    System.out.println(element);
    boolean reachedEnd = element instanceof EOF;
    return !reachedEnd;
}
/** Text of the most recently scanned documentation comment; null until one is seen or cleared. */
String comment;

/** Returns the stored documentation-comment text. */
public String lastComment() {
    return comment;
}

/** Resets the stored documentation-comment text to the empty string. */
public void clearComment() {
    comment = "";
}
/**
 * Reads the next input element (white space, comment, token or EOF)
 * starting at the current position in the current line.
 *
 * <p>Requests a line via {@code nextLine()} before the first read and
 * whenever the current line is used up; a null line means end of input.
 *
 * @return the element found; an {@code EOF} element when no line is left
 *         or when the ASCII SUB character is encountered
 * @throws java.io.IOException if reading the input fails
 */
InputElement getInputElement() throws java.io.IOException {
    if (line_num == 0)
        nextLine();
    if (line == null)
        return new EOF();
    if (line.length() <= line_pos) { // end of line.
        nextLine();
        if (line == null)
            return new EOF();
    }

    switch (line.charAt(line_pos)) {

    // White space:
    case ' ':   // ASCII SP
    case '\t':  // ASCII HT
    case '\f':  // ASCII FF
    case '\n':  // LineTerminator
        return new WhiteSpace(consume());

    // EOF character:
    // ASCII SUB (control-Z) is decimal 26, i.e. octal 032 / U+001A. The
    // previous label, octal 020, is decimal 16 (DLE) and never matched SUB.
    case '\032': // ASCII SUB
        consume();
        return new EOF();

    // Comment prefix:
    case '/':
        return getComment();

    // else, a Token
    default:
        return getToken();
    }
}
// May get Token instead of Comment.
InputElement getComment() throws java.io.IOException {
String comment;
// line.charAt(line_pos+0) is '/'