|
/*================================================================*/
|
/*
|
JavaCup Specification for the JavaCup Specification Language
|
by Scott Hudson, GVU Center, Georgia Tech, August 1995
|
|
This JavaCup specification is used to implement JavaCup itself.
|
It specifies the parser for the JavaCup specification language.
|
(It also serves as a reasonable example of what a typical JavaCup
|
spec looks like).
|
|
The specification has the following parts:
|
Package and import declarations
|
These serve the same purpose as in a normal Java source file
|
(and will appear in the generated code for the parser). In this
|
case we are part of the java_cup package and we import both the
|
java_cup runtime system and Hashtable from the standard Java
|
utilities package.
|
|
Action code
|
This section provides code that is included with the class encapsulating
|
the various pieces of user code embedded in the grammar (i.e., the
|
semantic actions). This provides a series of helper routines and
|
data structures that the semantic actions use.
|
|
Parser code
|
This section provides code included in the parser class itself. In
|
this case we override the default error reporting routines.
|
|
Init with and scan with
|
These sections provide small bits of code that initialize, then
|
indicate how to invoke the scanner.
|
|
Symbols and grammar
|
These sections declare all the terminal and non terminal symbols
|
and the types of objects that they will be represented by at runtime,
|
then indicate the start symbol of the grammar (spec), and finally provide
|
the grammar itself (with embedded actions).
|
|
Operation of the parser
|
The parser acts primarily by accumulating data structures representing
|
various parts of the specification. Various small parts (e.g., single
|
code strings) are stored as static variables of the emit class and
|
in a few cases as variables declared in the action code section.
|
Terminals, non terminals, and productions are maintained as collections
|
accessible via static methods of those classes. In addition, two
|
symbol tables are kept:
|
symbols maintains the name to object mapping for all symbols
|
non_terms maintains a separate mapping containing only the non terms
|
|
Several intermediate working structures are also declared in the action
|
code section. These include: rhs_parts, rhs_pos, and lhs_nt which
|
build up parts of the current production while it is being parsed.
|
|
Author(s)
|
Scott Hudson, GVU Center, Georgia Tech.
|
|
Revisions
|
v0.9a First released version [SEH] 8/29/95
|
v0.9b Updated for beta language (throws clauses) [SEH] 11/25/95
|
*/
|
/*================================================================*/
|
|
package java_cup;
|
import java_cup.runtime.*;
|
import java.util.Hashtable;
|
|
/*----------------------------------------------------------------*/
|
|
action code {:

  /* ---- right hand side accumulation ---- */

  /** Upper bound on the number of parts (symbols plus actions) that a
      single right hand side may contain. */
  protected final int MAX_RHS = 200;

  /** Fixed-size buffer that collects the parts of the right hand side
      currently being parsed. */
  protected production_part[] rhs_parts = new production_part[MAX_RHS];

  /** Index of the next free slot in rhs_parts. */
  protected int rhs_pos = 0;

  /** Begin collecting a fresh right hand side (rewinds rhs_pos to 0). */
  protected void new_rhs()
    {
      rhs_pos = 0;
    }

  /** Store one more part at the end of the right hand side being built.
   *  @param part the production part to append.
   *  @throws java.lang.Exception if MAX_RHS parts are already stored.
   */
  protected void add_rhs_part(production_part part) throws java.lang.Exception
    {
      if (rhs_pos < MAX_RHS)
        {
          rhs_parts[rhs_pos++] = part;
        }
      else
        {
          throw new Exception("Internal Error: Productions limited to " +
                              MAX_RHS + " symbols and actions");
        }
    }

  /** Produce a labeled version of a production part.
   *  @param part the part to label.
   *  @param lab  the label to attach, or null for none.
   *  @return part itself when lab is null or part is an action; otherwise
   *          a new symbol_part for the same symbol carrying the label.
   */
  protected production_part add_lab(production_part part, String lab)
    throws internal_error
    {
      /* only a symbol with a real label needs to be rebuilt */
      if (lab != null && !part.is_action())
        return new symbol_part(((symbol_part)part).the_symbol(), lab);

      return part;
    }

  /* ---- multipart (dotted) name accumulation ---- */

  /** Dotted name built up so far; empty when no segment has been seen. */
  protected String multipart_name = "";

  /** Add one more segment to multipart_name, preceded by a "." unless it
   *  is the first segment.
   *  @param name the segment to append.
   */
  protected void append_multipart(String name)
    {
      String separator = (multipart_name.length() == 0) ? "" : ".";
      multipart_name = multipart_name + separator + name;
    }

  /* ---- symbol tables and parse state ---- */

  /** Every declared symbol: name -> production part. */
  protected Hashtable symbols = new Hashtable();

  /** Non terminals only: name -> non_terminal. */
  protected Hashtable non_terms = new Hashtable();

  /** Start non terminal of the grammar; null until one is established. */
  protected non_terminal start_nt = null;

  /** Non terminal on the left hand side of the production being parsed. */
  protected non_terminal lhs_nt;

:};
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
parser code {:

  /* Replacements for the default error reporting routines; both pass
     the message to the lexer and ignore the info argument. */

  /** Report a recoverable error through the lexer. */
  public void report_error(String message, Object info)
    {
      lexer.emit_error(message);
    }

  /** Report an unrecoverable error: stop parsing, emit the message,
      print a final notice on stderr and exit with status 1. */
  public void report_fatal_error(String message, Object info)
    {
      done_parsing();
      lexer.emit_error(message);
      System.err.println("Can't recover from previous error(s), giving up.");
      System.exit(1);
    }

:};
|
|
/*----------------------------------------------------------------*/
|
|
/* code run to initialize before parsing: set up the lexer */
init with {:
  lexer.init();
:};

/* code used to obtain each token: ask the lexer for the next one */
scan with {:
  return lexer.next_token();
:};
|
|
/*----------------------------------------------------------------*/
|
|
/* terminals represented at runtime by java_cup.runtime.token objects */
terminal java_cup.runtime.token
  PACKAGE, IMPORT, CODE, ACTION, PARSER,
  TERMINAL, NON, INIT, SCAN, WITH, START,
  SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR,
  DEBUG;

/* terminals represented at runtime by java_cup.runtime.str_token objects
   (their actions read the str_val field) */
terminal java_cup.runtime.str_token
  ID, CODE_STRING;

/* non terminals represented at runtime by java_cup.runtime.symbol objects */
non terminal java_cup.runtime.symbol
  spec, package_spec, import_list, code_part,
  action_code_part, parser_code_part, symbol_list, start_spec,
  production_list, multipart_id, import_spec, import_id,
  init_code, scan_code, symbol, debug_grammar,
  type_id, term_name_list, non_term_name_list, production,
  prod_part_list, prod_part, new_term_id, new_non_term_id,
  rhs_list, rhs, empty;

/* non terminals whose result carries a string in str_val */
non terminal java_cup.runtime.str_token
  nt_id, symbol_id, label_id, opt_label;
|
|
/*----------------------------------------------------------------*/
|
|
/* a complete specification is the start symbol of this grammar */
start with
  spec;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
spec ::=
        {:
          /* before anything is parsed: enter the predefined "error"
             terminal into the symbol table ... */
          symbols.put("error", new symbol_part(terminal.error));

          /* ... and the predefined start non terminal under "$START" */
          non_terms.put("$START", non_terminal.START_nt);
        :}
        package_spec import_list code_part debug_grammar
        init_code scan_code
        symbol_list start_spec production_list
    |
        /* error recovery assuming something went wrong before symbols
           and we have TERMINAL or NON TERMINAL to sync on.  if we get
           an error after that, we recover inside symbol_list or
           production_list
        */
        error symbol_list start_spec production_list
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
package_spec ::=
        PACKAGE multipart_id
        {:
          /* the dotted name just accumulated is the package name */
          emit.package_name = multipart_name;

          /* clear the accumulator for the next multipart id */
          multipart_name = "";
        :}
        SEMI
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* zero or more import declarations (left recursive) */
import_list ::= import_list import_spec | empty ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
import_spec ::=
        IMPORT import_id
        {:
          /* record the imported name with the emitter */
          emit.import_list.push(multipart_name);

          /* clear the accumulator for the next multipart id */
          multipart_name = "";
        :}
        SEMI
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* user code sections: action code first, then parser code */
code_part ::=
        action_code_part
        parser_code_part
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
action_code_part ::=
        ACTION CODE CODE_STRING:action_src SEMI
        {:
          /* hand the user's action code text to the emitter */
          emit.action_code = action_src.str_val;
        :}
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
parser_code_part ::=
        PARSER CODE CODE_STRING:parser_src SEMI
        {:
          /* hand the user's parser code text to the emitter */
          emit.parser_code = parser_src.str_val;
        :}
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
init_code ::=
        INIT WITH CODE_STRING:init_src SEMI
        {:
          /* hand the user's initialization code to the emitter */
          emit.init_code = init_src.str_val;
        :}
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
scan_code ::=
        SCAN WITH CODE_STRING:scan_src SEMI
        {:
          /* hand the user's scanner invocation code to the emitter */
          emit.scan_code = scan_src.str_val;
        :}
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
debug_grammar ::=
        DEBUG WITH multipart_id SEMI
        {:
          /* the dotted name just accumulated names the debug procedure */
          emit.debug_grammar = multipart_name;

          /* clear the accumulator for the next multipart id */
          multipart_name = "";
        :}
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* one or more symbol declarations (left recursive) */
symbol_list ::=
        symbol_list symbol
    |
        symbol
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
symbol ::=
        /* terminal declaration: a type followed by the new names */
        TERMINAL type_id term_name_list
        {:
          /* the type name accumulated in multipart_name is no longer
             needed once the name list is done; clear it */
          multipart_name = "";
        :}
        SEMI
    |
        /* non terminal declaration: a type followed by the new names */
        NON TERMINAL type_id non_term_name_list
        {:
          /* clear the accumulated type name */
          multipart_name = "";
        :}
        SEMI
    |
        /* error recovery productions -- sync on semicolon */
        TERMINAL error
        {:
          /* drop whatever part of a name was accumulated */
          multipart_name = "";
        :}
        SEMI
    |
        NON TERMINAL error
        {:
          /* drop whatever part of a name was accumulated */
          multipart_name = "";
        :}
        SEMI
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* comma separated list of one or more new terminal names */
term_name_list ::=
        term_name_list COMMA new_term_id
    |
        new_term_id
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* comma separated list of one or more new non terminal names */
non_term_name_list ::= non_term_name_list COMMA new_non_term_id | new_non_term_id ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
start_spec ::=
        START WITH nt_id:start_id
        {:
          /* the named start symbol must be a declared non terminal */
          non_terminal declared =
            (non_terminal)non_terms.get(start_id.str_val);

          if (declared != null)
            {
              /* keep it as the start non terminal */
              start_nt = declared;

              /* special start production:  $START ::= start_nt EOF  */
              new_rhs();
              add_rhs_part(new symbol_part(start_nt));
              add_rhs_part(new symbol_part(terminal.EOF));
              emit.start_production =
                new production(non_terminal.START_nt, rhs_parts, rhs_pos);
              new_rhs();
            }
          else
            {
              lexer.emit_error( "Start non terminal \"" + start_id.str_val +
                                "\" has not been declared");
            }
        :}
        SEMI
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* one or more productions (left recursive) */
production_list ::=
        production_list production
    |
        production
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
production ::=
        nt_id:lhs_name
        {:
          /* find the non terminal named on the left hand side */
          lhs_nt = (non_terminal)non_terms.get(lhs_name.str_val);

          /* an undeclared name is reported only while no other error
             has been counted by the lexer */
          if (lhs_nt == null && lexer.error_count == 0)
            lexer.emit_error("LHS non terminal \"" + lhs_name.str_val +
                             "\" has not been declared");

          /* start with an empty right hand side */
          new_rhs();
        :}
        COLON_COLON_EQUALS rhs_list SEMI
    |
        /* error recovery -- sync on semicolon */
        error
        {: lexer.emit_error("Syntax Error"); :}
        SEMI
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* one or more right hand sides separated by BAR */
rhs_list ::=
        rhs_list BAR rhs
    |
        rhs
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
rhs ::=
        prod_part_list
        {:
          /* nothing is built when the left hand side was not resolved */
          if (lhs_nt != null)
            {
              /* build the production from the accumulated parts; the
                 object is not kept in a variable here */
              new production(lhs_nt, rhs_parts, rhs_pos);

              /* with no start non terminal established yet, this
                 production's lhs becomes the start non terminal and
                 the special start production is built for it */
              if (start_nt == null)
                {
                  start_nt = lhs_nt;

                  /*  $START ::= start_nt EOF  */
                  new_rhs();
                  add_rhs_part(new symbol_part(start_nt));
                  add_rhs_part(new symbol_part(terminal.EOF));
                  emit.start_production =
                    new production(non_terminal.START_nt, rhs_parts, rhs_pos);
                  new_rhs();
                }
            }

          /* whatever happened, the next rhs starts out empty */
          new_rhs();
        :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* zero or more production parts (left recursive) */
prod_part_list ::=
        prod_part_list prod_part
    |
        empty
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
prod_part ::=
        symbol_id:sym_name opt_label:lab
        {:
          /* resolve the name against the table of declared symbols */
          production_part found =
            (production_part)symbols.get(sym_name.str_val);

          if (found != null)
            {
              /* append the symbol, labeled if a label was given */
              add_rhs_part(add_lab(found, lab.str_val));
            }
          else if (lexer.error_count == 0)
            {
              /* undeclared; reported only while no other error has
                 been counted by the lexer */
              lexer.emit_error("Symbol \"" + sym_name.str_val +
                               "\" has not been declared");
            }
        :}
    |
        CODE_STRING:code_tok
        {:
          /* an embedded action becomes a part of its own */
          add_rhs_part(new action_part(code_tok.str_val));
        :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* optional ":label" suffix; str_val of the result is the label text,
   or null when no label was written */
opt_label ::=
        COLON label_id:lab
        {: RESULT.str_val = lab.str_val; :}
    |
        empty
        {: RESULT.str_val = null; :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* dotted identifier; each segment is appended to multipart_name */
multipart_id ::=
        multipart_id DOT ID:next_seg
        {: append_multipart(next_seg.str_val); :}
    |
        ID:first_seg
        {: append_multipart(first_seg.str_val); :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* imported name: a dotted identifier, optionally ending in ".*" */
import_id ::=
        multipart_id DOT STAR
        {: append_multipart("*"); :}
    |
        multipart_id
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* a type is written as a dotted identifier */
type_id ::=
        multipart_id
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
new_term_id ::=
        ID:new_name
        {:
          if (symbols.get(new_name.str_val) == null)
            {
              /* first declaration of this name: create the terminal
                 (multipart_name supplies its type) and enter it in
                 the symbol table */
              symbols.put(new_name.str_val,
                new symbol_part(new terminal(new_name.str_val, multipart_name)));
            }
          else
            {
              /* the name is already taken */
              lexer.emit_error("Symbol \"" + new_name.str_val +
                               "\" has already been declared");
            }
        :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
new_non_term_id ::=
        ID:new_name
        {:
          if (symbols.get(new_name.str_val) == null)
            {
              /* first declaration of this name: create the non terminal
                 (multipart_name supplies its type) */
              non_terminal fresh_nt =
                new non_terminal(new_name.str_val, multipart_name);

              /* enter it in the non terminal table ... */
              non_terms.put(new_name.str_val, fresh_nt);

              /* ... and, wrapped as a production part, in the table of
                 all symbols */
              symbols.put(new_name.str_val, new symbol_part(fresh_nt));
            }
          else
            {
              /* the name is already taken */
              lexer.emit_error( "Symbol \"" + new_name.str_val +
                                "\" has already been declared");
            }
        :}
    ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* name of a non terminal; the identifier text is passed up in str_val */
nt_id ::= ID:id_tok {: RESULT.str_val = id_tok.str_val; :} ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* name of a symbol; the identifier text is passed up in str_val */
symbol_id ::= ID:id_tok {: RESULT.str_val = id_tok.str_val; :} ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* name of a label; the identifier text is passed up in str_val */
label_id ::= ID:id_tok {: RESULT.str_val = id_tok.str_val; :} ;
|
|
/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
|
|
/* the empty production */
empty ::=
        /* nothing */
    ;
|
|
/*----------------------------------------------------------------*/
|