/** \file
|
* Contains the base functions that all recognizers require.
|
* Any function can be overridden by a lexer/parser/tree parser or by the
|
* ANTLR3 programmer.
|
*
|
* \addtogroup pANTLR3_BASE_RECOGNIZER
|
* @{
|
*/
|
#include <antlr3baserecognizer.h>
|
|
// [The "BSD licence"]
|
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
|
// http://www.temporal-wave.com
|
// http://www.linkedin.com/in/jimidle
|
//
|
// All rights reserved.
|
//
|
// Redistribution and use in source and binary forms, with or without
|
// modification, are permitted provided that the following conditions
|
// are met:
|
// 1. Redistributions of source code must retain the above copyright
|
// notice, this list of conditions and the following disclaimer.
|
// 2. Redistributions in binary form must reproduce the above copyright
|
// notice, this list of conditions and the following disclaimer in the
|
// documentation and/or other materials provided with the distribution.
|
// 3. The name of the author may not be used to endorse or promote products
|
// derived from this software without specific prior written permission.
|
//
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#ifdef ANTLR3_WINDOWS
|
#pragma warning( disable : 4100 )
|
#endif
|
|
/* Interface functions -standard implementations cover parser and treeparser
|
* almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
|
* most of these functions.
|
*/
|
static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
|
static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
|
static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
|
|
static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
|
static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
|
static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
|
static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
|
static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
|
static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
|
static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
|
static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
|
static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
|
static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
|
static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
|
static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
|
static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
|
static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
|
static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
|
static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
|
static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
|
static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
|
static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
|
static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
|
static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
|
static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
|
static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
|
ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
|
static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
|
|
ANTLR3_API pANTLR3_BASE_RECOGNIZER
|
antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
|
{
|
pANTLR3_BASE_RECOGNIZER recognizer;
|
|
// Allocate memory for the structure
|
//
|
recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
|
|
if (recognizer == NULL)
|
{
|
// Allocation failed
|
//
|
return NULL;
|
}
|
|
|
// If we have been supplied with a pre-existing recognizer state
|
// then we just install it, otherwise we must create one from scratch
|
//
|
if (state == NULL)
|
{
|
recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
|
|
if (recognizer->state == NULL)
|
{
|
ANTLR3_FREE(recognizer);
|
return NULL;
|
}
|
|
// Initialize any new recognizer state
|
//
|
recognizer->state->errorRecovery = ANTLR3_FALSE;
|
recognizer->state->lastErrorIndex = -1;
|
recognizer->state->failed = ANTLR3_FALSE;
|
recognizer->state->errorCount = 0;
|
recognizer->state->backtracking = 0;
|
recognizer->state->following = NULL;
|
recognizer->state->ruleMemo = NULL;
|
recognizer->state->tokenNames = NULL;
|
recognizer->state->sizeHint = sizeHint;
|
recognizer->state->tokSource = NULL;
|
recognizer->state->tokFactory = NULL;
|
|
// Rather than check to see if we must initialize
|
// the stack every time we are asked for an new rewrite stream
|
// we just always create an empty stack and then just
|
// free it when the base recognizer is freed.
|
//
|
recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
|
|
if (recognizer->state->rStreams == NULL)
|
{
|
// Out of memory
|
//
|
ANTLR3_FREE(recognizer->state);
|
ANTLR3_FREE(recognizer);
|
return NULL;
|
}
|
}
|
else
|
{
|
// Install the one we were given, and do not reset it here
|
// as it will either already have been initialized or will
|
// be in a state that needs to be preserved.
|
//
|
recognizer->state = state;
|
}
|
|
// Install the BR API
|
//
|
recognizer->alreadyParsedRule = alreadyParsedRule;
|
recognizer->beginResync = beginResync;
|
recognizer->combineFollows = combineFollows;
|
recognizer->beginBacktrack = beginBacktrack;
|
recognizer->endBacktrack = endBacktrack;
|
recognizer->computeCSRuleFollow = computeCSRuleFollow;
|
recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
|
recognizer->consumeUntil = consumeUntil;
|
recognizer->consumeUntilSet = consumeUntilSet;
|
recognizer->displayRecognitionError = displayRecognitionError;
|
recognizer->endResync = endResync;
|
recognizer->exConstruct = antlr3MTExceptionNew;
|
recognizer->getRuleInvocationStack = getRuleInvocationStack;
|
recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
|
recognizer->getRuleMemoization = getRuleMemoization;
|
recognizer->match = match;
|
recognizer->matchAny = matchAny;
|
recognizer->memoize = memoize;
|
recognizer->mismatch = mismatch;
|
recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
|
recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
|
recognizer->recover = recover;
|
recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
|
recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
|
recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
|
recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
|
recognizer->reportError = reportError;
|
recognizer->reset = reset;
|
recognizer->synpred = synpred;
|
recognizer->toStrings = toStrings;
|
recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
|
recognizer->getMissingSymbol = getMissingSymbol;
|
recognizer->debugger = NULL;
|
|
recognizer->free = freeBR;
|
|
/* Initialize variables
|
*/
|
recognizer->type = type;
|
|
|
return recognizer;
|
}
|
static void
|
freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
pANTLR3_EXCEPTION thisE;
|
|
// Did we have a state allocated?
|
//
|
if (recognizer->state != NULL)
|
{
|
// Free any rule memoization we set up
|
//
|
if (recognizer->state->ruleMemo != NULL)
|
{
|
recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
|
recognizer->state->ruleMemo = NULL;
|
}
|
|
// Free any exception space we have left around
|
//
|
thisE = recognizer->state->exception;
|
if (thisE != NULL)
|
{
|
thisE->freeEx(thisE);
|
}
|
|
// Free any rewrite streams we have allocated
|
//
|
if (recognizer->state->rStreams != NULL)
|
{
|
recognizer->state->rStreams->free(recognizer->state->rStreams);
|
}
|
|
// Free up any token factory we created (error recovery for instance)
|
//
|
if (recognizer->state->tokFactory != NULL)
|
{
|
recognizer->state->tokFactory->close(recognizer->state->tokFactory);
|
}
|
// Free the shared state memory
|
//
|
ANTLR3_FREE(recognizer->state);
|
}
|
|
// Free the actual recognizer space
|
//
|
ANTLR3_FREE(recognizer);
|
}
|
|
/**
|
* Creates a new Mismatched Token Exception and inserts in the recognizer
|
* exception stack.
|
*
|
* \param recognizer
|
* Context pointer for this recognizer
|
*
|
*/
|
ANTLR3_API void
|
antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
/* Create a basic recognition exception structure
|
*/
|
antlr3RecognitionExceptionNew(recognizer);
|
|
/* Now update it to indicate this is a Mismatched token exception
|
*/
|
recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
|
recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
|
|
return;
|
}
|
|
ANTLR3_API void
|
antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
pANTLR3_EXCEPTION ex;
|
pANTLR3_LEXER lexer;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
|
pANTLR3_INPUT_STREAM ins;
|
pANTLR3_INT_STREAM is;
|
pANTLR3_COMMON_TOKEN_STREAM cts;
|
pANTLR3_TREE_NODE_STREAM tns;
|
|
ins = NULL;
|
cts = NULL;
|
tns = NULL;
|
is = NULL;
|
lexer = NULL;
|
parser = NULL;
|
tparser = NULL;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_LEXER:
|
|
lexer = (pANTLR3_LEXER) (recognizer->super);
|
ins = lexer->input;
|
is = ins->istream;
|
|
break;
|
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
tns = tparser->ctnstream->tnstream;
|
is = tns->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
/* Create a basic exception structure
|
*/
|
ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
|
(void *)ANTLR3_RECOGNITION_EX_NAME,
|
NULL,
|
ANTLR3_FALSE);
|
|
/* Rest of information depends on the base type of the
|
* input stream.
|
*/
|
switch (is->type & ANTLR3_INPUT_MASK)
|
{
|
case ANTLR3_CHARSTREAM:
|
|
ex->c = is->_LA (is, 1); /* Current input character */
|
ex->line = ins->getLine (ins); /* Line number comes from stream */
|
ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
|
ex->index = is->index (is);
|
ex->streamName = ins->fileName;
|
ex->message = "Unexpected character";
|
break;
|
|
case ANTLR3_TOKENSTREAM:
|
|
ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */
|
ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine ((pANTLR3_COMMON_TOKEN)(ex->token));
|
ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine ((pANTLR3_COMMON_TOKEN)(ex->token));
|
ex->index = cts->tstream->istream->index (cts->tstream->istream);
|
if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
|
{
|
ex->streamName = NULL;
|
}
|
else
|
{
|
ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
|
}
|
ex->message = "Unexpected token";
|
break;
|
|
case ANTLR3_COMMONTREENODE:
|
|
ex->token = tns->_LT (tns, 1); /* Current input tree node */
|
ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine ((pANTLR3_BASE_TREE)(ex->token));
|
ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine ((pANTLR3_BASE_TREE)(ex->token));
|
ex->index = tns->istream->index (tns->istream);
|
|
// Are you ready for this? Deep breath now...
|
//
|
{
|
pANTLR3_COMMON_TREE tnode;
|
|
tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
|
|
if (tnode->token == NULL)
|
{
|
ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
|
}
|
else
|
{
|
if (tnode->token->input == NULL)
|
{
|
ex->streamName = NULL;
|
}
|
else
|
{
|
ex->streamName = tnode->token->input->fileName;
|
}
|
}
|
ex->message = "Unexpected node";
|
}
|
break;
|
}
|
|
ex->input = is;
|
ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
|
recognizer->state->exception = ex;
|
recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
|
|
return;
|
}
|
|
|
/// Match current input symbol against ttype. Upon error, do one token
|
/// insertion or deletion if possible.
|
/// To turn off single token insertion or deletion error
|
/// recovery, override mismatchRecover() and have it call
|
/// plain mismatch(), which does not recover. Then any error
|
/// in a rule will cause an exception and immediate exit from
|
/// rule. Rule would recover by resynchronizing to the set of
|
/// symbols that can follow rule ref.
|
///
|
static void *
|
match( pANTLR3_BASE_RECOGNIZER recognizer,
|
ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
void * matchedSymbol;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
|
return ANTLR3_FALSE;
|
|
break;
|
}
|
|
// Pick up the current input token/node for assignment to labels
|
//
|
matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
|
|
if (is->_LA(is, 1) == ttype)
|
{
|
// The token was the one we were told to expect
|
//
|
is->consume(is); // Consume that token from the stream
|
recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
|
recognizer->state->failed = ANTLR3_FALSE; // The match was a success
|
return matchedSymbol; // We are done
|
}
|
|
// We did not find the expected token type, if we are backtracking then
|
// we just set the failed flag and return.
|
//
|
if (recognizer->state->backtracking > 0)
|
{
|
// Backtracking is going on
|
//
|
recognizer->state->failed = ANTLR3_TRUE;
|
return matchedSymbol;
|
}
|
|
// We did not find the expected token and there is no backtracking
|
// going on, so we mismatch, which creates an exception in the recognizer exception
|
// stack.
|
//
|
matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
|
return matchedSymbol;
|
}
|
|
/// Consumes the next token, whatever it is, and resets the recognizer state
|
/// so that it is not in error.
|
///
|
/// \param recognizer
|
/// Recognizer context pointer
|
///
|
static void
|
matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
recognizer->state->errorRecovery = ANTLR3_FALSE;
|
recognizer->state->failed = ANTLR3_FALSE;
|
is->consume(is);
|
|
return;
|
}
|
///
|
///
|
static ANTLR3_BOOLEAN
|
mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
|
{
|
ANTLR3_UINT32 nextt;
|
|
nextt = is->_LA(is, 2);
|
|
if (nextt == ttype)
|
{
|
if (recognizer->state->exception != NULL)
|
{
|
recognizer->state->exception->expecting = nextt;
|
}
|
return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
|
}
|
else
|
{
|
return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
|
}
|
}
|
|
///
|
///
|
static ANTLR3_BOOLEAN
|
mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
|
{
|
ANTLR3_BOOLEAN retcode;
|
pANTLR3_BITSET followClone;
|
pANTLR3_BITSET viableTokensFollowingThisRule;
|
|
if (follow == NULL)
|
{
|
// There is no information about the tokens that can follow the last one
|
// hence we must say that the current one we found is not a member of the
|
// follow set and does not indicate a missing token. We will just consume this
|
// single token and see if the parser works it out from there.
|
//
|
return ANTLR3_FALSE;
|
}
|
|
followClone = NULL;
|
viableTokensFollowingThisRule = NULL;
|
|
// The C bitset maps are laid down at compile time by the
|
// C code generation. Hence we cannot remove things from them
|
// and so on. So, in order to remove EOR (if we need to) then
|
// we clone the static bitset.
|
//
|
followClone = antlr3BitsetLoad(follow);
|
if (followClone == NULL)
|
{
|
return ANTLR3_FALSE;
|
}
|
|
// Compute what can follow this grammar reference
|
//
|
if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
|
{
|
// EOR can follow, but if we are not the start symbol, we
|
// need to remove it.
|
//
|
//if (recognizer->state->following->vector->count >= 0) ml: always true
|
{
|
followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
|
}
|
|
// Now compute the visiable tokens that can follow this rule, according to context
|
// and make them part of the follow set.
|
//
|
viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
|
followClone->borInPlace(followClone, viableTokensFollowingThisRule);
|
}
|
|
/// if current token is consistent with what could come after set
|
/// then we know we're missing a token; error recovery is free to
|
/// "insert" the missing token
|
///
|
/// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
|
/// in follow set to indicate that the fall of the start symbol is
|
/// in the set (EOF can follow).
|
///
|
if ( followClone->isMember(followClone, is->_LA(is, 1))
|
|| followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
|
)
|
{
|
retcode = ANTLR3_TRUE;
|
}
|
else
|
{
|
retcode = ANTLR3_FALSE;
|
}
|
|
if (viableTokensFollowingThisRule != NULL)
|
{
|
viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
|
}
|
if (followClone != NULL)
|
{
|
followClone->free(followClone);
|
}
|
|
return retcode;
|
|
}
|
|
/// Factor out what to do upon token mismatch so tree parsers can behave
|
/// differently. Override and call mismatchRecover(input, ttype, follow)
|
/// to get single token insertion and deletion. Use this to turn off
|
/// single token insertion and deletion. Override mismatchRecover
|
/// to call this instead.
|
///
|
/// \remark mismatch only works for parsers and must be overridden for anything else.
|
///
|
static void
|
mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
// Install a mismatched token exception in the exception stack
|
//
|
antlr3MTExceptionNew(recognizer);
|
recognizer->state->exception->expecting = ttype;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
if (mismatchIsUnwantedToken(recognizer, is, ttype))
|
{
|
// Create a basic recognition exception structure
|
//
|
antlr3RecognitionExceptionNew(recognizer);
|
|
// Now update it to indicate this is an unwanted token exception
|
//
|
recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
|
recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
|
|
return;
|
}
|
|
if (mismatchIsMissingToken(recognizer, is, follow))
|
{
|
// Create a basic recognition exception structure
|
//
|
antlr3RecognitionExceptionNew(recognizer);
|
|
// Now update it to indicate this is an unwanted token exception
|
//
|
recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
|
recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
|
|
return;
|
}
|
|
// Just a mismatched token is all we can dtermine
|
//
|
antlr3MTExceptionNew(recognizer);
|
|
return;
|
}
|
/// Report a recognition problem.
|
///
|
/// This method sets errorRecovery to indicate the parser is recovering
|
/// not parsing. Once in recovery mode, no errors are generated.
|
/// To get out of recovery mode, the parser must successfully match
|
/// a token (after a resync). So it will go:
|
///
|
/// 1. error occurs
|
/// 2. enter recovery mode, report error
|
/// 3. consume until token found in resynch set
|
/// 4. try to resume parsing
|
/// 5. next match() will reset errorRecovery mode
|
///
|
/// If you override, make sure to update errorCount if you care about that.
|
///
|
static void
|
reportError (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
// Invoke the debugger event if there is a debugger listening to us
|
//
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
|
}
|
|
if (recognizer->state->errorRecovery == ANTLR3_TRUE)
|
{
|
// Already in error recovery so don't display another error while doing so
|
//
|
return;
|
}
|
|
// Signal we are in error recovery now
|
//
|
recognizer->state->errorRecovery = ANTLR3_TRUE;
|
|
// Indicate this recognizer had an error while processing.
|
//
|
recognizer->state->errorCount++;
|
|
// Call the error display routine
|
//
|
recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
|
}
|
|
static void
|
beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
|
{
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->beginBacktrack(recognizer->debugger, level);
|
}
|
}
|
|
static void
|
endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
|
{
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
|
}
|
}
|
static void
|
beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->beginResync(recognizer->debugger);
|
}
|
}
|
|
static void
|
endResync (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->endResync(recognizer->debugger);
|
}
|
}
|
|
/// Compute the error recovery set for the current rule.
|
/// Documentation below is from the Java implementation.
|
///
|
/// During rule invocation, the parser pushes the set of tokens that can
|
/// follow that rule reference on the stack; this amounts to
|
/// computing FIRST of what follows the rule reference in the
|
/// enclosing rule. This local follow set only includes tokens
|
/// from within the rule; i.e., the FIRST computation done by
|
/// ANTLR stops at the end of a rule.
|
//
|
/// EXAMPLE
|
//
|
/// When you find a "no viable alt exception", the input is not
|
/// consistent with any of the alternatives for rule r. The best
|
/// thing to do is to consume tokens until you see something that
|
/// can legally follow a call to r *or* any rule that called r.
|
/// You don't want the exact set of viable next tokens because the
|
/// input might just be missing a token--you might consume the
|
/// rest of the input looking for one of the missing tokens.
|
///
|
/// Consider grammar:
|
///
|
/// a : '[' b ']'
|
/// | '(' b ')'
|
/// ;
|
/// b : c '^' INT ;
|
/// c : ID
|
/// | INT
|
/// ;
|
///
|
/// At each rule invocation, the set of tokens that could follow
|
/// that rule is pushed on a stack. Here are the various "local"
|
/// follow sets:
|
///
|
/// FOLLOW(b1_in_a) = FIRST(']') = ']'
|
/// FOLLOW(b2_in_a) = FIRST(')') = ')'
|
/// FOLLOW(c_in_b) = FIRST('^') = '^'
|
///
|
/// Upon erroneous input "[]", the call chain is
|
///
|
/// a -> b -> c
|
///
|
/// and, hence, the follow context stack is:
|
///
|
/// depth local follow set after call to rule
|
/// 0 <EOF> a (from main())
|
/// 1 ']' b
|
/// 2 '^' c
|
///
|
/// Notice that ')' is not included, because b would have to have
|
/// been called from a different context in rule a for ')' to be
|
/// included.
|
///
|
/// For error recovery, we cannot consider FOLLOW(c)
|
/// (context-sensitive or otherwise). We need the combined set of
|
/// all context-sensitive FOLLOW sets--the set of all tokens that
|
/// could follow any reference in the call chain. We need to
|
/// resync to one of those tokens. Note that FOLLOW(c)='^' and if
|
/// we resync'd to that token, we'd consume until EOF. We need to
|
/// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
|
/// In this case, for input "[]", LA(1) is in this set so we would
|
/// not consume anything and after printing an error rule c would
|
/// return normally. It would not find the required '^' though.
|
/// At this point, it gets a mismatched token error and throws an
|
/// exception (since LA(1) is not in the viable following token
|
/// set). The rule exception handler tries to recover, but finds
|
/// the same recovery set and doesn't consume anything. Rule b
|
/// exits normally returning to rule a. Now it finds the ']' (and
|
/// with the successful match exits errorRecovery mode).
|
///
|
/// So, you can see that the parser walks up call chain looking
|
/// for the token that was a member of the recovery set.
|
///
|
/// Errors are not generated in errorRecovery mode.
|
///
|
/// ANTLR's error recovery mechanism is based upon original ideas:
|
///
|
/// "Algorithms + Data Structures = Programs" by Niklaus Wirth
|
///
|
/// and
|
///
|
/// "A note on error recovery in recursive descent parsers":
|
/// http://portal.acm.org/citation.cfm?id=947902.947905
|
///
|
/// Later, Josef Grosch had some good ideas:
|
///
|
/// "Efficient and Comfortable Error Recovery in Recursive Descent
|
/// Parsers":
|
/// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
|
///
|
/// Like Grosch I implemented local FOLLOW sets that are combined
|
/// at run-time upon error to avoid overhead during parsing.
|
///
|
static pANTLR3_BITSET
|
computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
|
}
|
|
/// Compute the context-sensitive FOLLOW set for current rule.
|
/// Documentation below is from the Java runtime.
|
///
|
/// This is the set of token types that can follow a specific rule
|
/// reference given a specific call chain. You get the set of
|
/// viable tokens that can possibly come next (look ahead depth 1)
|
/// given the current call chain. Contrast this with the
|
/// definition of plain FOLLOW for rule r:
|
///
|
/// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
|
///
|
/// where x in T* and alpha, beta in V*; T is set of terminals and
|
/// V is the set of terminals and non terminals. In other words,
|
/// FOLLOW(r) is the set of all tokens that can possibly follow
|
/// references to r in *any* sentential form (context). At
|
/// runtime, however, we know precisely which context applies as
|
/// we have the call chain. We may compute the exact (rather
|
/// than covering superset) set of following tokens.
|
///
|
/// For example, consider grammar:
|
///
|
/// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
|
/// | "return" expr '.'
|
/// ;
|
/// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
|
/// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
|
/// | '(' expr ')'
|
/// ;
|
///
|
/// The FOLLOW sets are all inclusive whereas context-sensitive
|
/// FOLLOW sets are precisely what could follow a rule reference.
|
/// For input "i=(3);", here is the derivation:
|
///
|
/// stat => ID '=' expr ';'
|
/// => ID '=' atom ('+' atom)* ';'
|
/// => ID '=' '(' expr ')' ('+' atom)* ';'
|
/// => ID '=' '(' atom ')' ('+' atom)* ';'
|
/// => ID '=' '(' INT ')' ('+' atom)* ';'
|
/// => ID '=' '(' INT ')' ';'
|
///
|
/// At the "3" token, you'd have a call chain of
|
///
|
/// stat -> expr -> atom -> expr -> atom
|
///
|
/// What can follow that specific nested ref to atom? Exactly ')'
|
/// as you can see by looking at the derivation of this specific
|
/// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
|
///
|
/// You want the exact viable token set when recovering from a
|
/// token mismatch. Upon token mismatch, if LA(1) is member of
|
/// the viable next token set, then you know there is most likely
|
/// a missing token in the input stream. "Insert" one by just not
|
/// throwing an exception.
|
///
|
static pANTLR3_BITSET
|
computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
|
}
|
|
/// Compute the current followset for the input stream.
|
///
|
static pANTLR3_BITSET
|
combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
|
{
|
pANTLR3_BITSET followSet;
|
pANTLR3_BITSET localFollowSet;
|
ANTLR3_UINT32 top;
|
ANTLR3_UINT32 i;
|
|
top = recognizer->state->following->size(recognizer->state->following);
|
|
followSet = antlr3BitsetNew(0);
|
localFollowSet = NULL;
|
|
for (i = top; i>0; i--)
|
{
|
localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
|
|
if (localFollowSet != NULL)
|
{
|
followSet->borInPlace(followSet, localFollowSet);
|
|
if (exact == ANTLR3_TRUE)
|
{
|
if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
|
{
|
// Only leave EOR in the set if at top (start rule); this lets us know
|
// if we have to include the follow(start rule); I.E., EOF
|
//
|
if (i>1)
|
{
|
followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
|
}
|
}
|
else
|
{
|
break; // Cannot see End Of Rule from here, just drop out
|
}
|
}
|
localFollowSet->free(localFollowSet);
|
localFollowSet = NULL;
|
}
|
}
|
|
if (localFollowSet != NULL)
|
{
|
localFollowSet->free(localFollowSet);
|
}
|
return followSet;
|
}
|
|
/// Standard/Example error display method.
|
/// No generic error message display funciton coudl possibly do everything correctly
|
/// for all possible parsers. Hence you are provided with this example routine, which
|
/// you should override in your parser/tree parser to do as you will.
|
///
|
/// Here we depart somewhat from the Java runtime as that has now split up a lot
|
/// of the error display routines into spearate units. However, ther is little advantage
|
/// to this in the C version as you will probably implement all such routines as a
|
/// separate translation unit, rather than install them all as pointers to functions
|
/// in the base recognizer.
|
///
|
static void
|
displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
pANTLR3_STRING ttext;
|
pANTLR3_STRING ftext;
|
pANTLR3_EXCEPTION ex;
|
pANTLR3_COMMON_TOKEN theToken;
|
pANTLR3_BASE_TREE theBaseTree;
|
pANTLR3_COMMON_TREE theCommonTree;
|
|
// Retrieve some info for easy reading.
|
//
|
ex = recognizer->state->exception;
|
ttext = NULL;
|
|
// See if there is a 'filename' we can use
|
//
|
if (ex->streamName == NULL)
|
{
|
if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
|
{
|
ANTLR3_FPRINTF(stderr, "-end of input-(");
|
}
|
else
|
{
|
ANTLR3_FPRINTF(stderr, "-unknown source-(");
|
}
|
}
|
else
|
{
|
ftext = ex->streamName->to8(ex->streamName);
|
ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
|
}
|
|
// Next comes the line number
|
//
|
|
ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
|
ANTLR3_FPRINTF(stderr, " : error %d : %s",
|
recognizer->state->exception->type,
|
(pANTLR3_UINT8) (recognizer->state->exception->message));
|
|
|
// How we determine the next piece is dependent on which thing raised the
|
// error.
|
//
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
// Prepare the knowledge we know we have
|
//
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
|
ttext = theToken->toString(theToken);
|
|
ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
|
if (theToken != NULL)
|
{
|
if (theToken->type == ANTLR3_TOKEN_EOF)
|
{
|
ANTLR3_FPRINTF(stderr, ", at <EOF>");
|
}
|
else
|
{
|
// Guard against null text in a token
|
//
|
ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
|
}
|
}
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
|
ttext = theBaseTree->toStringTree(theBaseTree);
|
|
if (theBaseTree != NULL)
|
{
|
theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
|
|
if (theCommonTree != NULL)
|
{
|
theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
|
}
|
ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
|
ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
|
}
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
|
return;
|
break;
|
}
|
|
// Although this function should generally be provided by the implementation, this one
|
// should be as helpful as possible for grammar developers and serve as an example
|
// of what you can do with each exception type. In general, when you make up your
|
// 'real' handler, you should debug the routine with all possible errors you expect
|
// which will then let you be as specific as possible about all circumstances.
|
//
|
// Note that in the general case, errors thrown by tree parsers indicate a problem
|
// with the output of the parser or with the tree grammar itself. The job of the parser
|
// is to produce a perfect (in traversal terms) syntactically correct tree, so errors
|
// at that stage should really be semantic errors that your own code determines and handles
|
// in whatever way is appropriate.
|
//
|
switch (ex->type)
|
{
|
case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
|
|
// Indicates that the recognizer was fed a token which seesm to be
|
// spurious input. We can detect this when the token that follows
|
// this unwanted token would normally be part of the syntactically
|
// correct stream. Then we can see that the token we are looking at
|
// is just something that should not be there and throw this exception.
|
//
|
if (tokenNames == NULL)
|
{
|
ANTLR3_FPRINTF(stderr, " : Extraneous input...");
|
}
|
else
|
{
|
if (ex->expecting == ANTLR3_TOKEN_EOF)
|
{
|
ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
|
}
|
else
|
{
|
ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
|
}
|
}
|
break;
|
|
case ANTLR3_MISSING_TOKEN_EXCEPTION:
|
|
// Indicates that the recognizer detected that the token we just
|
// hit would be valid syntactically if preceeded by a particular
|
// token. Perhaps a missing ';' at line end or a missing ',' in an
|
// expression list, and such like.
|
//
|
if (tokenNames == NULL)
|
{
|
ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
|
}
|
else
|
{
|
if (ex->expecting == ANTLR3_TOKEN_EOF)
|
{
|
ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
|
}
|
else
|
{
|
ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
|
}
|
}
|
break;
|
|
case ANTLR3_RECOGNITION_EXCEPTION:
|
|
// Indicates that the recognizer received a token
|
// in the input that was not predicted. This is the basic exception type
|
// from which all others are derived. So we assume it was a syntax error.
|
// You may get this if there are not more tokens and more are needed
|
// to complete a parse for instance.
|
//
|
ANTLR3_FPRINTF(stderr, " : syntax error...\n");
|
break;
|
|
case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
|
|
// We were expecting to see one thing and got another. This is the
|
// most common error if we coudl not detect a missing or unwanted token.
|
// Here you can spend your efforts to
|
// derive more useful error messages based on the expected
|
// token set and the last token and so on. The error following
|
// bitmaps do a good job of reducing the set that we were looking
|
// for down to something small. Knowing what you are parsing may be
|
// able to allow you to be even more specific about an error.
|
//
|
if (tokenNames == NULL)
|
{
|
ANTLR3_FPRINTF(stderr, " : syntax error...\n");
|
}
|
else
|
{
|
if (ex->expecting == ANTLR3_TOKEN_EOF)
|
{
|
ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
|
}
|
else
|
{
|
ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
|
}
|
}
|
break;
|
|
case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
|
|
// We could not pick any alt decision from the input given
|
// so god knows what happened - however when you examine your grammar,
|
// you should. It means that at the point where the current token occurred
|
// that the DFA indicates nowhere to go from here.
|
//
|
ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
|
|
break;
|
|
case ANTLR3_MISMATCHED_SET_EXCEPTION:
|
|
{
|
ANTLR3_UINT32 count;
|
ANTLR3_UINT32 bit;
|
ANTLR3_UINT32 size;
|
ANTLR3_UINT32 numbits;
|
pANTLR3_BITSET errBits;
|
|
// This means we were able to deal with one of a set of
|
// possible tokens at this point, but we did not see any
|
// member of that set.
|
//
|
ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
|
|
// What tokens could we have accepted at this point in the
|
// parse?
|
//
|
count = 0;
|
errBits = antlr3BitsetLoad (ex->expectingSet);
|
numbits = errBits->numBits (errBits);
|
size = errBits->size (errBits);
|
|
if (size > 0)
|
{
|
// However many tokens we could have dealt with here, it is usually
|
// not useful to print ALL of the set here. I arbitrarily chose 8
|
// here, but you should do whatever makes sense for you of course.
|
// No token number 0, so look for bit 1 and on.
|
//
|
for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
|
{
|
// TODO: This doesn;t look right - should be asking if the bit is set!!
|
//
|
if (tokenNames[bit])
|
{
|
ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
|
count++;
|
}
|
}
|
ANTLR3_FPRINTF(stderr, "\n");
|
}
|
else
|
{
|
ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
|
ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
|
}
|
}
|
break;
|
|
case ANTLR3_EARLY_EXIT_EXCEPTION:
|
|
// We entered a loop requiring a number of token sequences
|
// but found a token that ended that sequence earlier than
|
// we should have done.
|
//
|
ANTLR3_FPRINTF(stderr, " : missing elements...\n");
|
break;
|
|
default:
|
|
// We don't handle any other exceptions here, but you can
|
// if you wish. If we get an exception that hits this point
|
// then we are just going to report what we know about the
|
// token.
|
//
|
ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
|
break;
|
}
|
|
// Here you have the token that was in error which if this is
|
// the standard implementation will tell you the line and offset
|
// and also record the address of the start of the line in the
|
// input stream. You could therefore print the source line and so on.
|
// Generally though, I would expect that your lexer/parser will keep
|
// its own map of lines and source pointers or whatever as there
|
// are a lot of specific things you need to know about the input
|
// to do something like that.
|
// Here is where you do it though :-).
|
//
|
}
|
|
/// Return how many syntax errors were detected by this recognizer
|
///
|
static ANTLR3_UINT32
|
getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
return recognizer->state->errorCount;
|
}
|
|
/// Recover from an error found on the input stream. Mostly this is
|
/// NoViableAlt exceptions, but could be a mismatched token that
|
/// the match() routine could not recover from.
|
///
|
static void
|
recover (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
// Used to compute the follow set of tokens
|
//
|
pANTLR3_BITSET followSet;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
// Are we about to repeat the same error?
|
//
|
if (recognizer->state->lastErrorIndex == is->index(is))
|
{
|
// The last error was at the same token index point. This must be a case
|
// where LT(1) is in the recovery token set so nothing is
|
// consumed. Consume a single token so at least to prevent
|
// an infinite loop; this is a failsafe.
|
//
|
is->consume(is);
|
}
|
|
// Record error index position
|
//
|
recognizer->state->lastErrorIndex = is->index(is);
|
|
// Work out the follows set for error recovery
|
//
|
followSet = recognizer->computeErrorRecoverySet(recognizer);
|
|
// Call resync hook (for debuggers and so on)
|
//
|
recognizer->beginResync(recognizer);
|
|
// Consume tokens until we have resynced to something in the follows set
|
//
|
recognizer->consumeUntilSet(recognizer, followSet);
|
|
// End resync hook
|
//
|
recognizer->endResync(recognizer);
|
|
// Destroy the temporary bitset we produced.
|
//
|
followSet->free(followSet);
|
|
// Reset the inError flag so we don't re-report the exception
|
//
|
recognizer->state->error = ANTLR3_FALSE;
|
recognizer->state->failed = ANTLR3_FALSE;
|
}
|
|
|
/// Attempt to recover from a single missing or extra token.
|
///
|
/// EXTRA TOKEN
|
///
|
/// LA(1) is not what we are looking for. If LA(2) has the right token,
|
/// however, then assume LA(1) is some extra spurious token. Delete it
|
/// and LA(2) as if we were doing a normal match(), which advances the
|
/// input.
|
///
|
/// MISSING TOKEN
|
///
|
/// If current token is consistent with what could come after
|
/// ttype then it is ok to "insert" the missing token, else throw
|
/// exception For example, Input "i=(3;" is clearly missing the
|
/// ')'. When the parser returns from the nested call to expr, it
|
/// will have call chain:
|
///
|
/// stat -> expr -> atom
|
///
|
/// and it will be trying to match the ')' at this point in the
|
/// derivation:
|
///
|
/// => ID '=' '(' INT ')' ('+' atom)* ';'
|
/// ^
|
/// match() will see that ';' doesn't match ')' and report a
|
/// mismatched token error. To recover, it sees that LA(1)==';'
|
/// is in the set of tokens that can follow the ')' token
|
/// reference in rule atom. It can assume that you forgot the ')'.
|
///
|
/// The exception that was passed in, in the java implementation is
|
/// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
|
/// error flag and rules cascade back when this is set.
|
///
|
static void *
|
recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
void * matchedSymbol;
|
|
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
|
return NULL;
|
|
break;
|
}
|
|
// Create an exception if we need one
|
//
|
if (recognizer->state->exception == NULL)
|
{
|
antlr3RecognitionExceptionNew(recognizer);
|
}
|
|
// If the next token after the one we are looking at in the input stream
|
// is what we are looking for then we remove the one we have discovered
|
// from the stream by consuming it, then consume this next one along too as
|
// if nothing had happened.
|
//
|
if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
|
{
|
recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
|
recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
|
|
// Call resync hook (for debuggers and so on)
|
//
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->beginResync(recognizer->debugger);
|
}
|
|
// "delete" the extra token
|
//
|
recognizer->beginResync(recognizer);
|
is->consume(is);
|
recognizer->endResync(recognizer);
|
// End resync hook
|
//
|
if (recognizer->debugger != NULL)
|
{
|
recognizer->debugger->endResync(recognizer->debugger);
|
}
|
|
// Print out the error after we consume so that ANTLRWorks sees the
|
// token in the exception.
|
//
|
recognizer->reportError(recognizer);
|
|
// Return the token we are actually matching
|
//
|
matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
|
|
// Consume the token that the rule actually expected to get as if everything
|
// was hunky dory.
|
//
|
is->consume(is);
|
|
recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
|
|
return matchedSymbol;
|
}
|
|
// Single token deletion (Unwanted above) did not work
|
// so we see if we can insert a token instead by calculating which
|
// token would be missing
|
//
|
if (mismatchIsMissingToken(recognizer, is, follow))
|
{
|
// We can fake the missing token and proceed
|
//
|
matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
|
recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
|
recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
|
recognizer->state->exception->token = matchedSymbol;
|
recognizer->state->exception->expecting = ttype;
|
|
// Print out the error after we insert so that ANTLRWorks sees the
|
// token in the exception.
|
//
|
recognizer->reportError(recognizer);
|
|
recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
|
|
return matchedSymbol;
|
}
|
|
|
// Neither deleting nor inserting tokens allows recovery
|
// must just report the exception.
|
//
|
recognizer->state->error = ANTLR3_TRUE;
|
return NULL;
|
}
|
|
static void *
|
recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
|
{
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
pANTLR3_COMMON_TOKEN matchedSymbol;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
|
return NULL;
|
|
break;
|
}
|
|
if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
|
{
|
// We can fake the missing token and proceed
|
//
|
matchedSymbol = (pANTLR3_COMMON_TOKEN)recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
|
recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
|
recognizer->state->exception->token = matchedSymbol;
|
|
// Print out the error after we insert so that ANTLRWorks sees the
|
// token in the exception.
|
//
|
recognizer->reportError(recognizer);
|
|
recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
|
|
return matchedSymbol;
|
}
|
|
// TODO - Single token deletion like in recoverFromMismatchedToken()
|
//
|
recognizer->state->error = ANTLR3_TRUE;
|
recognizer->state->failed = ANTLR3_TRUE;
|
return NULL;
|
}
|
|
/// This code is factored out from mismatched token and mismatched set
|
/// recovery. It handles "single token insertion" error recovery for
|
/// both. No tokens are consumed to recover from insertions. Return
|
/// true if recovery was possible else return false.
|
///
|
static ANTLR3_BOOLEAN
|
recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
|
{
|
pANTLR3_BITSET viableToksFollowingRule;
|
pANTLR3_BITSET follow;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
|
return ANTLR3_FALSE;
|
|
break;
|
}
|
|
follow = antlr3BitsetLoad(followBits);
|
|
if (follow == NULL)
|
{
|
/* The follow set is NULL, which means we don't know what can come
|
* next, so we "hit and hope" by just signifying that we cannot
|
* recover, which will just cause the next token to be consumed,
|
* which might dig us out.
|
*/
|
return ANTLR3_FALSE;
|
}
|
|
/* We have a bitmap for the follow set, hence we can compute
|
* what can follow this grammar element reference.
|
*/
|
if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
|
{
|
/* First we need to know which of the available tokens are viable
|
* to follow this reference.
|
*/
|
viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
|
|
/* Remove the EOR token, which we do not wish to compute with
|
*/
|
follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
|
viableToksFollowingRule->free(viableToksFollowingRule);
|
/* We now have the computed set of what can follow the current token
|
*/
|
}
|
|
/* We can now see if the current token works with the set of tokens
|
* that could follow the current grammar reference. If it looks like it
|
* is consistent, then we can "insert" that token by not throwing
|
* an exception and assuming that we saw it.
|
*/
|
if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
|
{
|
/* report the error, but don't cause any rules to abort and stuff
|
*/
|
recognizer->reportError(recognizer);
|
if (follow != NULL)
|
{
|
follow->free(follow);
|
}
|
recognizer->state->error = ANTLR3_FALSE;
|
recognizer->state->failed = ANTLR3_FALSE;
|
return ANTLR3_TRUE; /* Success in recovery */
|
}
|
|
if (follow != NULL)
|
{
|
follow->free(follow);
|
}
|
|
/* We could not find anything viable to do, so this is going to
|
* cause an exception.
|
*/
|
return ANTLR3_FALSE;
|
}
|
|
/// Eat tokens from the input stream until we get one of JUST the right type
|
///
|
static void
|
consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
|
{
|
ANTLR3_UINT32 ttype;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
// What do have at the moment?
|
//
|
ttype = is->_LA(is, 1);
|
|
// Start eating tokens until we get to the one we want.
|
//
|
while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
|
{
|
is->consume(is);
|
ttype = is->_LA(is, 1);
|
}
|
}
|
|
/// Eat tokens from the input stream until we find one that
|
/// belongs to the supplied set.
|
///
|
static void
|
consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
|
{
|
ANTLR3_UINT32 ttype;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
// What do have at the moment?
|
//
|
ttype = is->_LA(is, 1);
|
|
// Start eating tokens until we get to one we want.
|
//
|
while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
|
{
|
is->consume(is);
|
ttype = is->_LA(is, 1);
|
}
|
}
|
|
/** Return the rule invocation stack (how we got here in the parse.
|
* In the java version Ter just asks the JVM for all the information
|
* but in C we don't get this information, so I am going to do nothing
|
* right now.
|
*/
|
static pANTLR3_STACK
|
getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
return NULL;
|
}
|
|
static pANTLR3_STACK
|
getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
|
{
|
return NULL;
|
}
|
|
/** Convenience method for template rewrites - NYI.
|
*/
|
static pANTLR3_HASH_TABLE
|
toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
|
{
|
return NULL;
|
}
|
|
static void ANTLR3_CDECL
|
freeIntTrie (void * trie)
|
{
|
((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
|
}
|
|
|
/** Pointer to a function to return whether the rule has parsed input starting at the supplied
|
* start index before. If the rule has not parsed input starting from the supplied start index,
|
* then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
|
* then it will return the point where it last stopped parsing after that start point.
|
*
|
* \remark
|
* The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
|
* issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
|
* version of the table.
|
*/
|
static ANTLR3_MARKER
|
getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
|
{
|
/* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
|
*/
|
pANTLR3_INT_TRIE ruleList;
|
ANTLR3_MARKER stopIndex;
|
pANTLR3_TRIE_ENTRY entry;
|
|
/* See if we have a list in the ruleMemos for this rule, and if not, then create one
|
* as we will need it eventually if we are being asked for the memo here.
|
*/
|
entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
|
|
if (entry == NULL)
|
{
|
/* Did not find it, so create a new one for it, with a bit depth based on the
|
* size of the input stream. We need the bit depth to incorporate the number if
|
* bits required to represent the largest possible stop index in the input, which is the
|
* last character. An int stream is free to return the largest 64 bit offset if it has
|
* no idea of the size, but you should remember that this will cause the leftmost
|
* bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
|
*/
|
ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
|
|
if (ruleList != NULL)
|
{
|
recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
|
}
|
|
/* We cannot have a stopIndex in a trie we have just created of course
|
*/
|
return MEMO_RULE_UNKNOWN;
|
}
|
|
ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
|
|
/* See if there is a stop index associated with the supplied start index.
|
*/
|
stopIndex = 0;
|
|
entry = ruleList->get(ruleList, ruleParseStart);
|
if (entry != NULL)
|
{
|
stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
|
}
|
|
if (stopIndex == 0)
|
{
|
return MEMO_RULE_UNKNOWN;
|
}
|
|
return stopIndex;
|
}
|
|
/** Has this rule already parsed input at the current index in the
|
* input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
|
* if we have not.
|
*
|
* This method has a side-effect: if we have seen this input for
|
* this rule and successfully parsed before, then seek ahead to
|
* 1 past the stop token matched for this rule last time.
|
*/
|
static ANTLR3_BOOLEAN
|
alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
|
{
|
ANTLR3_MARKER stopIndex;
|
pANTLR3_LEXER lexer;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
lexer = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
lexer = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_LEXER:
|
|
lexer = (pANTLR3_LEXER) (recognizer->super);
|
parser = NULL;
|
tparser = NULL;
|
is = lexer->input->istream;
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
|
return ANTLR3_FALSE;
|
|
break;
|
}
|
|
/* See if we have a memo marker for this.
|
*/
|
stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
|
|
if (stopIndex == MEMO_RULE_UNKNOWN)
|
{
|
return ANTLR3_FALSE;
|
}
|
|
if (stopIndex == MEMO_RULE_FAILED)
|
{
|
recognizer->state->failed = ANTLR3_TRUE;
|
}
|
else
|
{
|
is->seek(is, stopIndex+1);
|
}
|
|
/* If here then the rule was executed for this input already
|
*/
|
return ANTLR3_TRUE;
|
}
|
|
/** Record whether or not this rule parsed the input at this position
|
* successfully.
|
*/
|
static void
|
memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
|
{
|
/* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
|
*/
|
pANTLR3_INT_TRIE ruleList;
|
pANTLR3_TRIE_ENTRY entry;
|
ANTLR3_MARKER stopIndex;
|
pANTLR3_LEXER lexer;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_LEXER:
|
|
lexer = (pANTLR3_LEXER) (recognizer->super);
|
parser = NULL;
|
tparser = NULL;
|
is = lexer->input->istream;
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
|
return;
|
|
break;
|
}
|
|
stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
|
|
entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
|
|
if (entry != NULL)
|
{
|
ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
|
|
/* If we don't already have this entry, append it. The memoize trie does not
|
* accept duplicates so it won't add it if already there and we just ignore the
|
* return code as we don't care if it is there already.
|
*/
|
ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
|
}
|
}
|
/** A syntactic predicate. Returns true/false depending on whether
|
* the specified grammar fragment matches the current input stream.
|
* This resets the failed instance var afterwards.
|
*/
|
static ANTLR3_BOOLEAN
|
synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
|
{
|
ANTLR3_MARKER start;
|
pANTLR3_PARSER parser;
|
pANTLR3_TREE_PARSER tparser;
|
pANTLR3_INT_STREAM is;
|
|
switch (recognizer->type)
|
{
|
case ANTLR3_TYPE_PARSER:
|
|
parser = (pANTLR3_PARSER) (recognizer->super);
|
tparser = NULL;
|
is = parser->tstream->istream;
|
|
break;
|
|
case ANTLR3_TYPE_TREE_PARSER:
|
|
tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
|
parser = NULL;
|
is = tparser->ctnstream->tnstream->istream;
|
|
break;
|
|
default:
|
|
ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
|
return ANTLR3_FALSE;
|
|
break;
|
}
|
|
/* Begin backtracking so we can get back to where we started after trying out
|
* the syntactic predicate.
|
*/
|
start = is->mark(is);
|
recognizer->state->backtracking++;
|
|
/* Try the syntactical predicate
|
*/
|
predicate(ctx);
|
|
/* Reset
|
*/
|
is->rewind(is, start);
|
recognizer->state->backtracking--;
|
|
if (recognizer->state->failed == ANTLR3_TRUE)
|
{
|
/* Predicate failed
|
*/
|
recognizer->state->failed = ANTLR3_FALSE;
|
return ANTLR3_FALSE;
|
}
|
else
|
{
|
/* Predicate was successful
|
*/
|
recognizer->state->failed = ANTLR3_FALSE;
|
return ANTLR3_TRUE;
|
}
|
}
|
|
static void
|
reset(pANTLR3_BASE_RECOGNIZER recognizer)
|
{
|
if (recognizer->state->following != NULL)
|
{
|
recognizer->state->following->free(recognizer->state->following);
|
}
|
|
// Reset the state flags
|
//
|
recognizer->state->errorRecovery = ANTLR3_FALSE;
|
recognizer->state->lastErrorIndex = -1;
|
recognizer->state->failed = ANTLR3_FALSE;
|
recognizer->state->errorCount = 0;
|
recognizer->state->backtracking = 0;
|
recognizer->state->following = NULL;
|
|
if (recognizer->state != NULL)
|
{
|
if (recognizer->state->ruleMemo != NULL)
|
{
|
recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
|
recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
|
}
|
}
|
|
// ml: 2013-11-05, added reset of old exceptions.
|
pANTLR3_EXCEPTION thisE = recognizer->state->exception;
|
if (thisE != NULL)
|
{
|
thisE->freeEx(thisE);
|
recognizer->state->exception = NULL;
|
}
|
|
// Install a new following set
|
//
|
recognizer->state->following = antlr3StackNew(8);
|
|
}
|
|
// Default implementation is for parser and assumes a token stream as supplied by the runtime.
|
// You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
|
//
|
static void *
|
getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
|
{
|
return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
|
}
|
|
// Default implementation is for parser and assumes a token stream as supplied by the runtime.
|
// You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
|
//
|
static void *
|
getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
|
ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
|
{
|
pANTLR3_TOKEN_STREAM ts;
|
pANTLR3_COMMON_TOKEN_STREAM cts;
|
pANTLR3_COMMON_TOKEN token;
|
pANTLR3_COMMON_TOKEN current;
|
pANTLR3_STRING text;
|
|
// Dereference the standard pointers
|
//
|
ts = (pANTLR3_TOKEN_STREAM)istream->super;
|
cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
|
|
// Work out what to use as the current symbol to make a line and offset etc
|
// If we are at EOF, we use the token before EOF
|
//
|
current = ts->_LT(ts, 1);
|
if (current->getType(current) == ANTLR3_TOKEN_EOF)
|
{
|
current = ts->_LT(ts, -1);
|
}
|
|
// Create a new empty token
|
//
|
if (recognizer->state->tokFactory == NULL)
|
{
|
// We don't yet have a token factory for making tokens
|
// we just need a fake one using the input stream of the current
|
// token.
|
//
|
recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
|
}
|
token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
|
if (token == NULL) { return NULL; }
|
|
// Set some of the token properties based on the current token
|
//
|
token->setLine (token, current->getLine(current));
|
token->setCharPositionInLine (token, current->getCharPositionInLine(current));
|
token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
|
token->setType (token, expectedTokenType);
|
token->user1 = current->user1;
|
token->user2 = current->user2;
|
token->user3 = current->user3;
|
token->custom = current->custom;
|
token->lineStart = current->lineStart;
|
|
// Create the token text that shows it has been inserted
|
//
|
token->setText8(token, (pANTLR3_UINT8)"<missing ");
|
text = token->getText(token);
|
|
if (text != NULL)
|
{
|
text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
|
text->append8(text, (const char *)">");
|
}
|
|
// Finally return the pointer to our new token
|
//
|
return token;
|
}
|
|
|
#ifdef ANTLR3_WINDOWS
|
#pragma warning( default : 4100 )
|
#endif
|
|
/// @}
|
///
|