Source code
Revision control
Copy as Markdown
Other Tools
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MITREXSL_EXPRLEXER_H
#define MITREXSL_EXPRLEXER_H
#include "txCore.h"
#include "nsString.h"
/**
* A Token class for the ExprLexer.
*
* This class was ported from XSL:P, an open source Java based
* XSLT processor, written by yours truly.
*/
class Token {
 public:
  /**
   * Token types.
   *
   * The numeric order of the enumerators is significant: the comments below
   * mark a range (COMMA .. UNION_OP) that ExprLexer::nextIsOperatorToken
   * expects to be consecutive. Do not insert or reorder enumerators without
   * checking that function.
   */
  enum Type {
    //-- Trivial Tokens
    NULL_TOKEN = 1,
    LITERAL,
    NUMBER,
    CNAME,
    VAR_REFERENCE,
    PARENT_NODE,
    SELF_NODE,
    R_PAREN,
    R_BRACKET,  // 9

    /**
     * Start of tokens for 3.7, bullet 1.
     * ExprLexer::nextIsOperatorToken bails if the tokens aren't
     * consecutive.
     */
    COMMA,
    AT_SIGN,
    L_PAREN,
    L_BRACKET,
    AXIS_IDENTIFIER,

    // These tokens include their following left parenthesis
    FUNCTION_NAME_AND_PAREN,  // 15
    COMMENT_AND_PAREN,
    NODE_AND_PAREN,
    PROC_INST_AND_PAREN,
    TEXT_AND_PAREN,

    /**
     * Operators
     */
    //-- boolean ops
    AND_OP,  // 20
    OR_OP,

    //-- relational
    EQUAL_OP,  // 22
    NOT_EQUAL_OP,
    LESS_THAN_OP,
    GREATER_THAN_OP,
    LESS_OR_EQUAL_OP,
    GREATER_OR_EQUAL_OP,

    //-- additive operators
    ADDITION_OP,  // 28
    SUBTRACTION_OP,

    //-- multiplicative
    DIVIDE_OP,  // 30
    MULTIPLY_OP,
    MODULUS_OP,

    //-- path operators
    PARENT_OP,  // 33
    ANCESTOR_OP,
    UNION_OP,
    /**
     * End of tokens for 3.7, bullet 1.
     */

    //-- Special endtoken
    END  // 36
  };

  /** Iterator type used to mark the token's position in the source string. */
  using iterator = nsAString::const_char_iterator;

  /**
   * Creates a token of type aType spanning from aStart up to aEnd.
   * The token starts out unlinked (mNext is nullptr).
   */
  Token(iterator aStart, iterator aEnd, Type aType)
      : mStart(aStart), mEnd(aEnd), mType(aType), mNext(nullptr) {}

  /**
   * Creates a single-character token of type aType at aChar; the end
   * position is set to aChar + 1. The token starts out unlinked.
   */
  Token(iterator aChar, Type aType)
      : mStart(aChar), mEnd(aChar + 1), mType(aType), mNext(nullptr) {}

  /**
   * Returns the text of this token as Substring(mStart, mEnd).
   * Const-qualified because it only reads the token's bounds, so it can be
   * called through a const Token as well.
   */
  const nsDependentSubstring Value() const { return Substring(mStart, mEnd); }

  // Bounds of the token's text within the lexed string.
  iterator mStart;
  iterator mEnd;
  // Kind of token; one of the Type enumerators above.
  Type mType;
  // Following token in the list; nullptr right after construction.
  Token* mNext;
};
/**
* A class for splitting an "Expr" String into tokens and
* performing basic Lexical Analysis.
*
* This class was ported from XSL:P, an open source Java based XSL processor
*/
class txExprLexer {
 public:
  txExprLexer();
  ~txExprLexer();

  /**
   * Parse the given string.
   * Returns an error result if lexing failed.
   * The given string must outlive the use of the lexer, as the
   * generated Tokens point to Substrings of it.
   * mPosition points to the offending location in case of an error.
   */
  nsresult parse(const nsAString& aPattern);

  using iterator = nsAString::const_char_iterator;
  // Position in the parsed string; see parse() for its meaning on error.
  iterator mPosition;

  /**
   * Functions for iterating over the TokenList
   */

  Token* nextToken();

  /**
   * Returns the current token without touching the lexer's state.
   * Asserts (debug) that the lexer has a current token.
   */
  Token* peek() const {
    NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer");
    return mCurrentItem;
  }

  /**
   * Returns the token following the current one. When the current token has
   * no successor (or there is no current token) the current token itself is
   * returned, so callers never peek past the end node.
   */
  Token* peekAhead() const {
    NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer");
    // Don't peek past the end node
    return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext
                                                 : mCurrentItem;
  }

  /**
   * Returns true while there is a current token and it is not the
   * Token::END marker.
   */
  bool hasMoreTokens() const {
    NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer");
    return (mCurrentItem && mCurrentItem->mType != Token::END);
  }

  /**
   * Trivial Tokens
   *
   * Character constants recognised by the lexer.
   */
  //-- LF, changed to enum
  enum _TrivialTokens {
    D_QUOTE = '\"',
    S_QUOTE = '\'',
    L_PAREN = '(',
    R_PAREN = ')',
    L_BRACKET = '[',
    R_BRACKET = ']',
    L_ANGLE = '<',
    R_ANGLE = '>',
    COMMA = ',',
    PERIOD = '.',
    ASTERISK = '*',
    FORWARD_SLASH = '/',
    EQUAL = '=',
    BANG = '!',
    VERT_BAR = '|',
    AT_SIGN = '@',
    DOLLAR_SIGN = '$',
    PLUS = '+',
    HYPHEN = '-',
    COLON = ':',
    //-- whitespace tokens
    SPACE = ' ',
    TX_TAB = '\t',
    // NOTE(review): these two names are transposed relative to ASCII
    // ('\n' is line feed, '\r' is carriage return). The values are kept
    // as-is so existing users of the enumerators are unaffected; confirm
    // that every use treats both simply as whitespace before renaming.
    TX_CR = '\n',
    TX_LF = '\r'
  };

 private:
  // Token the iteration functions above currently refer to.
  Token* mCurrentItem;
  // NOTE(review): presumably the head and tail of the token list built by
  // addToken() - confirm against the implementation file.
  Token* mFirstItem;
  Token* mLastItem;

  int mTokenCount;

  void addToken(Token* aToken);

  /**
   * Returns true if the following Token should be an operator.
   * This is a helper for the first bullet of [XPath 3.7]
   * Lexical Structure
   */
  bool nextIsOperatorToken(Token* aToken);

  /**
   * Returns true if the given character represents a numeric letter (digit),
   * i.e. lies in the range '0'..'9'. Defined inline here.
   */
  static bool isXPathDigit(char16_t ch) { return (ch >= '0' && ch <= '9'); }
};
#endif