{
* Hiliter.UPasLexer.pas
*
* Defines a class that analyses and tokenises Pascal source code.
*
* $Rev$
* $Date$
*
* ***** BEGIN LICENSE BLOCK *****
*
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
* the specific language governing rights and limitations under the License.
*
* The Original Code is Hiliter.UPasLexer.pas, formerly UHilitePasLexer.pas
*
* The Initial Developer of the Original Code is Peter Johnson
* (http://www.delphidabbler.com/).
*
* Portions created by the Initial Developer are Copyright (C) 2005-2011 Peter
* Johnson. All Rights Reserved.
*
* Contributor(s)
* NONE
*
* ***** END LICENSE BLOCK *****
}
unit Hiliter.UPasLexer;
interface
uses
// Delphi
SysUtils, Classes, Generics.Collections,
// Project
UStringReader;
type
/// <summary>Tokens describing the different components of Pascal source code
/// recognised by THilitePasLexer.</summary>
/// <remarks>THilitePasLexer.NextToken returns one of these values for each
/// lexical element it reads from the source code.</remarks>
THilitePasToken = (
tkKeyword, // Pascal keyword
tkComment, // comment text, including any opening and closing symbols read
tkCompilerDir, // compiler directive, including enclosing comment symbols
tkDirective, // Pascal directive
tkIdentifier, // identifier that is neither a keyword nor a directive
tkString, // string literal including quotes
tkChar, // literal character: "#" followed by hex or whole number
tkNumber, // whole number (sequence of decimal digits)
tkFloat, // floating point number (may use 'E' notation)
tkHex, // hex number: "$" followed by hex digits
tkSymbol, // symbol (single or double character eg '=' and ':=')
tkWhitespace, // white space (spaces, tabs etc, excluding CR and LF)
tkEOL, // end of line (usually CRLF but CR and LF on own valid)
tkEOF, // end of file
tkError // error condition: shouldn't occur in valid Pascal code
);
type
/// <summary>Class that analyses and tokenises Pascal source code.</summary>
/// <remarks>Create an instance for a source code string then call NextToken
/// repeatedly, reading the Token and TokenStr properties after each call,
/// until tkEOF is returned.</remarks>
THilitePasLexer = class(TObject)
strict private
type
/// <summary>Maps the text of entities (keywords, directives and symbols)
/// onto the tokens that represent them.</summary>
TEntityMap = class(TObject)
strict private
/// <summary>Dictionary that stores the entity to token mapping.
/// </summary>
fMap: TDictionary<string,THilitePasToken>;
public
/// <summary>Object constructor. Creates the empty map.</summary>
constructor Create;
/// <summary>Object destructor. Frees the underlying dictionary.
/// </summary>
destructor Destroy; override;
/// <summary>Adds a mapping from the given entity text to the given
/// token.</summary>
procedure Add(const Entity: string; const Token: THilitePasToken);
/// <summary>Returns the token associated with the given entity text, or
/// tkError if the entity is not in the map.</summary>
function Lookup(const Entity: string): THilitePasToken;
end;
class var
/// <summary>Map of keywords, directives and symbols shared by all lexer
/// instances. Populated by the class constructor.</summary>
fEntityMap: TEntityMap;
var
/// <summary>Text of last token read from input.</summary>
fTokenStr: string;
/// <summary>Identifies last token read from input.</summary>
fToken: THilitePasToken;
/// <summary>Records state of comment being processed.</summary>
fCommentState: record
/// <summary>Informs whether currently processing comment.</summary>
InComment: Boolean;
/// <summary>Indicates whether comment or compiler directive.</summary>
CommentType: THilitePasToken;
/// <summary>Closing comment symbol.</summary>
/// <remarks>One of "*)", "}" or EOL.</remarks>
CommentCloser: string;
end;
/// <summary>Object that reads characters from input.</summary>
fReader: TStringReader;
/// <summary>Appends current character in input to token string.</summary>
/// <remarks>Ignores EOF.</remarks>
procedure UpdateTokenStr; overload;
/// <summary>Appends given character to token string.</summary>
/// <remarks>Ignores EOF.</remarks>
procedure UpdateTokenStr(const Ch: Char); overload;
/// <summary>Analyses a literal character from input and stores in token
/// string.</summary>
/// <remarks>A literal character comprises a # followed by a number.
/// </remarks>
/// <returns>THilitePasToken. Token indicating literal character.</returns>
function ParseChar: THilitePasToken;
/// <summary>Begins parsing of a new comment or compiler directive.
/// </summary>
/// <returns>THilitePasToken. Token informing whether this is a comment or
/// compiler directive.</returns>
function ParseCommentFromStart: THilitePasToken;
/// <summary>Analyses body of comment following start or after resuming
/// processing multi-line comments.</summary>
/// <returns>THilitePasToken. Token informing whether this is a comment or
/// compiler directive.</returns>
function ParseCommentInterior: THilitePasToken;
/// <summary>Analyses end of line from input and stores in token string.
/// </summary>
/// <returns>THilitePasToken. End of line token.</returns>
function ParseEOL: THilitePasToken;
/// <summary>Analyses a hexadecimal integer from input and stores in token
/// string.</summary>
/// <returns>THilitePasToken. Token indicating hexadecimal value.</returns>
function ParseHex: THilitePasToken;
/// <summary>Analyses an alphanumeric identifier from input and stores in
/// token string.</summary>
/// <returns>THilitePasToken. Token indicating if identifier is normal
/// identifier, keyword or directive.</returns>
function ParseIdent: THilitePasToken;
/// <summary>Analyses a number from input and stores in token string.
/// </summary>
/// <remarks>Number can be integer or real.</remarks>
/// <returns>THilitePasToken. Token indicating whether an integer or real
/// number was parsed.</returns>
function ParseNumber: THilitePasToken;
/// <summary>Analyses a string literal from input and stores in token
/// string.</summary>
/// <returns>THilitePasToken. Token indicating a string.</returns>
function ParseString: THilitePasToken;
/// <summary>Parses a symbol from input and stores in token string.
/// </summary>
/// <remarks>Determines whether the current symbol character on input
/// represents a symbol or introduces some other syntactic entity (i.e.
/// comment, string, character literal or a hex number).</remarks>
/// <returns>THilitePasToken. Token describing parsed entity.</returns>
function ParseSymbol: THilitePasToken;
/// <summary>Analyses an unrecognised entity from input and adds it to
/// token string.</summary>
/// <returns>THilitePasToken. Error token.</returns>
function ParseUnknown: THilitePasToken;
/// <summary>Analyses a sequence of white space from input and appends to
/// token string.</summary>
/// <returns>THilitePasToken. White space token.</returns>
function ParseWhiteSpace: THilitePasToken;
/// <summary>Analyses a whole number from input and appends to token
/// string.</summary>
/// <returns>THilitePasToken. Whole number token.</returns>
function ParseWholeNumber: THilitePasToken;
public
/// <summary>Class constructor. Creates the shared entity map and loads it
/// with all keywords, directives and symbols.</summary>
class constructor Create;
/// <summary>Class destructor. Frees the shared entity map.</summary>
class destructor Destroy;
/// <summary>Object constructor. Sets up object to analyse given Pascal
/// source code string.</summary>
constructor Create(const Source: string);
/// <summary>Object destructor.</summary>
destructor Destroy; override;
/// <summary>Gets and analyses next pascal token from input and stores
/// details in token string.</summary>
/// <returns>THilitePasToken. Token identifying type of token.</returns>
function NextToken: THilitePasToken;
/// <summary>Text of token last read from input.</summary>
property TokenStr: string read fTokenStr;
/// <summary>Identifies type of last token read from input.</summary>
property Token: THilitePasToken read fToken;
end;
implementation
uses
// Delphi
Character,
// Project
UComparers, UConsts, UStrUtils, UUtils;
const
// Character constants
cDecimalPoint = '.'; // separates whole and fractional parts of a float
cCompilerDirChar = '$'; // follows comment opener in a compiler directive
cStringDelim = ''''; // single quote that delimits string literals
cCloseParen = ')'; // closing parenthesis (second char of '.)' symbol)
cEOL = TStringReader.EOL; // end of line marker supplied by the reader
cEOF = TStringReader.EOF; // end of file marker supplied by the reader
// String tables
cDoubleSyms: array[0..9] of string = ( // list of valid double symbols
'(*', '(.', '*)', '.)', '..', '//', ':=', '<=', '>=', '<>'
);
cCommentOpeners: array[0..2] of string = ( // symbols that open comments
'{', '(*', '//'
);
cCommentClosers: array[0..2] of string = ( // symbols that close comments
// item at given index matches openers at same index
// (a '//' comment is closed by the end of the line)
'}', '*)', cEOL
);
cCompilerDirOpeners: array[0..1] of string = ( // comment symbols that open
'{', '(*' // compiler directives
);
// table of keywords per Delphi 2006
// Every entry is added to the lexer's entity map as tkKeyword by the
// THilitePasLexer class constructor.
cKeywords: array[0..65] of string = (
'and', 'array', 'as', 'asm',
'at', 'begin', 'case', 'class',
'const', 'constructor', 'destructor', 'dispinterface',
'div', 'do', 'downto', 'else',
'end', 'except', 'exports', 'file',
'finalization', 'finally', 'for', 'function',
'goto', 'if', 'implementation', 'in',
'inherited', 'initialization', 'interface', 'is',
'label', 'library', 'mod', 'nil',
'not', 'object', 'of', 'on',
'or', 'out', 'packed', 'procedure',
'program', 'property', 'raise', 'record',
'repeat', 'resourcestring', 'set', 'shl',
'shr', 'string', 'then', 'threadvar',
'to', 'try', 'type', 'unit',
'until', 'uses', 'var', 'while',
'with', 'xor'
);
// table of directives per Delphi 2010
// Every entry is added to the lexer's entity map as tkDirective by the
// THilitePasLexer class constructor. No entry may duplicate a keyword since
// both tables are loaded into the same map.
cDirectives: array[0..54] of string = (
'absolute', 'abstract', 'assembler', 'automated',
'cdecl', 'contains', 'default', 'delayed',
'deprecated', 'dispid', 'dynamic', 'experimental',
'export', 'external', 'far', 'final',
'forward', 'helper', 'implements', 'index',
'inline', 'local', 'message', 'name',
'near', 'nodefault', 'operator', 'overload',
'override', 'package', 'pascal', 'platform',
'private', 'protected', 'public', 'published',
'read', 'readonly', 'reference', 'register',
'reintroduce', 'requires', 'resident', 'safecall',
'sealed', 'static', 'stdcall', 'stored',
'strict', 'unsafe', 'varargs', 'virtual',
'winapi', 'write', 'writeonly'
);
// Maps symbols onto likely tokens or error if token shouldn't occur (eg
// close comments).
// The token is only a first guess used by ParseSymbol to choose a parser:
// tkComment, tkString, tkChar and tkHex entries mark symbols that introduce a
// larger entity, while tkSymbol and tkError entries are returned unchanged.
cSymToTokenMap: array[0..32] of record
Symbol: string; // symbol strings
Token: THilitePasToken; // related token
end = (
( Symbol: '$'; Token: tkHex; ),
( Symbol: '#'; Token: tkChar; ),
( Symbol: '&'; Token: tkSymbol; ),
( Symbol: ''''; Token: tkString; ),
( Symbol: '('; Token: tkSymbol; ),
( Symbol: ')'; Token: tkSymbol; ),
( Symbol: '*'; Token: tkSymbol; ),
( Symbol: '+'; Token: tkSymbol; ),
( Symbol: ','; Token: tkSymbol; ),
( Symbol: '-'; Token: tkSymbol; ),
( Symbol: '.'; Token: tkSymbol; ),
( Symbol: '/'; Token: tkSymbol; ),
( Symbol: ':'; Token: tkSymbol; ),
( Symbol: ';'; Token: tkSymbol; ),
( Symbol: '<'; Token: tkSymbol; ),
( Symbol: '='; Token: tkSymbol; ),
( Symbol: '>'; Token: tkSymbol; ),
( Symbol: '@'; Token: tkSymbol; ),
( Symbol: '['; Token: tkSymbol; ),
( Symbol: ']'; Token: tkSymbol; ),
( Symbol: '^'; Token: tkSymbol; ),
( Symbol: '{'; Token: tkComment; ),
( Symbol: '}'; Token: tkError; ),
( Symbol: '(*'; Token: tkComment; ),
( Symbol: '*)'; Token: tkError; ),
( Symbol: '(.'; Token: tkSymbol; ),
( Symbol: '.)'; Token: tkSymbol; ),
( Symbol: '..'; Token: tkSymbol; ),
( Symbol: '//'; Token: tkComment; ),
( Symbol: ':='; Token: tkSymbol; ),
( Symbol: '<='; Token: tkSymbol; ),
( Symbol: '>='; Token: tkSymbol; ),
( Symbol: '<>'; Token: tkSymbol; )
);
var
// Private objects used to store and search lists of symbols and keywords
// pvtDoubleSyms is created on first use by IsDoubleSym and freed in this
// unit's finalization section.
pvtDoubleSyms: TStringList = nil; // list of double symbols
{ Helper routines }
/// <summary>Tests whether a character may appear in a Delphi identifier at any
/// position after the first.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is an underscore, a letter or a digit.
/// </returns>
function IsValidIdentBodyChar(const C: Char): Boolean; inline;
begin
  Result := (C = '_') or TCharacter.IsLetterOrDigit(C);
end;
/// <summary>Tests whether a character may begin a Delphi identifier.
/// </summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is an underscore or a letter.</returns>
function IsValidIdentStartChar(const C: Char): Boolean; inline;
begin
  Result := (C = '_') or TCharacter.IsLetter(C);
end;
/// <summary>Tests whether a character is white space that is neither a line
/// break character nor the end of file marker.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is white space other than CR, LF or EOF.
/// </returns>
function IsWhiteSpaceChar(const C: Char): Boolean; inline;
begin
  if CharInSet(C, [CR, LF, cEOF]) then
    // line breaks and EOF are never treated as white space by the lexer
    Result := False
  else
    Result := TCharacter.IsWhiteSpace(C);
end;
/// <summary>Tests whether a character is one that can start, or be part of, a
/// Delphi symbol.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is one of the recognised symbol characters.
/// </returns>
function IsSymbolChar(const C: Char): Boolean; inline;
begin
  case C of
    '#', '$', '&', '''',
    '('..'/',           // ( ) * + , - . /
    ':'..'>',           // : ; < = >
    '@', '[', ']', '^', '{', '}':
      Result := True;
  else
    Result := False;
  end;
end;
/// <summary>Tests whether a character introduces the exponent part of a
/// number written in scientific notation.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is 'E' or 'e'.</returns>
function IsExponentChar(const C: Char): Boolean; inline;
begin
  Result := (C = 'E') or (C = 'e');
end;
/// <summary>Tests whether a character is a plus or minus sign.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is '+' or '-'.</returns>
function IsUnaryPlusMinusChar(const C: Char): Boolean; inline;
begin
  Result := (C = '+') or (C = '-');
end;
/// <summary>Checks if given character is a separator character, i.e. one that
/// can validly terminate the token that precedes it.</summary>
/// <param name="C">Char [in] Character to test.</param>
/// <returns>Boolean. True if C is white space, a symbol character, the end of
/// line marker or the end of file marker.</returns>
/// <remarks>The end of file marker counts as a separator so that a token that
/// is the very last thing in the source (e.g. a hex literal with no trailing
/// line break) is not reported as an error.</remarks>
function IsSeparatorChar(const C: Char): Boolean; inline;
begin
  Result := IsWhiteSpaceChar(C) or IsSymbolChar(C)
    or (C = cEOL) or (C = cEOF);
end;
/// <summary>Finds the position of a string within a table of strings.
/// </summary>
/// <param name="Str">string [in] Text to search for.</param>
/// <param name="Table">array of string [in] Table to be searched.</param>
/// <returns>Integer. Zero based index of the first entry of Table that
/// StrSameText reports as matching Str, or -1 if there is no such entry.
/// </returns>
function IndexInTable(const Str: string; const Table: array of string): Integer;
var
  Idx: Integer; // index of each table entry in turn
begin
  // Open array parameters are always zero based, which callers rely on
  for Idx := 0 to Pred(Length(Table)) do
    if StrSameText(Table[Idx], Str) then
      Exit(Idx);
  Result := -1;
end;
/// <summary>Creates a new string list, fills it with the entries of a table
/// and makes it sorted and case insensitive.</summary>
/// <param name="Strings">TStringList [out] Receives the new list. Caller
/// owns, and must free, the object.</param>
/// <param name="Table">array of string [in] Values to be stored in the list.
/// </param>
procedure InitStringList(out Strings: TStringList;
  const Table: array of string);
var
  Entry: Integer; // index of each table entry in turn
begin
  Strings := TStringList.Create;
  Entry := Low(Table);
  while Entry <= High(Table) do
  begin
    Strings.Add(Table[Entry]);
    Inc(Entry);
  end;
  // Sort only once every entry has been added
  Strings.Sorted := True;
  Strings.CaseSensitive := False;
end;
/// <summary>Tests whether a string is one of the valid two character symbols
/// listed in cDoubleSyms.</summary>
/// <param name="Symbol">string [in] Text to test.</param>
/// <returns>Boolean. True if Symbol is a known double symbol.</returns>
function IsDoubleSym(const Symbol: string): Boolean;
begin
  // The lookup list is built the first time this routine is called
  if pvtDoubleSyms = nil then
    InitStringList(pvtDoubleSyms, cDoubleSyms);
  Result := pvtDoubleSyms.IndexOf(Symbol) <> -1;
end;
/// <summary>Gets the symbol that closes a comment opened by a given symbol.
/// </summary>
/// <param name="CommentOpener">string [in] Symbol that opens the comment.
/// Must be an entry of cCommentOpeners.</param>
/// <returns>string. The entry of cCommentClosers at the same index as
/// CommentOpener has in cCommentOpeners.</returns>
function MatchingCommentCloser(const CommentOpener: string): string;
var
  OpenerIdx: Integer; // position of opener in the openers table
begin
  // The openers and closers tables are parallel and both are zero based
  OpenerIdx := IndexInTable(CommentOpener, cCommentOpeners);
  Assert(OpenerIdx >= 0, 'MatchingCommentCloser: invalid comment opener');
  Result := cCommentClosers[OpenerIdx];
end;
/// <summary>Tests whether a string is a comment opening symbol that may also
/// open a compiler directive.</summary>
/// <param name="Str">string [in] Symbol to test.</param>
/// <returns>Boolean. True if Str is an entry of cCompilerDirOpeners.
/// </returns>
function IsCompilerDirOpener(const Str: string): Boolean;
begin
  Result := IndexInTable(Str, cCompilerDirOpeners) <> -1;
end;
{ THilitePasLexer }
class constructor THilitePasLexer.Create;
var
  EntityText: string; // text of each keyword and directive
  SymIdx: Integer;    // index of each row of symbol to token map
begin
  // Build the map shared by all lexers: keywords first, then directives and
  // finally the symbols along with the tokens they introduce
  fEntityMap := TEntityMap.Create;
  for EntityText in cKeywords do
    fEntityMap.Add(EntityText, tkKeyword);
  for EntityText in cDirectives do
    fEntityMap.Add(EntityText, tkDirective);
  for SymIdx := Low(cSymToTokenMap) to High(cSymToTokenMap) do
    fEntityMap.Add(
      cSymToTokenMap[SymIdx].Symbol, cSymToTokenMap[SymIdx].Token
    );
end;
constructor THilitePasLexer.Create(const Source: string);
begin
  inherited Create;
  // All input is read, a character at a time, through this reader object
  fReader := TStringReader.Create(Source);
end;
class destructor THilitePasLexer.Destroy;
begin
  // Release the map created by the class constructor
  fEntityMap.Free;
end;
destructor THilitePasLexer.Destroy;
begin
  // Release the reader created by the constructor
  fReader.Free;
  inherited;
end;
function THilitePasLexer.NextToken: THilitePasToken;
begin
  // Each token starts with an empty token string
  fTokenStr := '';
  if fCommentState.InComment then
  begin
    // Part way through a multi-line comment: the current character is either
    // the EOL that interrupted the comment or more comment text
    if fReader.Ch = cEOL then
      Result := ParseEOL
    else
      Result := ParseCommentInterior;
  end
  // Not in a comment: the current character decides which parser to use
  else if IsWhiteSpaceChar(fReader.Ch) then
    Result := ParseWhiteSpace
  else if IsValidIdentStartChar(fReader.Ch) then
    Result := ParseIdent
  else if IsSymbolChar(fReader.Ch) then
    Result := ParseSymbol
  else if TCharacter.IsDigit(fReader.Ch) then
    Result := ParseNumber
  else if fReader.Ch = cEOL then
    Result := ParseEOL
  else if fReader.Ch = cEOF then
    Result := tkEOF
  else
    Result := ParseUnknown;
  // Remember the token so it can be read via the Token property
  fToken := Result;
end;
function THilitePasLexer.ParseChar: THilitePasToken;
begin
  // On entry the token string holds '#' and the current character is the one
  // that follows it. The numeric part is either a hex or a whole number.
  if fEntityMap.Lookup(fReader.Ch) = tkHex then
  begin
    // '$' found => hex number: record the '$' then read the hex digits
    UpdateTokenStr;
    fReader.NextChar;
    ParseHex;
    Result := tkChar;
  end
  else if TCharacter.IsDigit(fReader.Ch) then
  begin
    // Digit found => whole number
    ParseWholeNumber;
    Result := tkChar;
  end
  else
    // Anything else can't follow '#'
    Result := tkError;
end;
function THilitePasLexer.ParseCommentFromStart: THilitePasToken;
var
  IsDirective: Boolean; // whether comment is really a compiler directive
begin
  // On entry the token string holds the comment opening symbol and the
  // current character is the one that follows it.
  // A '$' straight after a '{' or '(*' opener marks a compiler directive
  IsDirective := (fReader.Ch = cCompilerDirChar) and
    IsCompilerDirOpener(fTokenStr);
  // Record details of the comment for use while its body is parsed
  fCommentState.InComment := True;
  fCommentState.CommentCloser := MatchingCommentCloser(fTokenStr);
  if IsDirective then
    fCommentState.CommentType := tkCompilerDir
  else
    fCommentState.CommentType := tkComment;
  // Now deal with the comment's content
  Result := ParseCommentInterior;
end;
function THilitePasLexer.ParseCommentInterior: THilitePasToken;
// Appends comment text to the token string, starting at the current
// character, until one of the following is found:
// - the closing symbol recorded in fCommentState: it is added to the token
// string (unless it is EOL) and the comment is marked as closed;
// - an EOL inside a multi-line comment: parsing stops with the EOL as the
// current character and the comment left open so that NextToken resumes
// here once the EOL has been processed;
// - EOF: the comment is marked as closed.
// Returns the comment type (tkComment or tkCompilerDir) recorded in
// fCommentState.
var
Done: Boolean; // flag true when we have finished comment
begin
Assert(fCommentState.InComment,
ClassName + '.ParseCommentInterior: called when not in comment');
Assert(fCommentState.CommentType in [tkComment, tkCompilerDir],
ClassName + '.ParseCommentInterior: invalid comment type');
Assert(Length(fCommentState.CommentCloser) > 0,
ClassName + '.ParseCommentInterior: invalid comment closer');
Result := fCommentState.CommentType;
// Loop thru all comment, looking for closing comment symbol
// NOTE: every pass through the loop ends with a call to fReader.NextChar, so
// any branch that wants to leave a character to be read again calls
// fReader.PutBackChar first
Done := False;
while (fReader.Ch <> cEOF) and not Done do
begin
if fReader.Ch = fCommentState.CommentCloser[1] then
begin
// We have encountered 1st char of a comment "closer"
if Length(fCommentState.CommentCloser) = 1 then
begin
// Our closer is a single char: comment is closed
Done := True;
fCommentState.InComment := False;
if fCommentState.CommentCloser[1] = cEOL then
// closer is EOL: put it back to be read later
// (the EOL gets its own tkEOL token on the next call to NextToken)
fReader.PutBackChar
else
// closer not EOL: add it to token string
UpdateTokenStr;
end
else
begin
// Our possible closer has two chars
// Record first char in token string
// (it is part of the comment text whether or not the closer is complete)
UpdateTokenStr;
// Peek ahead at next char
fReader.NextChar;
if fReader.Ch = fCommentState.CommentCloser[2] then
begin
// This is the expected closer: comment is closed
Done := True;
fCommentState.InComment := False;
UpdateTokenStr;
end
else
// False alarm: put back the char we peeked at
// (it is examined in its own right on the next pass through the loop)
fReader.PutBackChar;
end;
end
else
begin
// Ordinary comment text
if fReader.Ch = cEOL then
begin
// EOL: put it back and stop parsing
// the comment stays open: we will continue processing after EOL handled
Done := True;
fReader.PutBackChar;
end
else
// Not EOL: add char to token string
UpdateTokenStr;
end;
fReader.NextChar;
end;
// If at EOF ensure that comment is closed
// (an unterminated comment must not leave the lexer stuck in comment state)
if fReader.Ch = cEOF then
fCommentState.InComment := False;
end;
function THilitePasLexer.ParseEOL: THilitePasToken;
begin
  // Token text is always the reader's EOL marker
  UpdateTokenStr(cEOL);
  // Step over the EOL
  fReader.NextChar;
  Result := tkEOL;
end;
function THilitePasLexer.ParseHex: THilitePasToken;
begin
  // On entry the token string ends with '$' and the current character is the
  // one that follows it.
  // Gather all the hex digits
  while IsHexDigit(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // The number is valid only if a separator follows it
  if IsSeparatorChar(fReader.Ch) then
    Result := tkHex
  else
    Result := tkError;
end;
function THilitePasLexer.ParseIdent: THilitePasToken;
begin
  Assert(IsValidIdentStartChar(fReader.Ch),
    ClassName + '.ParseIdent: identifier starting character expected');
  // Read the whole identifier into the token string
  while IsValidIdentBodyChar(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // Anything the entity map doesn't class as a keyword or directive is an
  // ordinary identifier
  Result := fEntityMap.Lookup(fTokenStr);
  if (Result <> tkKeyword) and (Result <> tkDirective) then
    Result := tkIdentifier;
end;
function THilitePasLexer.ParseNumber: THilitePasToken;
begin
  Assert(TCharacter.IsDigit(fReader.Ch),
    ClassName + '.ParseNumber: digit expected');
  // Every number begins with a run of digits. Reading it leaves the current
  // character as the one immediately after the digits.
  ParseWholeNumber;
  // Treat as a whole number unless a fraction or exponent is found
  Result := tkNumber;

  // Optional fractional part
  if fReader.Ch = cDecimalPoint then
  begin
    // A '.' here need not be a decimal point: it may start the '..' or '.)'
    // symbols. Look at the character after it to find out.
    fReader.NextChar;
    if (fReader.Ch = cDecimalPoint) or (fReader.Ch = cCloseParen) then
    begin
      // '.' starts a two character symbol: restore it as the current
      // character and return the whole number read so far
      fReader.PutBackChar;
      Exit;
    end;
    // Genuine decimal point: record it along with any digits that follow.
    // Digits are optional (e.g. 2. is a valid Delphi float)
    UpdateTokenStr(cDecimalPoint);
    if TCharacter.IsDigit(fReader.Ch) then
      ParseWholeNumber;
    Result := tkFloat;
  end;

  // Optional exponent part, as used in "scientific" notation. It may follow
  // the whole number, the decimal point or the fractional digits, so 2e4,
  // 2.e3, 2.0e4 and 2.0e-4 are all accepted.
  if not IsExponentChar(fReader.Ch) then
    Exit;
  // Record the 'e' or 'E' and any sign that follows it
  UpdateTokenStr;
  fReader.NextChar;
  if IsUnaryPlusMinusChar(fReader.Ch) then
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // The exponent must have at least one digit
  if not TCharacter.IsDigit(fReader.Ch) then
    Exit(tkError);
  ParseWholeNumber;
  Result := tkFloat;
end;
function THilitePasLexer.ParseString: THilitePasToken;
// Reads the remainder of a string literal into the token string. Parsing ends
// after the closing quote or, if the literal is not terminated, just before
// the next EOL or at EOF. A quoted string literal can't span lines, so stopping
// at EOL keeps an unterminated string from swallowing the lines that follow it
// and ensures EOLs are always reported as separate tkEOL tokens, as they are
// for comments. tkString is returned even if the literal is unterminated.
var
  Done: Boolean;  // flag true when done parsing string
begin
  // Note: token string already contains opening quote - current char is first
  // character of the string after the quote
  Done := False;
  // Loop thru characters until end of string, end of line or end of file
  while not Done and (fReader.Ch <> cEOF) and (fReader.Ch <> cEOL) do
  begin
    UpdateTokenStr;
    if fReader.Ch = cStringDelim then
    begin
      // Could be closing quote or pair of quotes used to embed quote in string
      // we need to read ahead to check this
      fReader.NextChar;
      if fReader.Ch = cStringDelim then
        // this is a pair of quotes ('') => embeds quote in string => not done
        UpdateTokenStr
      else
      begin
        // not a pair of quotes => string completed
        Done := True;
        // put back char we read ahead: the NextChar below makes it current
        fReader.PutBackChar;
      end;
    end;
    fReader.NextChar;
  end;
  Result := tkString;
end;
function THilitePasLexer.ParseSymbol: THilitePasToken;
begin
  Assert(IsSymbolChar(fReader.Ch), ClassName + '.ParseSymbol: symbol expected');
  // Record the first character of the symbol and move on
  UpdateTokenStr;
  fReader.NextChar;
  // If the next character completes a valid two character symbol, record it
  // too and step past it
  if IsSymbolChar(fReader.Ch) and IsDoubleSym(fTokenStr + fReader.Ch) then
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // The token string now holds the complete symbol. Some symbols introduce a
  // larger entity that needs further parsing: the rest are returned as is.
  Result := fEntityMap.Lookup(TokenStr);
  case Result of
    tkComment: Result := ParseCommentFromStart;
    tkString:  Result := ParseString;
    tkChar:    Result := ParseChar;
    tkHex:     Result := ParseHex;
  end;
end;
function THilitePasLexer.ParseUnknown: THilitePasToken;
begin
  // Record the unrecognised character and step past it
  UpdateTokenStr;
  fReader.NextChar;
  Result := tkError;
end;
function THilitePasLexer.ParseWhiteSpace: THilitePasToken;
begin
  Assert(IsWhiteSpaceChar(fReader.Ch),
    ClassName + '.ParseWhiteSpace: current char not white space');
  Result := tkWhiteSpace;
  // Gather the whole run of white space into the token string
  while IsWhiteSpaceChar(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
end;
function THilitePasLexer.ParseWholeNumber: THilitePasToken;
begin
  Assert(TCharacter.IsDigit(fReader.Ch),
    ClassName + '.ParseWholeNumber: current char not a digit');
  Result := tkNumber;
  // Gather the whole run of digits into the token string
  while TCharacter.IsDigit(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
end;
procedure THilitePasLexer.UpdateTokenStr;
begin
  // Delegate to the overload that takes a character, passing the reader's
  // current character
  UpdateTokenStr(fReader.Ch);
end;
procedure THilitePasLexer.UpdateTokenStr(const Ch: Char);
begin
  // The EOF marker is never part of a token's text
  if Ch = cEOF then
    Exit;
  fTokenStr := fTokenStr + Ch;
end;
{ THilitePasLexer.TEntityMap }
constructor THilitePasLexer.TEntityMap.Create;
begin
  inherited Create;
  // Keys are compared using the project's text equality comparer
  fMap := TDictionary<string,THilitePasToken>.Create(
    TTextEqualityComparer.Create
  );
end;
destructor THilitePasLexer.TEntityMap.Destroy;
begin
  // Release the dictionary created by the constructor
  fMap.Free;
  inherited;
end;
procedure THilitePasLexer.TEntityMap.Add(const Entity: string;
  const Token: THilitePasToken);
begin
  // Store the mapping in the underlying dictionary
  fMap.Add(Entity, Token);
end;
function THilitePasLexer.TEntityMap.Lookup(const Entity: string):
  THilitePasToken;
begin
  // A single TryGetValue call avoids hashing and comparing the key twice, as
  // happens when ContainsKey is followed by an indexed read. This matters
  // because a lookup is made for every identifier and symbol that is parsed.
  // TryGetValue sets Result to the type's default value (tkKeyword) when the
  // key is missing, so the "not found" result must be set explicitly.
  if not fMap.TryGetValue(Entity, Result) then
    Result := tkError;
end;
initialization
// Nothing to initialise here: pvtDoubleSyms is created on first use by
// IsDoubleSym
finalization
// Free the double symbol list. Calling Free is safe even if the list was never
// created, since the variable is then still nil.
pvtDoubleSyms.Free;
end.