Menu

[r660]: / trunk / Src / UHilitePasLexer.pas  Maximize  Restore  History

Download this file

970 lines (899 with data), 33.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
{
* UHilitePasLexer.pas
*
* Defines class that analyses and tokenises Pascal source code.
*
* $Rev$
* $Date$
*
* ***** BEGIN LICENSE BLOCK *****
*
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
* the specific language governing rights and limitations under the License.
*
* The Original Code is UHilitePasLexer.pas
*
* The Initial Developer of the Original Code is Peter Johnson
* (http://www.delphidabbler.com/).
*
* Portions created by the Initial Developer are Copyright (C) 2005-2010 Peter
* Johnson. All Rights Reserved.
*
* Contributor(s)
* NONE
*
* ***** END LICENSE BLOCK *****
}
unit UHilitePasLexer;
interface
uses
// Delphi
Classes,
// Project
UTextStreamReader;
type
{
THilitePasToken:
Tokens describing the different components of Pascal source code returned by
the lexical analyser. The same values are used internally to classify symbols
(see cSymToTokenMap) and to record whether an open comment is a plain comment
or a compiler directive.
}
THilitePasToken = (
tkKeyword, // Pascal keyword
tkComment, // comment including opening and closing symbols
tkCompilerDir, // compiler directive including enclosing comment symbols
tkDirective, // Pascal directive (Delphi 7)
tkIdentifier, // identifier: identifier that is not keyword or directive
tkString, // string literal including quotes
tkChar, // literal character "#" [Hex | Whole number ]
tkNumber, // integral whole number
tkFloat, // floating point number (may use 'E' notation)
tkHex, // hex number: "$" followed by hex digits 0..9 | A..F
tkSymbol, // symbol (single or double character eg '=' and ':=')
tkWhitespace, // white space (spaces, tabs etc, excluding CR and LF)
tkEOL, // end of line (usually CRLF but CR and LF on own valid)
tkEOF, // end of file
tkError // error condition: returned for unrecognised or malformed input
);
{
THilitePasLexer:
Class that analyses and tokenises Pascal code. Each call to NextToken reads
one token from the input: the kind of token is returned and also recorded in
the Token property and the text of the token is stored in the TokenStr
property. tkEOF is returned once the end of the input is reached.
}
THilitePasLexer = class(TObject)
strict private
fTokenStr: string; // Text of last token read from input
fToken: THilitePasToken; // Kind of last token read from input
fCommentState: record // Records state of comment being processed
InComment: Boolean; // whether currently processing comment
CommentType: THilitePasToken; // indicates comment or compiler directive
CommentCloser: string; // closing comment symbol ( *), } or EOL )
end;
fReader: TTextStreamReader; // Object that reads characters from input
procedure UpdateTokenStr; overload;
{Appends current character in input to token string. Ignores EOF.
}
procedure UpdateTokenStr(const Ch: Char); overload;
{Appends a character to token string. Ignores EOF.
@param Ch [in] Character to append.
}
function ParseChar: THilitePasToken;
{Analyses a literal character (made from # followed by number) from input
and stores in token string.
@return Token indicating literal char (tkChar) or tkError if the # is
followed by neither a digit nor a hex number.
}
function ParseCommentFromStart: THilitePasToken;
{Begins parsing of a new comment or compiler directive.
@return Token telling whether this is a comment or compiler directive
(tkComment, tkCompilerDir).
}
function ParseCommentInterior: THilitePasToken;
{Analyses body of comment after start or after resuming processing multi-
line comments. Parsing stops at the closing comment symbol, at end of
line or at end of file.
@return Token telling whether this is a comment or compiler directive
(tkComment, tkCompilerDir).
}
function ParseEOL: THilitePasToken;
{Analyses end of line from input and stores in token string.
@return End of line token (tkEOL).
}
function ParseHex: THilitePasToken;
{Analyses a hexadecimal integer from input and stores in token string.
@return Token indicating hexadecimal value (tkHex) or tkError if the hex
digits are not followed by a separator.
}
function ParseIdent: THilitePasToken;
{Analyses an alphanumeric identifier from input and stores in token
string. Checks if identifier is keyword or directive.
@return Token representing identifier: tkKeyword, tkDirective or
tkIdentifier.
}
function ParseNumber: THilitePasToken;
{Analyses a number from input and stores in token string. Number can be
integer or real.
@return Appropriate token for number (tkNumber or tkFloat).
}
function ParseString: THilitePasToken;
{Analyses a string literal from input and stores in token string.
@return String token (tkString).
}
function ParseSymbol: THilitePasToken;
{Determines whether the current symbol character on input represents a
symbol or introduces some other syntactic entity (i.e. comment, string,
character literal or a hex number). Analyses the input accordingly and
stores the whole token in the token string.
@return Token describing entity parsed.
}
function ParseUnknown: THilitePasToken;
{Analyses an unrecognised entity from input and adds it to token string.
Only the single current character is consumed.
@return Error token (tkError).
}
function ParseWhiteSpace: THilitePasToken;
{Analyses a sequence of white space from input and appends each white
space character read to token string.
@return White space token (tkWhiteSpace).
}
function ParseWholeNumber: THilitePasToken;
{Analyses a whole number from input and appends to token string.
@return Whole number token (tkNumber).
}
public
constructor Create(const Stm: TStream);
{Class constructor. Sets up object to analyse code on a stream.
@param Stm [in] Stream containing Pascal source.
}
destructor Destroy; override;
{Class destructor. Tears down object.
}
function NextToken: THilitePasToken;
{Gets and analyses next Pascal token from input and stores details in
token string.
@return Token identifier for type of token read.
}
property TokenStr: string read fTokenStr;
{Text that makes up the token last read from input}
property Token: THilitePasToken read fToken;
{Kind of token last read from input}
end;
implementation
uses
// Delphi
SysUtils, Windows {for inlining},
// Project
UConsts, UExceptions, UUnicodeHelper;
const
// Character constants
cDecimalPoint = '.'; // may separate whole part of a number from fraction
cCompilerDirChar = '$'; // follows comment opener in a compiler directive
cStringDelim = ''''; // single quote: delimits string literals
cCloseParen = ')'; // with preceding '.' forms the '.)' symbol
cEOL = TTextStreamReader.EOL; // end of line marker as defined by the reader
cEOF = TTextStreamReader.EOF; // end of file marker as defined by the reader
// String tables
// NOTE: IsDoubleSym copies cDoubleSyms into a sorted string list on first use
cDoubleSyms: array[0..9] of string = ( // list of valid double symbols
'(*', '(.', '*)', '.)', '..', '//', ':=', '<=', '>=', '<>'
);
// NOTE: cCommentOpeners and cCommentClosers are parallel arrays: they must
// have the same bounds and order since MatchingCommentCloser uses the index of
// an opener to look up its closer
cCommentOpeners: array[0..2] of string = ( // symbols that open comments
'{', '(*', '//'
);
cCommentClosers: array[0..2] of string = ( // symbols that close comments
// item at given index matches openers at same index
'}', '*)', cEOL
);
// NOTE: '//' is deliberately absent: a '//' comment whose text starts with
// '$' is treated as an ordinary comment (see ParseCommentFromStart)
cCompilerDirOpeners: array[0..1] of string = ( // comment symbols that open
'{', '(*' // compiler directives
);
// NOTE: IsKeyword copies this table into a sorted, case insensitive string
// list on first use, so the order of entries here is for readability only
cKeywords: array[0..68] of string = ( // table of keywords per Delphi 2006
'and', 'array', 'as', 'asm',
'begin',
'case', 'class', 'const', 'constructor',
'destructor', 'dispinterface', 'div', 'do', 'downto',
'else', 'end', 'except', 'exports',
'file', 'final', 'finalization', 'finally', 'for', 'function',
'goto',
'if', 'implementation', 'in', 'inherited',
'initialization', 'inline', 'interface', 'is',
'label', 'library',
'mod',
'nil', 'not',
'object', 'of', 'or', 'out',
'packed', 'procedure', 'program', 'property',
'raise', 'record', 'repeat', 'resourcestring',
'sealed', 'set', 'shl', 'shr', 'static', 'string',
'then', 'threadvar', 'to', 'try', 'type',
'unit', 'unsafe', 'until', 'uses',
'var',
'while', 'with',
'xor'
);
// NOTE: IsDirective copies this table into a sorted, case insensitive string
// list on first use. 'inline' also appears in cKeywords: because ParseIdent
// tests for keywords before directives, 'inline' is always reported as a
// keyword and never as a directive.
cDirectives: array[0..45] of string = ( // table of directives
'absolute', // used in variable declaration
'abstract', // method directive
'assembly', // flags routine as containing assembler
'at', // only occurs in raise statement
'automated', // used in class declarations
'cdecl', // calling convention
'contains', // package clause
'default', // used in property declarations
'deprecated', // portability directive
'dispid', // used in automated properties
'dynamic', // method directive
'export', // calling convention (ignored)
'external', // routine directive
'far', // calling convention (ignored)
'forward', // routine directive
'implements', // used in property declarations
'index', // used in property declarations and re DLLs
'inline', // flags a routine as inlinable
'local', // routine directive
'message', // method directive
'name', // used re DLLs
'near', // calling convention (ignored)
'nodefault', // used in property declarations
'on', // used in exception handlers
'overload', // method / routine directive
'override', // method directive
'package', // introduces a package
'pascal', // calling convention
'platform', // portability directive
'private', // used in class declarations
'protected', // used in class declarations
'public', // used in class declarations
'published', // used in class declarations
'read', // used in property declarations
'readonly', // property directive in dispinterfaces
'register', // calling convention
'reintroduce', // method directive
'requires', // package clause
'resident', // directive used in exports clauses (ignored)
'safecall', // calling convention
'stdcall', // calling convention
'stored', // used in property declarations
'varargs', // method / routine directive
'virtual', // method directive
'write', // used in property declarations
'writeonly' // property directive in dispinterfaces
);
// Maps symbols onto likely tokens or error if token shouldn't occur (eg
// close comments).
// The token is only a first classification: ParseSymbol carries on parsing
// when a symbol maps to tkComment, tkString, tkChar or tkHex, since the symbol
// then only introduces a longer token. SymbolToToken returns tkError for any
// symbol that is not listed here.
cSymToTokenMap: array[0..32] of record
Symbol: string; // symbol strings
Token: THilitePasToken; // related token
end = (
( Symbol: '$'; Token: tkHex; ),
( Symbol: '#'; Token: tkChar; ),
( Symbol: '&'; Token: tkSymbol; ),
( Symbol: ''''; Token: tkString; ),
( Symbol: '('; Token: tkSymbol; ),
( Symbol: ')'; Token: tkSymbol; ),
( Symbol: '*'; Token: tkSymbol; ),
( Symbol: '+'; Token: tkSymbol; ),
( Symbol: ','; Token: tkSymbol; ),
( Symbol: '-'; Token: tkSymbol; ),
( Symbol: '.'; Token: tkSymbol; ),
( Symbol: '/'; Token: tkSymbol; ),
( Symbol: ':'; Token: tkSymbol; ),
( Symbol: ';'; Token: tkSymbol; ),
( Symbol: '<'; Token: tkSymbol; ),
( Symbol: '='; Token: tkSymbol; ),
( Symbol: '>'; Token: tkSymbol; ),
( Symbol: '@'; Token: tkSymbol; ),
( Symbol: '['; Token: tkSymbol; ),
( Symbol: ']'; Token: tkSymbol; ),
( Symbol: '^'; Token: tkSymbol; ),
( Symbol: '{'; Token: tkComment; ),
( Symbol: '}'; Token: tkError; ),
( Symbol: '(*'; Token: tkComment; ),
( Symbol: '*)'; Token: tkError; ),
( Symbol: '(.'; Token: tkSymbol; ),
( Symbol: '.)'; Token: tkSymbol; ),
( Symbol: '..'; Token: tkSymbol; ),
( Symbol: '//'; Token: tkComment; ),
( Symbol: ':='; Token: tkSymbol; ),
( Symbol: '<='; Token: tkSymbol; ),
( Symbol: '>='; Token: tkSymbol; ),
( Symbol: '<>'; Token: tkSymbol; )
);
var
// Private objects used to store and search lists of symbols and keywords
// Each list starts out as nil and is created on first use by the routine that
// searches it. Any lists that were created are freed in the finalization
// section of this unit.
pvtKeywords: TStringList = nil; // keywords list
pvtDirectives: TStringList = nil; // directives list
pvtDoubleSyms: TStringList = nil; // list of double symbols
pvtSymMap: TStringList = nil; // map of symbols to tokens
{ Helper routines }
function IsValidIdentBodyChar(const C: Char): Boolean; inline;
  {Tests whether a character may appear in a Delphi identifier at any position
  after the first.
    @param C [in] Character to be tested.
    @return True if C is an underscore or is alphanumeric, False otherwise.
  }
begin
  if C = '_' then
    Result := True
  else
    Result := IsAlphaNumeric(C);
end;
function IsValidIdentStartChar(const C: Char): Boolean; inline;
  {Tests whether a character may begin a Delphi identifier.
    @param C [in] Character to be tested.
    @return True if C is an underscore or is a letter, False otherwise.
  }
begin
  if C = '_' then
    Result := True
  else
    Result := IsLetter(C);
end;
function IsWhiteSpaceChar(const C: Char): Boolean; inline;
  {Tests whether a character is "in-line" white space, i.e. white space that is
  not a carriage return, a line feed or the end of file marker.
    @param C [in] Character to be tested.
    @return True if C is such a white space character, False otherwise.
  }
begin
  if IsCharInSet(C, [CR, LF, cEOF]) then
    // line ends and EOF are never reported as white space
    Result := False
  else
    Result := IsWhiteSpace(C);
end;
function IsSymbolChar(const C: Char): Boolean; inline;
  {Tests whether a character is one of the characters that can start a symbol.
    @param C [in] Character to check.
    @return True if C is a symbol character, False if not.
  }
const
  // Set of symbol characters, written as ranges of consecutive character
  // codes:
  //   '#'..'$' is  # $
  //   '&'..'/' is  & ' ( ) * + , - . /
  //   ':'..'>' is  : ; < = >
  cSymbolChars = [
    '#'..'$', '&'..'/', ':'..'>', '@', '[', ']', '^', '{', '}'
  ];
begin
  Result := IsCharInSet(C, cSymbolChars);
end;
function IsExponentChar(const C: Char): Boolean; inline;
  {Tests whether a character introduces the exponent of a number written in
  "scientific" notation.
    @param C [in] Character to check.
    @return True if C is upper or lower case letter E, False if not.
  }
const
  cExponentChars = ['e', 'E'];  // characters that can introduce an exponent
begin
  Result := IsCharInSet(C, cExponentChars);
end;
function IsUnaryPlusMinusChar(const C: Char): Boolean; inline;
  {Tests whether a character is a plus or minus sign.
    @param C [in] Character to check.
    @return True if C is '+' or '-', False if not.
  }
const
  cSignChars = ['-', '+'];  // characters accepted as a sign
begin
  Result := IsCharInSet(C, cSignChars);
end;
function IsSeparatorChar(const C: Char): Boolean; inline;
  {Tests whether a character separates tokens: separators are the end of line
  marker, symbol characters and in-line white space. Note that the end of file
  marker is not treated as a separator.
    @param C [in] Character to check.
    @return True if C is a separator, False if not.
  }
begin
  Result := (C = cEOL) or IsSymbolChar(C) or IsWhiteSpaceChar(C);
end;
function IndexInTable(const Str: string; const Table: array of string): Integer;
  {Finds the first entry in a table of strings that matches a given string.
  The comparison ignores case.
    @param Str [in] String to search for.
    @param Table [in] Table of strings to search.
    @return Index of first matching entry in table or -1 if there is no match.
  }
var
  Idx: Integer; // index of each table entry in turn
begin
  // Note: calling code assumes Table is zero based
  for Idx := Low(Table) to High(Table) do
  begin
    if AnsiSameText(Table[Idx], Str) then
    begin
      // found: first match wins
      Result := Idx;
      Exit;
    end;
  end;
  // fell out of loop without a match
  Result := -1;
end;
procedure InitStringList(out Strings: TStringList;
  const Table: array of string);
  {Creates a new string list, fills it with the entries of a table of strings
  and then sorts it. The caller owns the new list.
    @param Strings [out] Set to the newly created, sorted, string list.
    @param Table [in] Table of strings to be copied into the string list.
  }
var
  Row: Integer; // index of each row of table in turn
begin
  Strings := TStringList.Create;
  // Copy the whole table before switching sorting on: the list is then sorted
  // once, when the Sorted property is set
  Row := Low(Table);
  while Row <= High(Table) do
  begin
    Strings.Add(Table[Row]);
    Inc(Row);
  end;
  Strings.Sorted := True;
  Strings.CaseSensitive := False;
end;
procedure InitSymbolMap(out Strings: TStringList);
  {Creates the string list that maps symbols to tokens and fills it from the
  cSymToTokenMap table. The caller owns the new list.
    @param Strings [out] Set to the newly created string list.
  }
var
  EntryIdx: Integer;          // index of each entry of symbol map table in turn
  TokenAsObj: TObject;        // a token disguised as an object reference
begin
  Strings := TStringList.Create;
  for EntryIdx := Low(cSymToTokenMap) to High(cSymToTokenMap) do
  begin
    // Tokens are stored in the list's Objects[] property: the ordinal value of
    // the token is cast to TObject, no object is ever created
    TokenAsObj := TObject(cSymToTokenMap[EntryIdx].Token);
    Strings.AddObject(cSymToTokenMap[EntryIdx].Symbol, TokenAsObj);
  end;
end;
function IsDoubleSym(const Symbol: string): Boolean;
  {Tests whether a string is one of the valid two character symbols.
    @param Symbol [in] Symbol to check.
    @return True if Symbol is listed in cDoubleSyms, False otherwise.
  }
begin
  // Lookup list is built from cDoubleSyms the first time it is needed
  if pvtDoubleSyms = nil then
    InitStringList(pvtDoubleSyms, cDoubleSyms);
  Result := pvtDoubleSyms.IndexOf(Symbol) <> -1;
end;
function IsDirective(const Ident: string): Boolean;
  {Tests whether an identifier is a Pascal directive. Case is ignored.
    @param Ident [in] Identifier to check.
    @return True if Ident is listed in cDirectives, False otherwise.
  }
begin
  // Lookup list is built from cDirectives the first time it is needed
  if pvtDirectives = nil then
    InitStringList(pvtDirectives, cDirectives);
  Result := pvtDirectives.IndexOf(Ident) <> -1;
end;
function IsKeyword(const Ident: string): Boolean;
  {Tests whether an identifier is a Pascal keyword. Case is ignored.
    @param Ident [in] Identifier to check.
    @return True if Ident is listed in cKeywords, False otherwise.
  }
begin
  // Lookup list is built from cKeywords the first time it is needed
  if pvtKeywords = nil then
    InitStringList(pvtKeywords, cKeywords);
  Result := pvtKeywords.IndexOf(Ident) <> -1;
end;
function SymbolToToken(const Symbol: string): THilitePasToken;
  {Looks up the token that a symbol is likely to represent.
    @param Symbol [in] Symbol to look up. Expected to be a one or two character
      symbol.
    @return Token associated with Symbol in cSymToTokenMap or tkError if
      Symbol is not in the map.
  }
var
  MapIdx: Integer;  // index of symbol in map, -1 if not present
begin
  // Map is built from cSymToTokenMap the first time it is needed
  if pvtSymMap = nil then
    InitSymbolMap(pvtSymMap);
  MapIdx := pvtSymMap.IndexOf(Symbol);
  if MapIdx < 0 then
  begin
    Result := tkError;
    Exit;
  end;
  // Token was stored in Objects[] cast to an object reference: cast it back
  Result := THilitePasToken(pvtSymMap.Objects[MapIdx]);
end;
function MatchingCommentCloser(const CommentOpener: string): string;
  {Finds the symbol that closes a comment that was opened by a given symbol.
    @param CommentOpener [in] Symbol that opened the comment. Must be one of
      the symbols listed in cCommentOpeners.
    @return Matching closing symbol from cCommentClosers.
  }
var
  OpenerIdx: Integer; // index of opener, and so of closer, in lookup tables
begin
  // Note: this code assumes cCommentXXX arrays are zero based: the index of
  // the opener in its table is also the index of the closer in its table
  OpenerIdx := IndexInTable(CommentOpener, cCommentOpeners);
  Assert(OpenerIdx >= 0, 'MatchingCommentCloser: invalid comment opener');
  Result := cCommentClosers[OpenerIdx];
end;
function IsCompilerDirOpener(const Str: string): Boolean;
  {Tests whether a string is a comment opening symbol that can also open a
  compiler directive.
    @param Str [in] String to be checked.
    @return True if Str is listed in cCompilerDirOpeners, False otherwise.
  }
begin
  // IndexInTable returns -1 only when Str is not in the table
  Result := IndexInTable(Str, cCompilerDirOpeners) <> -1;
end;
{ THilitePasLexer }
constructor THilitePasLexer.Create(const Stm: TStream);
{Class constructor. Sets up object to analyse code on a stream.
@param Stm [in] Stream containing Pascal source.
}
begin
inherited Create;
// Reader object is owned by this object: it is freed in the destructor
// NOTE(review): the stream itself is not freed in this class - presumably it
// remains owned by the caller; confirm against TTextStreamReader
fReader := TTextStreamReader.Create(Stm);
end;
destructor THilitePasLexer.Destroy;
{Class destructor. Tears down object.
}
begin
// Release the reader object created in the constructor
fReader.Free;
inherited;
end;
function THilitePasLexer.NextToken: THilitePasToken;
  {Reads the next Pascal token from input. The text of the token is stored in
  the token string and the kind of token is recorded.
    @return Token identifying type of token read.
  }
var
  Ch: Char; // character at current position in input
begin
  // Every token begins with an empty token string
  fTokenStr := '';
  // The current character, along with any open comment, determines which
  // parsing method is used
  Ch := fReader.Ch;
  if fCommentState.InComment then
  begin
    // A multi-line comment is still open: the current char either ends a line
    // or belongs to the body of the comment
    if Ch = cEOL then
      Result := ParseEOL
    else
      Result := ParseCommentInterior;
  end
  else
  begin
    // No comment is open: process normally
    if IsWhiteSpaceChar(Ch) then
      Result := ParseWhiteSpace
    else if IsValidIdentStartChar(Ch) then
      Result := ParseIdent
    else if IsSymbolChar(Ch) then
      Result := ParseSymbol
    else if IsDigit(Ch) then
      Result := ParseNumber
    else if Ch = cEOL then
      Result := ParseEOL
    else if Ch = cEOF then
      Result := tkEOF
    else
      Result := ParseUnknown;
  end;
  // Remember the kind of token for the Token property
  fToken := Result;
end;
function THilitePasLexer.ParseChar: THilitePasToken;
  {Reads the numeric part of a character literal from input and appends it to
  the token string. On entry the token string already contains '#' and the
  current character is the one that follows the '#'. The numeric part may be
  either a whole number or a hex number.
    @return Token indicating literal char (tkChar) or tkError if the '#' is
      followed by neither a digit nor the hex symbol.
  }
begin
  if SymbolToToken(fReader.Ch) = tkHex then
  begin
    // Hex number: store the hex symbol ('$'), skip over it, then read the hex
    // digits. Token returned by ParseHex is not used.
    UpdateTokenStr;
    fReader.NextChar;
    ParseHex;
    Result := tkChar;
  end
  else if IsDigit(fReader.Ch) then
  begin
    // Whole number: read its digits
    ParseWholeNumber;
    Result := tkChar;
  end
  else
    // Neither kind of number follows the '#'
    Result := tkError;
end;
function THilitePasLexer.ParseCommentFromStart: THilitePasToken;
  {Starts parsing a new comment or compiler directive. On entry the token
  string contains the comment opening symbol and the current character is the
  one that follows that symbol.
    @return Token telling whether this is a comment or compiler directive
      (tkComment, tkCompilerDir).
  }
const
  // Maps flag that is True for compiler directives onto kind of comment
  cCommentKinds: array[Boolean] of THilitePasToken = (
    tkComment, tkCompilerDir
  );
var
  IsCompilerDir: Boolean; // whether comment is actually a compiler directive
begin
  // Note that a comment is now open and which symbol will close it
  fCommentState.InComment := True;
  fCommentState.CommentCloser := MatchingCommentCloser(fTokenStr);
  // A compiler directive has '$' straight after the opening symbol, but only
  // certain opening symbols can introduce a directive
  IsCompilerDir := (fReader.Ch = cCompilerDirChar)
    and IsCompilerDirOpener(fTokenStr);
  fCommentState.CommentType := cCommentKinds[IsCompilerDir];
  // Now read the body of the comment
  Result := ParseCommentInterior;
end;
function THilitePasLexer.ParseCommentInterior: THilitePasToken;
{Analyses body of comment after start or after resuming processing multi-line
comments. Comment text is appended to the token string until the closing
comment symbol, an end of line or the end of file is reached. The comment is
flagged as closed when its closing symbol or the end of file is found. It is
left open when an end of line that is not the closing symbol is found: in that
case NextToken resumes the comment once the end of line has been processed.
@return Token telling whether this is a comment or compiler directive
(tkComment, tkCompilerDir).
}
var
Done: Boolean; // flag true when we have finished comment
begin
Assert(fCommentState.InComment,
ClassName + '.ParseCommentInterior: called when not in comment');
Assert(fCommentState.CommentType in [tkComment, tkCompilerDir],
ClassName + '.ParseCommentInterior: invalid comment type');
Assert(Length(fCommentState.CommentCloser) > 0,
ClassName + '.ParseCommentInterior: invalid comment closer');
// Kind of comment was decided when comment was opened
Result := fCommentState.CommentType;
// Loop thru all comment, looking for closing comment symbol
// NOTE: every pass thru the loop ends with a call to fReader.NextChar, so any
// branch that needs to leave a character as current on exit calls
// fReader.PutBackChar first.
// NOTE(review): this assumes PutBackChar steps the reader back by one
// character - confirm against TTextStreamReader
Done := False;
while (fReader.Ch <> cEOF) and not Done do
begin
if fReader.Ch = fCommentState.CommentCloser[1] then
begin
// We have encountered 1st char of a comment "closer"
if Length(fCommentState.CommentCloser) = 1 then
begin
// Our closer is a single char: comment is closed
Done := True;
fCommentState.InComment := False;
if fCommentState.CommentCloser[1] = cEOL then
// closer is EOL: put it back to be read later
// (EOL is not part of comment: it is returned as a separate token)
fReader.PutBackChar
else
// closer not EOL: add it to token string
UpdateTokenStr;
end
else
begin
// Our possible closer has two chars
// Record first char in token string
// (it belongs to the comment whether or not it begins the closer)
UpdateTokenStr;
// Peek ahead at next char
fReader.NextChar;
if fReader.Ch = fCommentState.CommentCloser[2] then
begin
// This is the expected closer: comment is closed
Done := True;
fCommentState.InComment := False;
UpdateTokenStr;
end
else
// False alarm: put back the char we peeked at
// (it is examined afresh on next pass thru loop, so it may itself
// begin the closer or be an EOL)
fReader.PutBackChar;
end;
end
else
begin
// Ordinary comment text
if fReader.Ch = cEOL then
begin
// EOL: put it back and stop parsing
// the comment stays open: we will continue processing after EOL handled
Done := True;
fReader.PutBackChar;
end
else
// Not EOL: add char to token string
UpdateTokenStr;
end;
fReader.NextChar;
end;
// If at EOF ensure that comment is closed
// (an unterminated comment must not stay open once input is exhausted)
if fReader.Ch = cEOF then
fCommentState.InComment := False;
end;
function THilitePasLexer.ParseEOL: THilitePasToken;
  {Records an end of line in the token string and moves past it in the input.
    @return End of line token (tkEOL).
  }
begin
  Result := tkEOL;
  // Token string always gets the end of line marker character
  UpdateTokenStr(cEOL);
  fReader.NextChar;
end;
function THilitePasLexer.ParseHex: THilitePasToken;
  {Reads the digits of a hexadecimal integer from input and appends them to the
  token string. On entry the token string ends with the '$' that introduces
  the hex number (it is '$' when called from ParseSymbol and '#$' when called
  from ParseChar) and the current character is the one that follows the '$'.
    @return Token indicating hexadecimal value (tkHex) if the digits are
      followed by a separator or by the end of the input, tkError otherwise.
  }
begin
  // Build string of hex digits
  while IsHexDigit(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // Check that we ended in a valid way: error if not. A hex number is
  // terminated by a separator or by the end of the input. The end of file
  // marker has to be tested for explicitly because IsSeparatorChar does not
  // accept it: without this test a hex number that is the last thing in the
  // input would be reported as an error.
  if IsSeparatorChar(fReader.Ch) or (fReader.Ch = cEOF) then
    Result := tkHex
  else
    Result := tkError;
end;
function THilitePasLexer.ParseIdent: THilitePasToken;
  {Reads an identifier from input into the token string then classifies it.
    @return tkKeyword if identifier is a keyword, tkDirective if it is a
      directive or tkIdentifier if it is neither.
  }
begin
  Assert(IsValidIdentStartChar(fReader.Ch),
    ClassName + '.ParseIdent: identifier starting character expected');
  // Gather all the characters of the identifier in token string
  while IsValidIdentBodyChar(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // Assume a plain identifier unless it is found to be a keyword or directive.
  // Keywords are tested for first, so a name that is both a keyword and a
  // directive is reported as a keyword.
  Result := tkIdentifier;
  if IsKeyword(fTokenStr) then
    Result := tkKeyword
  else if IsDirective(fTokenStr) then
    Result := tkDirective;
end;
function THilitePasLexer.ParseNumber: THilitePasToken;
  {Analyses a number from input and stores in token string. Number can be
  integer or real. On entry the current character must be a digit.
    @return Appropriate token for number (tkNumber or tkFloat) or tkError if
      the number has an exponent that has no digits.
  }
var
  TempCh: Char; // temporary storage for a character read from input
begin
  Assert(IsDigit(fReader.Ch), ClassName + '.ParseNumber: digit expected');
  // All numbers start with a whole number: read it
  ParseWholeNumber; // leaves current char as one immediately after number
  // Assume we have whole number and see if we can disprove it
  Result := tkNumber;
  if fReader.Ch = cDecimalPoint then
  begin
    // Char after whole number is a decimal point: this *may* indicate a float,
    // but may not since there are other symbols that start with '.'
    // Store the decimal point then read ahead to see what next char is
    TempCh := fReader.Ch;
    fReader.NextChar;
    if IsCharInSet(fReader.Ch, [cDecimalPoint, cCloseParen]) then
    begin
      // decimal point was followed by '.' or ')' making valid two char symbols
      // .. and .) => we put back the read character and get out, leaving first
      // decimal point as current character and returning whole number
      fReader.PutBackChar;
      Exit;
    end;
    // Decimal point was valid: record in token string
    UpdateTokenStr(TempCh);
    // If we have digits after decimal point read them into token str
    // Note: there may not necessarily be digits after '.' (e.g. 2. is a valid
    // Delphi float)
    if IsDigit(fReader.Ch) then
      ParseWholeNumber;
    Result := tkFloat;
  end;
  if IsExponentChar(fReader.Ch) then
  begin
    // Next char is an exponent (e or E) that is present in numbers in
    // "scientific" notation. This can either follow whole number, follow
    // decimal point or follow digits after decimal point. I.e. 2e4, 2.e3 and
    // 2.0e4 are all valid, as is 2.0e-4 etc.
    // Record exponent in token string
    UpdateTokenStr;
    // Read chars after exponent (first may be unary + or -)
    fReader.NextChar;
    if IsUnaryPlusMinusChar(fReader.Ch) then
    begin
      UpdateTokenStr;
      fReader.NextChar;
    end;
    // Next should come a whole number. ParseWholeNumber asserts that the
    // current char is a digit, so it must only be called when that is true:
    // malformed source code, e.g. "2e" or "2e+x", must yield an error token
    // and must not raise an assertion failure.
    if IsDigit(fReader.Ch) then
    begin
      ParseWholeNumber;
      Result := tkFloat;
    end
    else
      // Exponent has no digits: token read so far is not a valid number
      Result := tkError;
  end;
end;
function THilitePasLexer.ParseString: THilitePasToken;
  {Analyses a string literal from input and stores in token string. On entry
  the token string already contains the opening quote and the current
  character is the first character after that quote. Parsing ends after the
  closing quote. Since a string literal cannot span lines, parsing of an
  unterminated string ends at the end of the line: the end of line is left in
  the input to be read as a separate token.
    @return String token (tkString).
  }
var
  Done: Boolean; // flag true when done parsing string
begin
  Done := False;
  // Loop thru characters until end of string found. We also stop at end of
  // line and end of file so that an unterminated string can't swallow line
  // ends and the source code that follows them.
  while (fReader.Ch <> cEOF) and (fReader.Ch <> cEOL) and not Done do
  begin
    UpdateTokenStr;
    if fReader.Ch = cStringDelim then
    begin
      // Could be closing quote or pair of quotes used to embed quote in string
      // we need to read ahead to check this
      fReader.NextChar;
      if fReader.Ch = cStringDelim then
        // this is a pair of quotes ('') => embeds quote in string => not done
        UpdateTokenStr
      else
      begin
        // not a pair of quotes => string completed
        Done := True;
        // put back char we read ahead: call to NextChar below restores it as
        // current character
        fReader.PutBackChar;
      end;
    end;
    fReader.NextChar;
  end;
  Result := tkString;
end;
function THilitePasLexer.ParseSymbol: THilitePasToken;
  {Reads a one or two character symbol from input into the token string then
  checks whether it is a symbol in its own right or whether it introduces
  some other entity, i.e. a comment, string, character literal or hex number.
  In the latter case the rest of the entity is parsed and added to the token
  string.
    @return Token describing entity parsed.
  }
begin
  Assert(IsSymbolChar(fReader.Ch), ClassName + '.ParseSymbol: symbol expected');
  // Store the first character of symbol and move on
  UpdateTokenStr;
  fReader.NextChar;
  // If the next character completes a valid two character symbol then it is
  // stored and skipped over too
  if IsSymbolChar(fReader.Ch) and IsDoubleSym(fTokenStr + fReader.Ch) then
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
  // Find what the symbol represents. For most symbols this is the final
  // result, but some symbols only introduce a token that needs more parsing
  Result := SymbolToToken(fTokenStr);
  case Result of
    tkComment: Result := ParseCommentFromStart;
    tkString: Result := ParseString;
    tkChar: Result := ParseChar;
    tkHex: Result := ParseHex;
  end;
end;
function THilitePasLexer.ParseUnknown: THilitePasToken;
  {Handles a character that can't start any known token: the character is
  added to the token string and skipped over.
    @return Error token (tkError).
  }
begin
  UpdateTokenStr;
  fReader.NextChar;
  Result := tkError;
end;
function THilitePasLexer.ParseWhiteSpace: THilitePasToken;
  {Reads a run of white space from input, appending each white space character
  to the token string as it is read. End of line characters are not included.
    @return White space token (tkWhiteSpace).
  }
begin
  Assert(IsWhiteSpaceChar(fReader.Ch),
    ClassName + '.ParseWhiteSpace: current char not white space');
  Result := tkWhiteSpace;
  // Gather characters until something that is not white space is found
  while IsWhiteSpaceChar(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
end;
function THilitePasLexer.ParseWholeNumber: THilitePasToken;
  {Reads a run of digits from input, appending each digit to the token string.
  On exit the current character is the one that follows the last digit.
    @return Whole number token (tkNumber).
  }
begin
  Assert(IsDigit(fReader.Ch),
    ClassName + '.ParseWholeNumber: current char not a digit');
  Result := tkNumber;
  // Gather characters until something that is not a digit is found
  while IsDigit(fReader.Ch) do
  begin
    UpdateTokenStr;
    fReader.NextChar;
  end;
end;
procedure THilitePasLexer.UpdateTokenStr;
  {Appends the character at the current position in the input to the token
  string. Does nothing if that character is the end of file marker.
  }
begin
  UpdateTokenStr(fReader.Ch);
end;
procedure THilitePasLexer.UpdateTokenStr(const Ch: Char);
  {Appends a given character to the token string. Does nothing if the character
  is the end of file marker.
    @param Ch [in] Character to append.
  }
begin
  if Ch = cEOF then
    // EOF marker is never part of a token
    Exit;
  fTokenStr := fTokenStr + Ch;
end;
initialization
// Nothing to do: the private lookup lists are created on first use by the
// routines that search them
finalization
// Release any of the private lookup lists that were created. Lists that were
// never needed are still nil: calling Free on a nil reference does nothing.
pvtKeywords.Free;
pvtDirectives.Free;
pvtDoubleSyms.Free;
pvtSymMap.Free;
end.
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.