Menu

[r3204]: / branches / experimental / Src / TrunkSrc / UEncodings.pas  Maximize  Restore  History

Download this file

733 lines (677 with data), 24.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
{
* This Source Code Form is subject to the terms of the Mozilla Public License,
* v. 2.0. If a copy of the MPL was not distributed with this file, You can
* obtain one at http://mozilla.org/MPL/2.0/
*
* Copyright (C) 2009-2012, Peter Johnson (www.delphidabbler.com).
*
* $Rev$
* $Date$
*
* Provides support for certain character encodings used by the program.
}
unit UEncodings;
interface
uses
// Delphi
SysUtils, Classes, Types, Generics.Defaults, Generics.Collections,
// Project
UBaseObjects, UIStringList;
type
/// <summary>
/// Enumeration of identifiers of supported encodings.
/// </summary>
/// <remarks>
/// <para>Always ensure that etSysDefault is the last item in the enumeration.
/// This is so that other encodings will always be found before it, in case
/// one of the other encodings has the same code page as the system default.
/// The principle is, if the caller requests an encoding explicitly then they
/// shouldn't be handed the default TEncoding. If we allow this it makes
/// testing Windows-1252 requests hard on systems where the default code page
/// is also Windows-1252.</para>
/// <para>The enumeration is used as the index type of the map of encoding
/// information maintained by TEncodingHelper, so every value must be given an
/// entry in TEncodingHelper's class constructor.</para>
/// </remarks>
TEncodingType = (
etASCII, // ASCII
etISO88591, // ISO-8859-1
etUTF8, // UTF-8
etUnicode, // Unicode (UTF-16); mapped to same factory as etUTF16LE
etUTF16BE, // UTF-16BE
etUTF16LE, // UTF-16LE
etWindows1252, // Windows-1252
etSysDefault // default ANSI encoding; must remain last (see remarks)
);
type
/// <summary>
/// Encapsulation of binary data containing encoded text.
/// </summary>
/// <remarks>
/// The record pairs a byte array with the identifier of the encoding that
/// the bytes are expressed in. Both are read-only once constructed.
/// </remarks>
TEncodedData = record
strict private
var
// Bytes of the encoded text
fData: TBytes;
// Identifies the encoding used for fData
fEncodingType: TEncodingType;
public
/// <summary>
/// Constructs encoded data from array of bytes and specified encoding
/// type.
/// </summary>
/// <param name="AData">TBytes [in] Source of data. A copy of the array is
/// stored, not a reference to it.</param>
/// <param name="AEncodingType">TEncodingType [in] Type of encoding that
/// was used to create byte array.</param>
constructor Create(const AData: TBytes; const AEncodingType: TEncodingType);
overload;
/// <summary>
/// Constructs encoded data from a Unicode string and specified encoding
/// type.
/// </summary>
/// <param name="AStr">string [in] Source of data.</param>
/// <param name="AEncodingType">TEncodingType [in] Type of encoding to be
/// used to encode string.</param>
constructor Create(const AStr: string; const AEncodingType: TEncodingType);
overload;
/// <summary>
/// Constructs encoded data from content of a stream and specified
/// encoding type.
/// </summary>
/// <param name="AStream">TStream [in] Stream containing data.</param>
/// <param name="AEncodingType">TEncodingType [in] Type of encoding used
/// for stream data.</param>
/// <param name="AllStream">Boolean [in] Flag that indicates if whole
/// stream is to be copied. If True the stream position is reset to the
/// start and the whole stream is copied. If False stream is copied from
/// current position to end.</param>
constructor Create(const AStream: TStream;
const AEncodingType: TEncodingType; const AllStream: Boolean = False);
overload;
/// <summary>
/// Constructs a copy of another TEncodedData record.
/// </summary>
/// <param name="AData">TEncodedData [in] Record to be copied.</param>
constructor Create(const AData: TEncodedData); overload;
/// <summary>
/// Returns data as a string. Encoding type is used to decode data into
/// string.
/// </summary>
function ToString: string;
/// <summary>
/// Binary data.
/// </summary>
property Data: TBytes read fData;
/// <summary>
/// Type of encoding that was used to encode binary data.
/// </summary>
property EncodingType: TEncodingType read fEncodingType;
end;
type
/// <summary>
/// Static class that maintains a list of named character sets and enables
/// the associated encoding object to be created. For ANSI (code page based)
/// character sets, access is provided to the matching code page, and vice
/// versa. Also exposes useful constants.
/// </summary>
TEncodingHelper = class(TNoConstructObject)
strict private
type
/// <summary>Type of closure used to create a TEncoding instance.
/// </summary>
TEncodingFactoryFn = reference to function: TEncoding;
type
/// <summary>
/// Record storing required information about an encoding.
/// </summary>
TEncodingInfo = record
/// <summary>Character set of encoding.</summary>
/// <remarks>Set to '' for default ANSI encoding.</remarks>
CharSet: string;
/// <summary>Flag true if ANSI encoding.</summary>
/// <remarks>When False CodePage field has no meaning.</remarks>
IsAnsi: Boolean;
/// <summary>Code page of encoding.</summary>
/// <remarks>Should be ignored when IsAnsi is False.</remarks>
CodePage: Integer;
/// <summary>Closure used to create TEncoding instances for the
/// encoding.</summary>
FactoryFn: TEncodingFactoryFn;
end;
class var
/// <summary>Map of encoding types onto related information records.
/// </summary>
/// <remarks>Populated by the class constructor.</remarks>
fMap: array[TEncodingType] of TEncodingInfo;
strict protected
/// <summary>
/// Checks if a character set is wanted in the current class.
/// </summary>
/// <param name="CharSet">string [in] Name of a character set to be tested.
/// </param>
/// <returns>Always True in this class.</returns>
/// <remarks>
/// <para>A wanted character set is a sub-set of those registered in the
/// class constructor. Descendant classes may need to exclude some
/// character sets from those they support. This is done by returning
/// False from this method when called with character set names that are
/// not supported.</para>
/// <para>All known character sets are wanted in this class, so True is
/// always returned.</para>
/// </remarks>
class function IsWantedCharSet(const CharSet: string): Boolean; virtual;
/// <summary>
/// Looks up a specified character set in fMap.
/// </summary>
/// <param name="CharSet">string [in] Character set.</param>
/// <param name="EncodingType">TEncodingType [out] Set to encoding type
/// of character set if it is known. Undefined otherwise.</param>
/// <returns>True if character set is known, False if not.</returns>
/// <remarks>An empty character set name is never found.</remarks>
class function LookupCharSet(const CharSet: string;
out EncodingType: TEncodingType): Boolean;
/// <summary>
/// Looks up a specified character set in fMap and checks that it is
/// wanted by the current class.
/// </summary>
/// <param name="CharSet">string [in] Character set.</param>
/// <param name="EncodingType">TEncodingType [out] Set to encoding type
/// of character set if it is both known and wanted. Undefined otherwise.
/// </param>
/// <returns>True if character set is both known and wanted, False if
/// not.</returns>
class function LookupValidCharSet(const CharSet: string;
out EncodingType: TEncodingType): Boolean;
/// <summary>
/// Looks up a specified code page in fMap.
/// </summary>
/// <param name="CodePage">Integer [in] Code page.</param>
/// <param name="EncodingType">TEncodingType [out] Set to encoding type
/// of code page if it is known. Undefined otherwise.</param>
/// <returns>True if code page is known, False if not.</returns>
/// <remarks>Only entries flagged as ANSI encodings are considered.
/// </remarks>
class function LookupCodePage(const CodePage: Integer;
out EncodingType: TEncodingType): Boolean;
/// <summary>
/// Looks up a specified code page in fMap and checks that the associated
/// character set is wanted by the current class.
/// </summary>
/// <param name="CodePage">Integer [in] Code page.</param>
/// <param name="EncodingType">TEncodingType [out] Set to encoding type
/// of code page if it is both known and wanted. Undefined otherwise.
/// </param>
/// <returns>True if code page is both known and wanted, False if not.
/// </returns>
class function LookupValidCodePage(const CodePage: Integer;
out EncodingType: TEncodingType): Boolean;
/// <summary>
/// Gets the code page of a character set, if any.
/// </summary>
/// <param name="CharSet">string [in] Character set.</param>
/// <param name="CodePage">Integer [out] Set to code page of character set
/// if it is known, wanted and is an ANSI character set. Undefined
/// otherwise.</param>
/// <returns>True if character set is known, wanted and is an ANSI
/// character set. False if not.
/// </returns>
class function GetValidCodePage(const CharSet: string;
out CodePage: Integer): Boolean;
public
// Constants storing names of recognised character sets
const
/// <summary>ASCII character set name.</summary>
ASCIICharSetName = 'ASCII';
/// <summary>ISO-8859-1 character set name.</summary>
ISO88591CharSetName = 'ISO-8859-1';
/// <summary>UTF-8 character set name.</summary>
UTF8CharSetName = 'UTF-8';
/// <summary>UTF-16 character set name.</summary>
UTF16CharSetName = 'UTF-16';
/// <summary>UTF-16 big endian character set name.</summary>
UTF16BECharSetName = 'UTF-16BE';
/// <summary>UTF-16 little endian character set name.</summary>
UTF16LECharSetName = 'UTF-16LE';
/// <summary>Windows 1252 character set name.</summary>
Windows1252CharSetName = 'Windows-1252';
// Constants storing code pages of recognised ANSI (code page based)
// character sets
/// <summary>ASCII code page.</summary>
ASCIICodePage = 20127;
/// <summary>ISO-8859-1 code page.</summary>
ISO88591CodePage = 28591;
/// <summary>UTF-8 code page.</summary>
UTF8CodePage = 65001;
/// <summary>Windows-1252 code page.</summary>
Windows1252CodePage = 1252;
public
/// <summary>Initialises encoding maps.</summary>
class constructor Create;
/// <summary>
/// Returns name of character set to use by default when empty string is
/// specified as a character set name.
/// </summary>
/// <returns>string - Required default character set name.</returns>
/// <remarks>
/// Descendant classes can override to change the default character set.
/// </remarks>
class function DefaultCharSet: string; virtual;
/// <summary>
/// Frees a given encoding providing it is not a standard encoding.
/// </summary>
/// <param name="Encoding">TEncoding [in] Encoding to be freed if
/// necessary.</param>
/// <remarks>Use this method to free any encoding created by this class,
/// or an encoding created elsewhere to avoid the overhead of testing
/// for a standard encoding before freeing.</remarks>
class procedure FreeEncoding(const Encoding: TEncoding);
/// <summary>
/// Creates an encoding instance for a specified encoding type.
/// </summary>
/// <param name="EncType">TEncodingType [in] Encoding type.</param>
/// <returns>New encoding instance for the type.</returns>
/// <remarks>
/// Caller is responsible for freeing the encoding if it is not a standard
/// encoding.
/// </remarks>
class function GetEncoding(const EncType: TEncodingType): TEncoding;
overload;
/// <summary>
/// Creates an encoding instance that is associated with a named character
/// set.
/// </summary>
/// <param name="CharSet">string [in] Name of character set.</param>
/// <returns>New encoding instance for the character set.</returns>
/// <remarks>
/// <para>Caller is responsible for freeing the encoding if it is not a
/// standard encoding.</para>
/// <para>Exception raised if character set not supported.</para>
/// </remarks>
class function GetEncoding(const CharSet: string): TEncoding; overload;
/// <summary>
/// Gets an encoding that is associated with a code page.
/// </summary>
/// <param name="CodePage">Integer [in] Code page.</param>
/// <returns>New encoding instance for the code page.</returns>
/// <remarks>
/// <para>Caller is responsible for freeing the encoding if it is not a
/// standard encoding.</para>
/// <para>Exception raised if code page not supported.</para>
/// </remarks>
class function GetEncoding(const CodePage: Integer): TEncoding; overload;
/// <summary>
/// Gets the encoding type that is associated with a named character set.
/// </summary>
/// <param name="CharSet">string [in] Name of character set.</param>
/// <returns>TEncodingType - Required encoding type.</returns>
/// <remarks>Exception raised if character set not supported.</remarks>
class function GetEncodingType(const CharSet: string): TEncodingType;
/// <summary>
/// Returns an array of names of supported character sets.
/// </summary>
/// <returns>String array of required character set names.</returns>
/// <remarks>Only named character sets that are wanted by the current class
/// are included.</remarks>
class function CharSets: TStringDynArray;
end;
/// <summary>
/// ANSI string type that uses the ASCII code page (20127).
/// </summary>
ASCIIString = type AnsiString(20127);
/// <summary>
/// Converts a Unicode string into an ANSI string using the ASCII code page.
/// </summary>
function StringToASCIIString(const S: string): ASCIIString;
/// <summary>
/// Converts an array of bytes into an ANSI string using ASCII code page.
/// </summary>
/// <remarks>
/// Byte array is assumed to contain only valid ASCII characters. It is copied
/// without conversion, except that a single trailing 0 byte, if present, is
/// dropped.
/// </remarks>
function BytesToASCIIString(const Bytes: TBytes): ASCIIString;
/// <summary>
/// Checks if an encoding supports all the characters in a given string.
/// Returns True if all characters of the string convert correctly or False if
/// not.
/// </summary>
/// <remarks>
/// The check is made by encoding the string, decoding the result and
/// comparing it with the original string.
/// </remarks>
function EncodingSupportsString(const S: UnicodeString;
const Encoding: SysUtils.TEncoding): Boolean;
/// <summary>
/// Checks if a code page supports all the characters in a given string.
/// Returns True if all characters of the string convert correctly or False if
/// not.
/// </summary>
function CodePageSupportsString(const S: UnicodeString;
const CodePage: Integer): Boolean;
/// <summary>
/// Converts a Unicode wide character into one or more equivalent ANSI
/// characters from a specified code page.
/// </summary>
/// <param name="Source">WideChar [in] Unicode character for conversion.
/// </param>
/// <param name="CodePage">Integer [in] Code page to use for conversion.
/// </param>
/// <param name="Dest">TArray&lt;AnsiChar&gt; [out] Array of ANSI characters
/// that result from conversion.</param>
/// <returns>True if conversion was successful or False if conversion failed.
/// </returns>
/// <remarks>
/// <para>Dest has to be an array of characters because the ANSI character set
/// may be multi-byte (e.g. UTF-8).</para>
/// <para>A failure result means that there is no equivalent of the Unicode
/// character in the specified ANSI code page.</para>
/// <para>An OS error exception is raised if the underlying Windows API call
/// itself fails.</para>
/// </remarks>
function WideCharToChar(const Source: WideChar; const CodePage: Integer;
out Dest: TArray<AnsiChar>): Boolean;
implementation
uses
// Delphi
Windows,
// Project
ULocales, UStrUtils;
/// <summary>
/// Converts an array of bytes to an ANSI raw byte string.
/// </summary>
/// <param name="Bytes">TBytes [in] Byte array to convert.</param>
/// <param name="CP">Word [in] Code page of returned ANSI string.</param>
/// <returns>ANSI string with requested code page.</returns>
/// <remarks>
/// <para>Caller must ensure that the byte array has the correct format for
/// the requested code page: the bytes are copied, not converted.</para>
/// <para>Based on Stack Overflow posting at http://bit.ly/bAvtGd.</para>
/// <para>A single terminating 0 byte included in Bytes is excluded from the
/// result because Delphi adds its own terminal #0 character to ANSI strings.
/// </para>
/// <para>An empty byte array yields an empty string whose code page is not
/// explicitly set.</para>
/// </remarks>
function BytesToAnsiString(const Bytes: TBytes; const CP: Word): RawByteString;
var
  ByteCount: Integer; // number of bytes supplied by caller
begin
  ByteCount := Length(Bytes);
  SetLength(Result, ByteCount);
  if ByteCount = 0 then
    Exit; // nothing to copy: leave the empty string as it is
  // Raw copy of the bytes into the string's character buffer
  Move(Bytes[0], Result[1], ByteCount);
  // Drop one trailing zero byte, if any
  if Result[ByteCount] = #0 then
    SetLength(Result, ByteCount - 1);
  // Stamp the string with the code page without converting its content
  SetCodePage(Result, CP, False);
end;
function StringToASCIIString(const S: string): ASCIIString;
var
  ASCIIBytes: TBytes; // S encoded with the standard ASCII encoding
begin
  // Encode the Unicode string as ASCII bytes, then wrap those bytes in an
  // ANSI string tagged with the ASCII code page
  ASCIIBytes := TEncoding.ASCII.GetBytes(S);
  Result := BytesToAnsiString(ASCIIBytes, TEncodingHelper.ASCIICodePage);
end;
function BytesToASCIIString(const Bytes: TBytes): ASCIIString;
var
  Raw: RawByteString; // bytes wrapped as a string tagged with ASCII code page
begin
  // Bytes are copied as they are: no validation of ASCII content is performed
  Raw := BytesToAnsiString(Bytes, TEncodingHelper.ASCIICodePage);
  Result := Raw;
end;
function EncodingSupportsString(const S: UnicodeString;
  const Encoding: SysUtils.TEncoding): Boolean;
var
  EncodedBytes: TBytes; // S as encoded by Encoding
begin
  // Round trip the string through the encoding: the encoding supports the
  // string only if decoding the encoded bytes gives back the original text
  EncodedBytes := Encoding.GetBytes(S);
  Result := S = Encoding.GetString(EncodedBytes);
end;
function CodePageSupportsString(const S: UnicodeString;
  const CodePage: Integer): Boolean;
var
  CodePageEncoding: TEncoding; // temporary encoding for the code page
begin
  // Create a throw-away multibyte encoding for the code page and use it to
  // test the string
  CodePageEncoding := TMBCSEncoding.Create(CodePage);
  try
    Result := EncodingSupportsString(S, CodePageEncoding);
  finally
    // Release via the helper, which frees only non-standard encodings
    TEncodingHelper.FreeEncoding(CodePageEncoding);
  end;
end;
/// <summary>
/// Converts a Unicode wide character into one or more equivalent ANSI
/// characters from a specified code page.
/// </summary>
/// <param name="Source">WideChar [in] Unicode character for conversion.
/// </param>
/// <param name="CodePage">Integer [in] Code page to use for conversion.
/// </param>
/// <param name="Dest">TArray&lt;AnsiChar&gt; [out] Array of ANSI characters
/// that result from conversion.</param>
/// <returns>True if conversion was successful or False if the system default
/// character had to be substituted for Source.</returns>
/// <remarks>
/// <para>An OS error exception is raised if the Windows API call fails.</para>
/// <para>For the UTF-7 and UTF-8 code pages Windows cannot report use of a
/// default character, so True is returned whenever the API call succeeds.
/// </para>
/// </remarks>
function WideCharToChar(const Source: WideChar; const CodePage: Integer;
  out Dest: TArray<AnsiChar>): Boolean;
var
  UsedDefChar: BOOL;     // set by Windows if default character was substituted
  UsedDefCharPtr: PBOOL; // pointer passed to API: nil where not permitted
  BufSize: Integer;      // number of ANSI characters needed for conversion
begin
  SetLength(Dest, 0);
  // Start from "no substitution" so that the result is well defined even when
  // Windows is not asked to, or does not, write to the flag
  UsedDefChar := False;
  // WideCharToMultiByte fails with ERROR_INVALID_PARAMETER if
  // lpUsedDefaultChar is not nil when the code page is CP_UTF7 or CP_UTF8
  if (CodePage = CP_UTF7) or (CodePage = CP_UTF8) then
    UsedDefCharPtr := nil
  else
    UsedDefCharPtr := @UsedDefChar;
  // First call gets the required buffer size: no output buffer is needed, so
  // pass nil rather than indexing the (still empty) Dest array
  BufSize := WideCharToMultiByte(CodePage, 0, @Source, 1, nil, 0, nil, nil);
  if BufSize = 0 then
    RaiseLastOSError;
  // Source length is passed explicitly, so no terminating zero is written and
  // the buffer need only be BufSize characters long
  SetLength(Dest, BufSize);
  if WideCharToMultiByte(
    CodePage, 0, @Source, 1, @Dest[0], BufSize, nil, UsedDefCharPtr
  ) = 0 then
    RaiseLastOSError;
  Result := not UsedDefChar;
end;
{ TEncodingHelper }
resourcestring
// Error messages used when raising ENotSupportedException from the
// GetEncoding and GetEncodingType methods
// %s is replaced by the name of the unsupported character set
sBadCharSet = 'Character set %s not supported';
// %d is replaced by the number of the unsupported code page
sBadCodePage = 'Code page %d not supported';
class function TEncodingHelper.CharSets: TStringDynArray;
var
  Count: Integer;          // number of character set names stored so far
  EncType: TEncodingType;  // loops through all encoding types
  CharSetName: string;     // name of character set for current encoding type
begin
  // Allocate for the worst case (every map entry accepted): trimmed below
  SetLength(Result, Length(fMap));
  Count := 0;
  for EncType := Low(TEncodingType) to High(TEncodingType) do
  begin
    CharSetName := fMap[EncType].CharSet;
    // Skip the unnamed (default ANSI) entry and any unwanted character sets
    if (CharSetName = '') or not IsWantedCharSet(CharSetName) then
      Continue;
    Result[Count] := CharSetName;
    Inc(Count);
  end;
  SetLength(Result, Count);
end;
class constructor TEncodingHelper.Create;

  // Builds a closure that creates a new multibyte encoding for the given code
  // page each time it is called.
  function MBCSFactoryFn(CodePage: Integer): TEncodingFactoryFn;
  begin
    Result := function: TEncoding
      begin
        Result := TMBCSEncoding.Create(CodePage);
      end;
  end;

begin
  // Each entry of fMap is filled in field by field. Entries whose factory
  // returns one of TEncoding's class properties use a simple closure; the
  // others get a closure from MBCSFactoryFn.

  // ASCII
  fMap[etASCII].CharSet := ASCIICharSetName;
  fMap[etASCII].IsAnsi := True;
  fMap[etASCII].CodePage := ASCIICodePage;
  fMap[etASCII].FactoryFn :=
    function: TEncoding begin Result := TEncoding.ASCII; end;

  // ISO-8859-1
  fMap[etISO88591].CharSet := ISO88591CharSetName;
  fMap[etISO88591].IsAnsi := True;
  fMap[etISO88591].CodePage := ISO88591CodePage;
  fMap[etISO88591].FactoryFn := MBCSFactoryFn(ISO88591CodePage);

  // UTF-8
  fMap[etUTF8].CharSet := UTF8CharSetName;
  fMap[etUTF8].IsAnsi := True;
  fMap[etUTF8].CodePage := UTF8CodePage;
  fMap[etUTF8].FactoryFn :=
    function: TEncoding begin Result := TEncoding.UTF8; end;

  // UTF-16: not an ANSI encoding, so code page has no meaning
  fMap[etUnicode].CharSet := UTF16CharSetName;
  fMap[etUnicode].IsAnsi := False;
  fMap[etUnicode].CodePage := 0;
  fMap[etUnicode].FactoryFn :=
    function: TEncoding begin Result := TEncoding.Unicode; end;

  // UTF-16 big endian: not an ANSI encoding
  fMap[etUTF16BE].CharSet := UTF16BECharSetName;
  fMap[etUTF16BE].IsAnsi := False;
  fMap[etUTF16BE].CodePage := 0;
  fMap[etUTF16BE].FactoryFn :=
    function: TEncoding begin Result := TEncoding.BigEndianUnicode; end;

  // UTF-16 little endian: shares the factory used for UTF-16
  fMap[etUTF16LE].CharSet := UTF16LECharSetName;
  fMap[etUTF16LE].IsAnsi := False;
  fMap[etUTF16LE].CodePage := 0;
  fMap[etUTF16LE].FactoryFn := fMap[etUnicode].FactoryFn;

  // Windows-1252
  fMap[etWindows1252].CharSet := Windows1252CharSetName;
  fMap[etWindows1252].IsAnsi := True;
  fMap[etWindows1252].CodePage := Windows1252CodePage;
  fMap[etWindows1252].FactoryFn := MBCSFactoryFn(Windows1252CodePage);

  // System default ANSI encoding: has no character set name
  fMap[etSysDefault].CharSet := '';
  fMap[etSysDefault].IsAnsi := True;
  fMap[etSysDefault].CodePage := ULocales.DefaultAnsiCodePage;
  fMap[etSysDefault].FactoryFn :=
    function: TEncoding begin Result := TEncoding.Default; end;
end;
class function TEncodingHelper.DefaultCharSet: string;
begin
  // UTF-8 is the default character set in this class
  Exit(UTF8CharSetName);
end;
class procedure TEncodingHelper.FreeEncoding(const Encoding: TEncoding);
begin
  // Standard encodings are never freed here
  if TEncoding.IsStandardEncoding(Encoding) then
    Exit;
  Encoding.Free;
end;
class function TEncodingHelper.GetEncoding(const CharSet: string): TEncoding;
var
  FoundType: TEncodingType; // encoding type associated with character set
begin
  // Character set must be both known and wanted by this class
  if LookupValidCharSet(CharSet, FoundType) then
    Result := GetEncoding(FoundType)
  else
    raise ENotSupportedException.CreateFmt(sBadCharSet, [CharSet]);
end;
class function TEncodingHelper.GetEncoding(const CodePage: Integer): TEncoding;
var
  FoundType: TEncodingType; // encoding type associated with code page
begin
  // Code page must be known and its character set wanted by this class
  if LookupValidCodePage(CodePage, FoundType) then
    Result := GetEncoding(FoundType)
  else
    raise ENotSupportedException.CreateFmt(sBadCodePage, [CodePage]);
end;
class function TEncodingHelper.GetEncodingType(const CharSet: string):
  TEncodingType;
var
  FoundType: TEncodingType; // encoding type associated with character set
begin
  // Reject character sets that are unknown or not wanted by this class
  if not LookupValidCharSet(CharSet, FoundType) then
    raise ENotSupportedException.CreateFmt(sBadCharSet, [CharSet]);
  Result := FoundType;
end;
class function TEncodingHelper.GetEncoding(
  const EncType: TEncodingType): TEncoding;
var
  Factory: TEncodingFactoryFn; // closure registered for the encoding type
begin
  // Fetch the factory closure from the map and call it to get the encoding
  Factory := fMap[EncType].FactoryFn;
  Result := Factory();
end;
class function TEncodingHelper.GetValidCodePage(const CharSet: string;
  out CodePage: Integer): Boolean;
var
  FoundType: TEncodingType; // encoding type associated with character set
begin
  // Succeeds only for a known, wanted character set that is an ANSI encoding.
  // The IsAnsi test is reached only when the lookup succeeds, so FoundType is
  // always defined when it is used.
  Result := LookupValidCharSet(CharSet, FoundType) and fMap[FoundType].IsAnsi;
  if Result then
    CodePage := fMap[FoundType].CodePage;
end;
class function TEncodingHelper.IsWantedCharSet(const CharSet: string): Boolean;
begin
  // Every character set is wanted in this base class
  Exit(True);
end;
class function TEncodingHelper.LookupCharSet(const CharSet: string;
  out EncodingType: TEncodingType): Boolean;
var
  Candidate: TEncodingType; // loops through all encoding types
begin
  Result := False;
  // The default ANSI entry has an empty name and must never be found by name
  if CharSet = '' then
    Exit;
  // Scan the map in enumeration order for a case insensitive name match
  for Candidate := Low(TEncodingType) to High(TEncodingType) do
  begin
    if not StrSameText(CharSet, fMap[Candidate].CharSet) then
      Continue;
    EncodingType := Candidate;
    Result := True;
    Break;
  end;
end;
class function TEncodingHelper.LookupCodePage(const CodePage: Integer;
  out EncodingType: TEncodingType): Boolean;
var
  Candidate: TEncodingType; // loops through all encoding types
begin
  Result := False;
  // Scan the map in enumeration order: the first matching entry wins
  for Candidate := Low(TEncodingType) to High(TEncodingType) do
  begin
    // Code pages only have meaning for ANSI encodings
    if not fMap[Candidate].IsAnsi then
      Continue;
    if fMap[Candidate].CodePage <> CodePage then
      Continue;
    EncodingType := Candidate;
    Result := True;
    Break;
  end;
end;
class function TEncodingHelper.LookupValidCharSet(const CharSet: string;
  out EncodingType: TEncodingType): Boolean;
begin
  // Character set must be known ...
  if not LookupCharSet(CharSet, EncodingType) then
    Exit(False);
  // ... and also wanted by this class
  Result := IsWantedCharSet(CharSet);
end;
class function TEncodingHelper.LookupValidCodePage(const CodePage: Integer;
  out EncodingType: TEncodingType): Boolean;
begin
  // Code page must be known ...
  if not LookupCodePage(CodePage, EncodingType) then
    Exit(False);
  // ... and the character set of the matching entry must be wanted
  Result := IsWantedCharSet(fMap[EncodingType].CharSet);
end;
{ TEncodedData }
constructor TEncodedData.Create(const AData: TBytes;
  const AEncodingType: TEncodingType);
begin
  fEncodingType := AEncodingType;
  // Store a private copy so later changes to the caller's array have no effect
  fData := Copy(AData);
end;
constructor TEncodedData.Create(const AStr: string;
  const AEncodingType: TEncodingType);
var
  Encoder: TEncoding; // encoding used to convert the string to bytes
begin
  Encoder := TEncodingHelper.GetEncoding(AEncodingType);
  try
    // Encode the string using the encoding for the requested type
    fData := Encoder.GetBytes(AStr);
  finally
    // Helper frees the encoding only if it is not a standard one
    TEncodingHelper.FreeEncoding(Encoder);
  end;
  fEncodingType := AEncodingType;
end;
constructor TEncodedData.Create(const AStream: TStream;
  const AEncodingType: TEncodingType; const AllStream: Boolean);
var
  ByteCount: Integer; // number of bytes to be read from stream
begin
  // When the whole stream is wanted, rewind it before reading
  if AllStream then
    AStream.Position := 0;
  // Size the buffer to hold everything from current position to end of stream
  SetLength(fData, AStream.Size - AStream.Position);
  ByteCount := Length(fData);
  if ByteCount > 0 then
    AStream.ReadBuffer(Pointer(fData)^, ByteCount);
  fEncodingType := AEncodingType;
end;
constructor TEncodedData.Create(const AData: TEncodedData);
begin
  fEncodingType := AData.fEncodingType;
  // Copy the bytes so that the two records do not share the same array
  fData := Copy(AData.fData);
end;
function TEncodedData.ToString: string;
var
  Decoder: TEncoding; // encoding used to convert the bytes to a string
begin
  Decoder := TEncodingHelper.GetEncoding(fEncodingType);
  try
    // Decode the stored bytes using the encoding for the record's type
    Result := Decoder.GetString(fData);
  finally
    // Helper frees the encoding only if it is not a standard one
    TEncodingHelper.FreeEncoding(Decoder);
  end;
end;
end.
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.