{-----------------------------------------------------------------------------
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/MPL-1.1.html
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either expressed or implied. See the License for
the specific language governing rights and limitations under the License.
The Original Code is: JvStrToHtml.PAS, released on 2001-02-28.
The Initial Developer of the Original Code is Sébastien Buysse [sbuysse att buypin dott com]
Portions created by Sébastien Buysse are Copyright (C) 2001 Sébastien Buysse.
All Rights Reserved.
Contributor(s): Michael Beck [mbeck att bigfoot dott com].
Andreas Hausladen [Andreas dott Hausladen att gmx dott de]
You may retrieve the latest version of this file at the Project JEDI's JVCL home page,
located at http://jvcl.delphi-jedi.org
Known Issues:
-----------------------------------------------------------------------------}
// $Id$
unit JvStrToHtml;
{$I jvcl.inc}
interface
uses
{$IFDEF UNITVERSIONING}
JclUnitVersioning,
{$ENDIF UNITVERSIONING}
SysUtils, Classes,
JvComponentBase;
type
{ Component wrapper around the unit-level StringToHtml / HtmlToString
  routines. It exposes a plain-text and an HTML representation as published
  properties; assigning one of them updates the other with the converted
  value. }
{$IFDEF RTL230_UP}
[ComponentPlatformsAttribute(pidWin32 or pidWin64 or pidOSX32)]
{$ENDIF RTL230_UP}
TJvStrToHtml = class(TJvComponent)
private
// backing field of the Html property
FHtml: string;
// backing field of the Text property
FValue: string;
// write accessor of Html
procedure SetHtml(const Value: string);
// write accessor of Text
procedure SetValue(const Value: string);
public
constructor Create(AOwner: TComponent); override;
// returns StringToHtml(Text)
function TextToHtml(const Text: string): string;
// returns HtmlToString(Text)
function HtmlToText(const Text: string): string;
published
// plain-text representation; writing it goes through SetValue
property Text: string read FValue write SetValue;
// HTML representation; writing it goes through SetHtml
property Html: string read FHtml write SetHtml;
end;
{ Returns Value with every character that has an entry in this unit's
  conversion table replaced by that entry's HTML text; inside a run of spaces
  every second space is replaced by the non-breaking-space text. }
function StringToHtml(const Value: string): string;
{ Reverse direction: replaces '&...;' sequences found in Value by the
  character they stand for; sequences that cannot be resolved are copied. }
function HtmlToString(const Value: string): string;
{ Returns the conversion-table text for a single character, or the character
  itself when the table has no entry for it. }
function CharToHtml(Ch: Char): string;
{ Version record for this unit; only compiled when UNITVERSIONING is defined.
  It is handed to RegisterUnitVersion in the initialization section.
  NOTE(review): the '$...$' strings look like version-control keyword
  placeholders - confirm before editing them. }
{$IFDEF UNITVERSIONING}
const
UnitVersioning: TUnitVersionInfo = (
RCSfile: '$URL$';
Revision: '$Revision$';
Date: '$Date$';
LogPath: 'JVCL\run'
);
{$ENDIF UNITVERSIONING}
implementation
{$IFNDEF UNICODE}
uses
Windows;
{$ENDIF ~UNICODE}
type
{ One row of the conversion table: a character code and the HTML text that
  represents it. }
TJvHtmlCodeRec = record
// character code; compared against Word(Ch) by the conversion routines
Ch: Word;
// HTML text emitted for / recognised as that character
Html: string;
end;
const
{ References:
http://www.w3.org/TR/REC-html40/charset.html#h-5.3
http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.2.1
http://www.w3.org/TR/REC-html40/sgml/entities.html#h-24.4.1
}
Conversions: array [0..240] of TJvHtmlCodeRec = (
(Ch: 34; Html: '"'),
(Ch: 38; Html: '&'),
(Ch: 39; Html: '''),
(Ch: 60; Html: '<'),
(Ch: 62; Html: '>'),
(Ch: 160; Html: ' '),
(Ch: 161; Html: '¡'),
(Ch: 162; Html: '¢'),
(Ch: 163; Html: '£'),
(Ch: 164; Html: '¤'),
(Ch: 165; Html: '¥'),
(Ch: 166; Html: '¦'),
(Ch: 167; Html: '§'),
(Ch: 168; Html: '¨'),
(Ch: 169; Html: '©'),
(Ch: 170; Html: 'ª'),
(Ch: 171; Html: '«'),
(Ch: 172; Html: '¬'),
(Ch: 173; Html: '­'),
(Ch: 174; Html: '®'),
(Ch: 175; Html: '¯'),
(Ch: 176; Html: '°'),
(Ch: 177; Html: '±'),
(Ch: 178; Html: '²'),
(Ch: 179; Html: '³'),
(Ch: 180; Html: '´'),
(Ch: 181; Html: 'µ'),
(Ch: 182; Html: '¶'),
(Ch: 183; Html: '·'),
(Ch: 184; Html: '¸'),
(Ch: 185; Html: '¹'),
(Ch: 186; Html: 'º'),
(Ch: 187; Html: '»'),
(Ch: 188; Html: '¼'),
(Ch: 189; Html: '½'),
(Ch: 190; Html: '¾'),
(Ch: 191; Html: '¿'),
(Ch: 192; Html: 'À'),
(Ch: 193; Html: 'Á'),
(Ch: 194; Html: 'Â'),
(Ch: 195; Html: 'Ã'),
(Ch: 196; Html: 'Ä'),
(Ch: 197; Html: 'Å'),
(Ch: 198; Html: 'Æ'),
(Ch: 199; Html: 'Ç'),
(Ch: 200; Html: 'È'),
(Ch: 201; Html: 'É'),
(Ch: 202; Html: 'Ê'),
(Ch: 203; Html: 'Ë'),
(Ch: 204; Html: 'Ì'),
(Ch: 205; Html: 'Í'),
(Ch: 206; Html: 'Î'),
(Ch: 207; Html: 'Ï'),
(Ch: 208; Html: 'Ð'),
(Ch: 209; Html: 'Ñ'),
(Ch: 210; Html: 'Ò'),
(Ch: 211; Html: 'Ó'),
(Ch: 212; Html: 'Ô'),
(Ch: 213; Html: 'Õ'),
(Ch: 214; Html: 'Ö'),
(Ch: 215; Html: '×'),
(Ch: 216; Html: 'Ø'),
(Ch: 217; Html: 'Ù'),
(Ch: 218; Html: 'Ú'),
(Ch: 219; Html: 'Û'),
(Ch: 220; Html: 'Ü'),
(Ch: 221; Html: 'Ý'),
(Ch: 222; Html: 'Þ'),
(Ch: 223; Html: 'ß'),
(Ch: 224; Html: 'à'),
(Ch: 225; Html: 'á'),
(Ch: 226; Html: 'â'),
(Ch: 227; Html: 'ã'),
(Ch: 228; Html: 'ä'),
(Ch: 229; Html: 'å'),
(Ch: 230; Html: 'æ'),
(Ch: 231; Html: 'ç'),
(Ch: 232; Html: 'è'),
(Ch: 233; Html: 'é'),
(Ch: 234; Html: 'ê'),
(Ch: 235; Html: 'ë'),
(Ch: 236; Html: 'ì'),
(Ch: 237; Html: 'í'),
(Ch: 238; Html: 'î'),
(Ch: 239; Html: 'ï'),
(Ch: 240; Html: 'ð'),
(Ch: 241; Html: 'ñ'),
(Ch: 242; Html: 'ò'),
(Ch: 243; Html: 'ó'),
(Ch: 244; Html: 'ô'),
(Ch: 245; Html: 'õ'),
(Ch: 246; Html: 'ö'),
(Ch: 247; Html: '÷'),
(Ch: 248; Html: 'ø'),
(Ch: 249; Html: 'ù'),
(Ch: 250; Html: 'ú'),
(Ch: 251; Html: 'û'),
(Ch: 252; Html: 'ü'),
(Ch: 253; Html: 'ý'),
(Ch: 254; Html: 'þ'),
(Ch: 255; Html: 'ÿ'),
(Ch: 338; Html: 'Œ'),
(Ch: 339; Html: 'œ'),
(Ch: 352; Html: 'Š'),
(Ch: 353; Html: 'š'),
(Ch: 376; Html: 'Ÿ'),
(Ch: 402; Html: 'ƒ'),
(Ch: 710; Html: 'ˆ'),
(Ch: 732; Html: '˜'),
(Ch: 913; Html: 'Α'),
(Ch: 914; Html: 'Β'),
(Ch: 915; Html: 'Γ'),
(Ch: 916; Html: 'Δ'),
(Ch: 917; Html: 'Ε'),
(Ch: 918; Html: 'Ζ'),
(Ch: 919; Html: 'Η'),
(Ch: 920; Html: 'Θ'),
(Ch: 921; Html: 'Ι'),
(Ch: 922; Html: 'Κ'),
(Ch: 923; Html: 'Λ'),
(Ch: 924; Html: 'Μ'),
(Ch: 925; Html: 'Ν'),
(Ch: 926; Html: 'Ξ'),
(Ch: 927; Html: 'Ο'),
(Ch: 928; Html: 'Π'),
(Ch: 929; Html: 'Ρ'),
(Ch: 931; Html: 'Σ'),
(Ch: 932; Html: 'Τ'),
(Ch: 933; Html: 'Υ'),
(Ch: 934; Html: 'Φ'),
(Ch: 935; Html: 'Χ'),
(Ch: 936; Html: 'Ψ'),
(Ch: 937; Html: 'Ω'),
(Ch: 945; Html: 'α'),
(Ch: 946; Html: 'β'),
(Ch: 947; Html: 'γ'),
(Ch: 948; Html: 'δ'),
(Ch: 949; Html: 'ε'),
(Ch: 950; Html: 'ζ'),
(Ch: 951; Html: 'η'),
(Ch: 952; Html: 'θ'),
(Ch: 953; Html: 'ι'),
(Ch: 954; Html: 'κ'),
(Ch: 955; Html: 'λ'),
(Ch: 956; Html: 'μ'),
(Ch: 957; Html: 'ν'),
(Ch: 958; Html: 'ξ'),
(Ch: 959; Html: 'ο'),
(Ch: 960; Html: 'π'),
(Ch: 961; Html: 'ρ'),
(Ch: 962; Html: 'ς'),
(Ch: 963; Html: 'σ'),
(Ch: 964; Html: 'τ'),
(Ch: 965; Html: 'υ'),
(Ch: 966; Html: 'φ'),
(Ch: 967; Html: 'χ'),
(Ch: 968; Html: 'ψ'),
(Ch: 969; Html: 'ω'),
(Ch: 977; Html: 'ϑ'),
(Ch: 978; Html: 'ϒ'),
(Ch: 982; Html: 'ϖ'),
(Ch: 8194; Html: ' '),
(Ch: 8195; Html: ' '),
(Ch: 8201; Html: ' '),
(Ch: 8204; Html: '‌'),
(Ch: 8205; Html: '‍'),
(Ch: 8206; Html: '‎'),
(Ch: 8207; Html: '‏'),
(Ch: 8211; Html: '–'),
(Ch: 8212; Html: '—'),
(Ch: 8216; Html: '‘'),
(Ch: 8217; Html: '’'),
(Ch: 8218; Html: '‚'),
(Ch: 8220; Html: '“'),
(Ch: 8221; Html: '”'),
(Ch: 8222; Html: '„'),
(Ch: 8224; Html: '†'),
(Ch: 8225; Html: '‡'),
(Ch: 8226; Html: '•'),
(Ch: 8230; Html: '…'),
(Ch: 8240; Html: '‰'),
(Ch: 8242; Html: '′'),
(Ch: 8243; Html: '″'),
(Ch: 8249; Html: '‹'),
(Ch: 8250; Html: '›'),
(Ch: 8254; Html: '‾'),
(Ch: 8364; Html: '€'),
(Ch: 8482; Html: '™'),
(Ch: 8592; Html: '←'),
(Ch: 8593; Html: '↑'),
(Ch: 8594; Html: '→'),
(Ch: 8595; Html: '↓'),
(Ch: 8596; Html: '↔'),
(Ch: 8629; Html: '↵'),
(Ch: 8704; Html: '∀'),
(Ch: 8706; Html: '∂'),
(Ch: 8707; Html: '∃'),
(Ch: 8709; Html: '∅'),
(Ch: 8711; Html: '∇'),
(Ch: 8712; Html: '∈'),
(Ch: 8713; Html: '∉'),
(Ch: 8715; Html: '∋'),
(Ch: 8719; Html: '∏'),
(Ch: 8721; Html: '∑'),
(Ch: 8722; Html: '−'),
(Ch: 8727; Html: '∗'),
(Ch: 8730; Html: '√'),
(Ch: 8733; Html: '∝'),
(Ch: 8734; Html: '∞'),
(Ch: 8736; Html: '∠'),
(Ch: 8743; Html: '∧'),
(Ch: 8744; Html: '∨'),
(Ch: 8745; Html: '∩'),
(Ch: 8746; Html: '∪'),
(Ch: 8747; Html: '∫'),
(Ch: 8756; Html: '∴'),
(Ch: 8764; Html: '∼'),
(Ch: 8773; Html: '≅'),
(Ch: 8776; Html: '≈'),
(Ch: 8800; Html: '≠'),
(Ch: 8801; Html: '≡'),
(Ch: 8804; Html: '≤'),
(Ch: 8805; Html: '≥'),
(Ch: 8834; Html: '⊂'),
(Ch: 8835; Html: '⊃'),
(Ch: 8836; Html: '⊄'),
(Ch: 8838; Html: '⊆'),
(Ch: 8839; Html: '⊇'),
(Ch: 8853; Html: '⊕'),
(Ch: 8855; Html: '⊗'),
(Ch: 8869; Html: '⊥'),
(Ch: 8901; Html: '⋅'),
(Ch: 8968; Html: '⌈'),
(Ch: 8969; Html: '⌉'),
(Ch: 8970; Html: '⌊'),
(Ch: 8971; Html: '⌋'),
(Ch: 9674; Html: '◊'),
(Ch: 9824; Html: '♠'),
(Ch: 9827; Html: '♣'),
(Ch: 9829; Html: '♥'),
(Ch: 9830; Html: '♦')
);
var
// Parallel to Conversions: after InitConversionsHash has run, element I holds
// GetHtmlHash(Conversions[I].Html). Stays nil until HtmlToString first
// needs it.
ConversionsHash: array of Word;
{$IFNDEF UNICODE}
const
// Flag value handed to MultiByteToWideChar by the non-Unicode code paths.
// NOTE(review): presumably declared here because the Windows unit of the
// targeted compilers does not export it - confirm.
MB_ERR_INVALID_CHARS = 8;
{$ENDIF ~UNICODE}
{ TJvStrToHtml }
{ Creates the component; both the HTML and the plain-text representation
  start out empty. }
constructor TJvStrToHtml.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  // explicit reset of both backing fields
  FHtml := '';
  FValue := '';
end;
{ Method form of the unit-level HtmlToString routine: returns Text with its
  HTML entities converted back to characters. Does not touch the component's
  own properties. }
function TJvStrToHtml.HtmlToText(const Text: string): string;
begin
  // delegate to the unit-level routine
  Result := JvStrToHtml.HtmlToString(Text);
end;
{ Write accessor of the Html property.
  Stores the assigned HTML and refreshes the plain-text field with its
  decoded form. Storing the value itself matters: without it, reading Html
  right after assigning it would return whatever was there before. }
procedure TJvStrToHtml.SetHtml(const Value: string);
begin
  FHtml := Value;
  FValue := HtmlToText(Value);
end;
{ Write accessor of the Text property.
  Stores the assigned plain text and refreshes the HTML field with its
  encoded form. Storing the value itself matters: without it, reading Text
  right after assigning it would return whatever was there before. }
procedure TJvStrToHtml.SetValue(const Value: string);
begin
  FValue := Value;
  FHtml := TextToHtml(Value);
end;
{ Method form of the unit-level StringToHtml routine: returns Text with
  special characters replaced by their HTML representation. Does not touch
  the component's own properties. }
function TJvStrToHtml.TextToHtml(const Text: string): string;
begin
  // delegate to the unit-level routine
  Result := JvStrToHtml.StringToHtml(Text);
end;
{ Cheap 16-bit hash of S, used to pre-filter entity-name comparisons.
  Starts from the string length and adds every character code shifted left
  by (position mod 4); the sum is truncated to a Word after each step. }
function GetHtmlHash(const S: string): Word;
var
  CharPos, Shift: Integer;
begin
  Result := Length(S);
  for CharPos := 1 to Length(S) do
  begin
    // CharPos is always positive, so masking equals "mod 4"
    Shift := CharPos and 3;
    Result := Word(Result + (Ord(S[CharPos]) shl Shift));
  end;
end;
{ Sizes ConversionsHash like Conversions and stores, for every table row,
  the GetHtmlHash value of that row's Html text at the same index. }
procedure InitConversionsHash;
var
  Row: Integer;
begin
  SetLength(ConversionsHash, Length(Conversions));
  Row := Low(ConversionsHash);
  while Row <= High(ConversionsHash) do
  begin
    ConversionsHash[Row] := GetHtmlHash(Conversions[Row].Html);
    Inc(Row);
  end;
end;
{ Returns the index of the Conversions row that belongs to Ch, or -1 when
  the character has no row (letters, digits and '_' are rejected without
  scanning the table). On non-Unicode compilers a character >= #128 is first
  mapped from the ANSI code page to its wide-character code; if that mapping
  fails the raw byte value is looked up instead. }
function FindHtmlConversion(Ch: Char): Integer;
var
  W: Word;
  J: Integer;
begin
  Result := -1;
  if (Ord(Ch) < 128) and (AnsiChar(Ch) in ['A'..'Z', 'a'..'z', '0'..'9', '_']) then
    Exit;
  W := Word(Ch);
  {$IFNDEF UNICODE}
  if W >= 128 then
    if MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, @Ch, 1, PWideChar(@W), 1) = 0 then
      W := Word(Ch);
  {$ENDIF ~UNICODE}
  for J := Low(Conversions) to High(Conversions) do
    if W = Conversions[J].Ch then
    begin
      Result := J;
      Exit;
    end;
end;

{ Converts plain text to HTML.
  - Every character with a row in Conversions is replaced by that row's
    Html text.
  - Inside a run of consecutive spaces the 2nd, 4th, ... space is replaced
    by the non-breaking-space entity so that the run survives HTML
    whitespace collapsing; the 1st, 3rd, ... stay ordinary spaces.
  - All other characters (including #0) are copied unchanged.
  Works in two passes: the first computes the final length, the second
  fills the pre-sized result. Returns Value itself when nothing has to be
  replaced. }
function StringToHtml(const Value: string): string;
const
  Nbsp = '&nbsp;';
var
  I, ConvIdx: Integer;
  Len, AddLen, HtmlLen: Integer;
  P: PChar;
  Ch: Char;
  Even: Boolean;
begin
  Len := Length(Value);

  // pass 1: number of characters the replacements add
  AddLen := 0;
  I := 1;
  while I <= Len do
  begin
    if Value[I] = ' ' then
    begin
      Even := False;
      repeat
        if Even then
          Inc(AddLen, Length(Nbsp) - 1);
        Even := not Even;
        Inc(I);
      until (I > Len) or (Value[I] <> ' ');
    end
    else
    begin
      ConvIdx := FindHtmlConversion(Value[I]);
      if ConvIdx >= 0 then
        Inc(AddLen, Length(Conversions[ConvIdx].Html) - 1);
      Inc(I);
    end;
  end;

  if AddLen = 0 then
  begin
    Result := Value;
    Exit;
  end;

  // pass 2: build the result in place
  SetLength(Result, Len + AddLen);
  P := Pointer(Result);
  I := 1;
  while I <= Len do
  begin
    Ch := Value[I];
    if Ch = ' ' then
    begin
      Even := False;
      repeat
        if Even then
        begin
          HtmlLen := Length(Nbsp);
          Move(Nbsp[1], P[0], HtmlLen * SizeOf(Char));
          Inc(P, HtmlLen);
        end
        else
        begin
          P[0] := ' ';
          Inc(P);
        end;
        Even := not Even;
        Inc(I);
      until (I > Len) or (Value[I] <> ' ');
    end
    else
    begin
      ConvIdx := FindHtmlConversion(Ch);
      if ConvIdx >= 0 then
      begin
        HtmlLen := Length(Conversions[ConvIdx].Html);
        Move(Conversions[ConvIdx].Html[1], P[0], HtmlLen * SizeOf(Char));
        Inc(P, HtmlLen);
      end
      else
      begin
        // no sentinel value is used here, so a #0 in the input is copied
        // like any other character and the pass-1 length stays correct
        P[0] := Ch;
        Inc(P);
      end;
      Inc(I);
    end;
  end;
end;
{ Decodes the body of a numeric character reference, i.e. the text between
  '&#' and ';' ('65' or 'x41').
  Returns True and sets Ch when Body consists only of decimal digits (or of
  'x'/'X' followed by hex digits) and the value lies in 1..Ord(High(Char)).
  Returns False and leaves Ch untouched otherwise. }
function TryDecodeNumericCharRef(const Body: string; var Ch: Char): Boolean;
var
  S: string;
  K, Code: Integer;
begin
  Result := False;
  S := Body;
  if S = '' then
    Exit;
  if (S[1] = 'x') or (S[1] = 'X') then
  begin
    if Length(S) = 1 then
      Exit;
    for K := 2 to Length(S) do
      if (Ord(S[K]) >= 128) or not (AnsiChar(S[K]) in ['0'..'9', 'a'..'f', 'A'..'F']) then
        Exit;
    S[1] := '$'; // Delphi hex prefix understood by StrToIntDef
  end
  else
    for K := 1 to Length(S) do
      if (Ord(S[K]) >= 128) or not (AnsiChar(S[K]) in ['0'..'9']) then
        Exit;
  // an overflowing number makes StrToIntDef return the default
  Code := StrToIntDef(S, -1);
  if (Code > 0) and (Code <= Ord(High(Char))) then
  begin
    Ch := Char(Code);
    Result := True;
  end;
end;

{ Converts HTML text back to plain text.
  Every '&...;' sequence (the ';' must follow within 20 characters of the
  '&') is resolved in this order:
    1. the non-breaking-space entity becomes an ordinary space,
    2. a name found in Conversions becomes that row's character,
    3. a numeric reference ('&#65;', '&#x41;') becomes that character.
  A sequence that cannot be resolved, or an '&' without a terminating ';',
  is copied to the result unchanged. }
function HtmlToString(const Value: string): string;
const
  Nbsp = '&nbsp;';
  MaxEntityScan = 20; // how far after '&' the terminating ';' is searched
var
  I, Index, Len: Integer;
  Start, J: Integer;
  Ch: Char;
  W, Hash: Word;
  ReplStr: string;
  Resolved: Boolean;
begin
  if ConversionsHash = nil then
    InitConversionsHash;
  Len := Length(Value);
  SetLength(Result, Len); // worst case: nothing gets shorter
  Index := 0;
  I := 1;
  while I <= Len do
  begin
    Ch := Value[I];
    // html entity
    if Ch = '&' then
    begin
      Start := I;
      Inc(I);
      while (I <= Len) and (Value[I] <> ';') and (I < Start + MaxEntityScan) do
        Inc(I);
      Resolved := False;
      // the bounds check comes first: I may have run past the end of Value
      if (I <= Len) and (Value[I] = ';') then
      begin
        ReplStr := Copy(Value, Start, I - Start + 1); // '&name;'
        if ReplStr = Nbsp then
        begin
          // special treatment: decode to a plain space, not to #160
          Ch := ' ';
          Resolved := True;
        end
        else
        begin
          Hash := GetHtmlHash(ReplStr);
          for J := Low(Conversions) to High(Conversions) do
            // cheap hash comparison first, string comparison only on a hit
            if (ConversionsHash[J] = Hash) and (Conversions[J].Html = ReplStr) then
            begin
              W := Conversions[J].Ch;
              Ch := Char(W);
              {$IFNDEF UNICODE}
              if W >= 128 then
                // flags must be 0 here: MB_ERR_INVALID_CHARS is a
                // MultiByteToWideChar flag and is not valid for this call
                if WideCharToMultiByte(CP_ACP, 0, @W, 1, PAnsiChar(@Ch), 1, nil, nil) = 0 then
                  Ch := Char(W);
              {$ENDIF ~UNICODE}
              Resolved := True;
              Break;
            end;
        end;
        // if no conversion was found, it may actually be a number
        if (not Resolved) and (Length(ReplStr) > 3) and (ReplStr[2] = '#') then
          // strip the leading '&#' and the trailing ';'
          Resolved := TryDecodeNumericCharRef(Copy(ReplStr, 3, Length(ReplStr) - 3), Ch);
      end;
      if not Resolved then
      begin
        // not an entity: emit the '&' itself and rescan what follows it
        I := Start;
        Ch := Value[I];
      end;
    end;
    Inc(I);
    Inc(Index);
    Result[Index] := Ch;
  end;
  if Index <> Len then
    SetLength(Result, Index);
end;
{ Returns the Conversions text for Ch, or Ch itself when there is none.
  Letters, digits and '_' are returned as-is without a table scan. On
  non-Unicode compilers a character >= #128 is mapped from the ANSI code page
  to its wide-character code first; if that mapping fails the character is
  returned unchanged. }
function CharToHtml(Ch: Char): string;
var
  Row: Integer;
  Code: Word;
begin
  // default answer: the character itself
  Result := Ch;
  if (Ord(Ch) < 128) and (AnsiChar(Ch) in ['A'..'Z', 'a'..'z', '0'..'9', '_']) then
    Exit;
  Code := Word(Ch);
  {$IFNDEF UNICODE}
  // the API is only called for Code >= 128 (short-circuit evaluation)
  if (Code >= 128) and
     (MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS, @Ch, 1, PWideChar(@Code), 1) = 0) then
    Exit;
  {$ENDIF ~UNICODE}
  for Row := Low(Conversions) to High(Conversions) do
    if Conversions[Row].Ch = Code then
    begin
      Result := Conversions[Row].Html;
      Exit;
    end;
end;
{ Unit start-up / shut-down code; only present when UNITVERSIONING is
  defined. }
{$IFDEF UNITVERSIONING}
initialization
// hand this unit's UnitVersioning record to JclUnitVersioning
RegisterUnitVersion(HInstance, UnitVersioning);
finalization
// matching call for the registration above
UnregisterUnitVersion(HInstance);
{$ENDIF UNITVERSIONING}
end.