Menu

[r337]: / trunk / ext / scintilla / src / UniConversion.cxx  Maximize  Restore  History

Download this file

120 lines (112 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Scintilla source code edit control
/** @file UniConversion.cxx
** Functions to handle UTF-8 and UTF-16 strings.
**/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <stdlib.h>
#include "UniConversion.h"
// Boundaries of the UTF-16 surrogate code unit range (0xD800..0xDFFF).
// Kept as anonymous enumerators so they remain plain integral constants.
enum {
    SURROGATE_LEAD_FIRST = 0xD800,   // lowest surrogate value (first lead unit)
    SURROGATE_TRAIL_FIRST = 0xDC00,  // first trail unit
    SURROGATE_TRAIL_LAST = 0xDFFF    // highest surrogate value (last trail unit)
};
// Count the bytes needed to store the UTF-8 form of a UTF-16 buffer.
//   uptr - code units to measure
//   tlen - maximum number of units to examine
// Scanning ends after tlen units or at the first zero unit, whichever comes
// first. The result does not include room for a terminating NUL.
// Any unit inside the surrogate range is counted as a 4 byte sequence and the
// unit that follows it is skipped without being inspected.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
    unsigned int bytes = 0;
    unsigned int pos = 0;
    while ((pos < tlen) && (uptr[pos] != 0)) {
        const unsigned int unit = uptr[pos];
        // Number of input units this character occupies.
        unsigned int consumed = 1;
        if (unit < 0x80) {
            bytes += 1;
        } else if (unit < 0x800) {
            bytes += 2;
        } else if ((unit < SURROGATE_LEAD_FIRST) || (unit > SURROGATE_TRAIL_LAST)) {
            bytes += 3;
        } else {
            // Surrogate: the pair becomes one 4 byte sequence.
            bytes += 4;
            consumed = 2;
        }
        pos += consumed;
    }
    return bytes;
}
// Encode a UTF-16 buffer as UTF-8.
//   uptr - source code units
//   tlen - maximum number of source units to read; conversion also stops at
//          the first zero unit
//   putf - destination; must have room for len + 1 bytes
//   len  - number of data bytes available in putf (normally the value
//          returned by UTF8Length for the same input)
// putf[len] is always set to NUL. If the converted text is shorter than len,
// a NUL is also written directly after the last converted byte.
// A sequence that does not fit completely in the remaining space is not
// written and conversion stops there, so putf is never overrun.
// Assumes each source unit is a 16 bit UTF-16 value; larger values are not
// handled specially.
void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
    unsigned int k = 0;
    for (unsigned int i = 0; i < tlen && uptr[i];) {
        const unsigned int uch = uptr[i];
        // Build the sequence locally so its size is known before touching putf.
        char seq[4];
        unsigned int n = 0;
        if (uch < 0x80) {
            seq[n++] = static_cast<char>(uch);
        } else if (uch < 0x800) {
            seq[n++] = static_cast<char>(0xC0 | (uch >> 6));
            seq[n++] = static_cast<char>(0x80 | (uch & 0x3f));
        } else if ((uch >= SURROGATE_LEAD_FIRST) &&
            (uch <= SURROGATE_TRAIL_LAST)) {
            // Half a surrogate pair: combine with the following unit.
            i++;
            // The partner may be missing when the surrogate is the last unit
            // in the buffer; use zero bits rather than reading past tlen.
            // Four bytes are still produced, matching what UTF8Length counts
            // for every surrogate.
            const unsigned int trail = (i < tlen) ? static_cast<unsigned int>(uptr[i]) : 0u;
            const unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (trail & 0x3ff);
            seq[n++] = static_cast<char>(0xF0 | (xch >> 18));
            seq[n++] = static_cast<char>(0x80 | ((xch >> 12) & 0x3f));
            seq[n++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
            seq[n++] = static_cast<char>(0x80 | (xch & 0x3f));
        } else {
            seq[n++] = static_cast<char>(0xE0 | (uch >> 12));
            seq[n++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
            seq[n++] = static_cast<char>(0x80 | (uch & 0x3f));
        }
        // k never exceeds len, so len - k cannot wrap.
        if (n > len - k)
            break;
        for (unsigned int j = 0; j < n; j++)
            putf[k++] = seq[j];
        i++;
    }
    if (k < len)
        putf[k] = '\0';
    putf[len] = '\0';
}
// Count the UTF-16 code units needed to hold UTF-8 text.
//   s   - UTF-8 bytes
//   len - number of bytes in s
// Only the first byte of each sequence is examined; it decides how many bytes
// are skipped. Sequences of 1 to 3 bytes count as one unit and 4 byte
// sequences count as two units (a surrogate pair).
unsigned int UTF16Length(const char *s, unsigned int len) {
    unsigned int units = 0;
    unsigned int pos = 0;
    while (pos < len) {
        const unsigned char lead = static_cast<unsigned char>(s[pos]);
        if (lead < 0x80) {
            pos += 1;
        } else if (lead < 0xE0) {
            pos += 2;
        } else if (lead < 0xF0) {
            pos += 3;
        } else {
            pos += 4;
            // Second unit of the surrogate pair.
            units++;
        }
        units++;
    }
    return units;
}
// Fetch the 6 payload bits of the continuation byte at us[i] and advance i.
// Yields 0 without advancing when the input is exhausted, so a truncated
// sequence never causes a read at or beyond us[len].
static int UTF8TrailBits(const unsigned char *us, unsigned int len, unsigned int &i) {
    if (i >= len)
        return 0;
    return us[i++] & 0x3F;
}

// Decode UTF-8 text into UTF-16 code units.
//   s    - UTF-8 bytes
//   len  - number of bytes in s
//   tbuf - destination buffer
//   tlen - capacity of tbuf in code units
// Returns the number of units stored in tbuf. No terminator is written.
// Decoding stops when the input is used up or tbuf is full. A 4 byte sequence
// needs two units (a surrogate pair); if only one unit of space remains it is
// not written and decoding stops. Continuation bytes missing from a truncated
// final sequence contribute zero bits. Input is not otherwise validated.
unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
    const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
    unsigned int ui = 0;
    unsigned int i = 0;
    while ((i < len) && (ui < tlen)) {
        const unsigned char ch = us[i++];
        if (ch < 0x80) {
            tbuf[ui] = ch;
        } else if (ch < 0x80 + 0x40 + 0x20) {
            int val = (ch & 0x1F) << 6;
            val += UTF8TrailBits(us, len, i);
            tbuf[ui] = static_cast<wchar_t>(val);
        } else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
            int val = (ch & 0xF) << 12;
            val += UTF8TrailBits(us, len, i) << 6;
            val += UTF8TrailBits(us, len, i);
            tbuf[ui] = static_cast<wchar_t>(val);
        } else {
            // Outside the BMP so need two surrogates.
            // ui < tlen holds here, so the subtraction cannot wrap.
            if (tlen - ui < 2)
                break;
            int val = (ch & 0x7) << 18;
            val += UTF8TrailBits(us, len, i) << 12;
            val += UTF8TrailBits(us, len, i) << 6;
            val += UTF8TrailBits(us, len, i);
            tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
            ui++;
            tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);
        }
        ui++;
    }
    return ui;
}
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.