Menu

[r8]: / src / getword.cpp  Maximize  Restore  History

Download this file

79 lines (71 with data), 1.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include "getword.h"
/**
 * Extracts the next word from a NUL-terminated string.
 *
 * Scanning starts at *s. Leading non-word characters are skipped; the word
 * is the following run of consecutive characters for which
 * GetWord::isWordCharacter() returns true.
 *
 * @param s   In/out cursor into the caller's buffer. On success it is moved
 *            past the word and past the single non-word character that
 *            terminated it, or onto the terminating NUL when the word runs
 *            to the end of the string. Left unchanged when no word is found.
 * @param len Out: length of the word in bytes. Left unchanged when no word
 *            is found.
 * @return Pointer to the first byte of the word inside the caller's buffer
 *         (the word is NOT NUL-terminated; use *len), or NULL when the rest
 *         of the string contains no word or an argument is NULL.
 */
char *GetWord::run(char **s, size_t *len)
{
    if (s == NULL || *s == NULL || len == NULL)
        return NULL;

    size_t bytes = 0;

    // Skip everything that cannot start a word.
    char *start = *s;
    while (*start && !GetWord::isWordCharacter(start, &bytes))
        start += bytes;

    if (!*start)
        return NULL;

    // Consume the word. When the loop stops on a non-word character, `bytes`
    // holds the width of that terminating character.
    char *end = start;
    while (*end && GetWord::isWordCharacter(end, &bytes))
        end += bytes;

    *len = static_cast<size_t>(end - start);

    // Step over the terminating character, but never past the NUL terminator.
    *s = *end ? end + bytes : end;
    return start;
}
/**
 * Classifies the character that starts at s and reports its width in bytes.
 *
 * ASCII: only the letters A-Z and a-z are word characters; every other
 * ASCII byte (including NUL, digits and '{') is not.
 *
 * Non-ASCII: the byte is treated as the start of a UTF-8 sequence. Its width
 * is taken from the lead byte and limited to the continuation bytes that are
 * actually present, so a truncated or malformed sequence never makes the
 * caller step past the NUL terminator or into the next character. Such a
 * character is a word character unless it falls into one of the ranges
 * treated as punctuation below.
 *
 * @param s     Pointer to the first byte of the character to classify.
 * @param bytes Out: number of bytes the character occupies (always >= 1).
 * @return true if the character is a word character, false otherwise.
 */
bool GetWord::isWordCharacter(char *s, size_t *bytes)
{
    const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
    const unsigned char lead = *us;

    // Plain ASCII: a single byte, word character only if it is a letter.
    if (lead < 128)
    {
        *bytes = 1;
        return (lead >= 'A' && lead <= 'Z') || (lead >= 'a' && lead <= 'z');
    }

    // Expected UTF-8 sequence length according to the lead byte. Stray
    // continuation bytes (0x80-0xBF) and invalid lead bytes (0xF8-0xFF)
    // are consumed one byte at a time.
    size_t expected = 1;
    if (lead >= 0xF8)
        expected = 1;
    else if (lead >= 0xF0)
        expected = 4;
    else if (lead >= 0xE0)
        expected = 3;
    else if (lead >= 0xC0)
        expected = 2;

    // Accept only real continuation bytes (10xxxxxx). The NUL terminator is
    // not one, so this loop cannot read or advance past the end of the string.
    size_t length = 1;
    while (length < expected && (us[length] & 0xC0) == 0x80)
        ++length;
    *bytes = length;

    // Unicode punctuation marks
    // Based on http://www1.tip.nl/~t876506/utf8tbl.html
    //   0xE2 0x80 .. : U+2000-U+203F (general punctuation)
    //   0xC2 ..      : U+0080-U+00BF (Latin-1 symbols and punctuation)
    //   0xCB ..      : U+02C0-U+02FF (spacing modifier letters)
    const bool isPunctuation =
        (lead == 226 && us[1] == 128) ||
        lead == 194 ||
        lead == 203;
    return !isPunctuation;
}
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.