Menu

[r26]: / trunk / python / python-tokenizer.php  Maximize  Restore  History

Download this file

123 lines (94 with data), 2.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
<?php
# Reserved words: python_tokenize() looks identifiers up in this list and, on a
# hit, emits a token named after the upper-cased word instead of an LNAME token.
$python_tokens = array(
    "for",
    "in",
);
/**
 * Split Python source text into a flat, ordered list of tokens.
 *
 * The input is first passed through python_sanitize(). The result is then
 * consumed from the front: each loop iteration tries a series of anchored
 * patterns, appends at most one token, and strips the matched text. Characters
 * that no pattern recognises are skipped one at a time without producing a token.
 *
 * Helpers python_sanitize(), re(), token() and python_parse_comment() are
 * defined outside this block. From its use here, re($pattern, $subject, $r)
 * appears to behave like preg_match() with $r receiving the match array
 * -- TODO confirm against its definition.
 *
 * Terminates the script via die() when the tab indentation changes by more
 * than one level at once.
 *
 * NOTE(review): indentation is only compared when a run of tabs is found, and
 * tab runs are matched wherever they occur, not just at the start of a line.
 * A line with no leading tab therefore never emits DEDENT. Left unchanged here
 * because fixing it alters the token stream the parser receives.
 *
 * @param string $code Python source code.
 * @return array Tokens in source order, each one built by token().
 */
function python_tokenize($code) {
    global $python_tokens;
    $c = python_sanitize($code);
    $tokens = array();
    $indent = 0;
    while (strlen($c)) {
        if (re('/^[ ]+/', $c, $r)) {
            # spaces carry no meaning for this tokenizer: skipped
        } else if (re('/^[\t]+/', $c, $r)) {
            # indent/dedent: compare the length of the tab run with the current level
            $d = strlen($r[0]) - $indent;
            switch ($d) {
                case 0: break;
                case +1: $tokens[] = token("INDENT"); $indent++; break;
                case -1: $tokens[] = token("DEDENT"); $indent--; break;
                default: die("Expected indented block");
            }
        } else if (re('/^\n/', $c, $r)) {
            # newline: emitted only after a non-NEWLINE token, so blank lines and
            # leading newlines collapse instead of producing repeated NEWLINEs
            $count = count($tokens);
            $last = $count > 0 ? $tokens[$count - 1] : null;
            if (isset($last["name"]) && $last["name"] !== "NEWLINE") {
                $tokens[] = token("NEWLINE");
            }
        } else if (re('/^#.*/', $c, $r)) {
            # sharp comment: handed to python_parse_comment(), no token emitted
            python_parse_comment($r[0]);
        } else if (re('/^,/', $c, $r)) {
            # comma
            $tokens[] = token("COMMA");
        } else if (re('/^:/', $c, $r)) {
            # colon
            $tokens[] = token("COLON");
        } else if (re('/^;/', $c, $r)) {
            # semicolon
            $tokens[] = token("SEMICOLON");
        } else if (re('/^=/', $c, $r)) {
            # equal sign (a "==" in the input yields two EQUAL tokens)
            $tokens[] = token("EQUAL");
        } else if (re('/^\(/', $c, $r)) {
            # round bracket open
            $tokens[] = token("RBO");
        } else if (re('/^\)/', $c, $r)) {
            # round bracket close
            $tokens[] = token("RBC");
        } else if (re('/^\[/', $c, $r)) {
            # square bracket open
            $tokens[] = token("SBO");
        } else if (re('/^\]/', $c, $r)) {
            # square bracket close
            $tokens[] = token("SBC");
        } else if (re('/^{/', $c, $r)) {
            # curly brace open
            $tokens[] = token("BO");
        } else if (re('/^}/', $c, $r)) {
            # curly brace close
            $tokens[] = token("BC");
        } else if (re('/^print\b/', $c, $r)) {
            # print keyword; \b keeps names such as "printer" from being split
            $tokens[] = token("PRINT");
        } else if (re('/^True\b/', $c, $r)) {
            # boolean literal True
            $tokens[] = token("TRUE");
        } else if (re('/^False\b/', $c, $r)) {
            # boolean literal False
            $tokens[] = token("FALSE");
        } else if (re('/^None\b/', $c, $r)) {
            # None literal
            $tokens[] = token("NONE");
        } else if (re('/^\.\.\./', $c, $r)) {
            # ellipsis; the token name spelling is kept as-is because consumers
            # outside this block may match on it
            $tokens[] = token("TRIEPLEDOT");
        } else if (re('/^[0-9]+(\.[0-9]*)?/', $c, $r)) {
            # number: digits with an optional fractional part
            $tokens[] = token("NUMBER", $r[0]);
        } else if (re('/^"((?:[^"\\\\]|\\\\.)*)"/s', $c, $r)) {
            # double quoted string: any run of non-quote, non-backslash characters
            # or backslash escapes, so "" and "a" match as well as longer strings;
            # the raw (still escaped) content is passed on as the token value
            $tokens[] = token("DSTRING", $r[1]);
        } else if (re('/^[A-Za-z_][A-Za-z0-9_]*/', $c, $r)) {
            # name or literal token: words listed in $python_tokens become their
            # own upper-cased token, everything else is an LNAME
            if (is_array($python_tokens) && in_array($r[0], $python_tokens, true)) {
                $tokens[] = token(strtoupper($r[0]));
            } else {
                $tokens[] = token("LNAME", $r[0]);
            }
        } else {
            # rest: unrecognised character, consumed without emitting a token
            $r[0] = $c[0];
        }
        # drop the text that was just matched (or the single skipped character)
        $c = substr($c, strlen($r[0]));
    }
    return $tokens;
}
Want the latest updates on software, tech news, and AI?
Get latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.