Menu

[r36]: / trunk / python / python-tokenizer.php  Maximize  Restore  History

Download this file

103 lines (97 with data), 2.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
<?php
/**
 * Tokenize Python source text into a flat list of tokens.
 *
 * The input is passed through python_sanitize() and then consumed from the
 * front, one token per python_tokenize_next() call, until nothing is left.
 * Each token's "len" entry says how many bytes to drop before the next call.
 * A NEWLINE token followed by an ENDMARKER token is always appended last,
 * including for empty input.
 *
 * NOTE: indentation depth is not tracked here, so no DEDENT tokens are
 * synthesised at the end of the input.
 *
 * @param mixed $code  Source text; handed to python_sanitize() unchanged.
 * @param bool  $debug When true, every token is passed to
 *                     python_dump_token() as soon as it has been read.
 * @return array Tokens in source order, terminated by NEWLINE and ENDMARKER.
 * @throws RuntimeException When python_tokenize_next() yields a token that
 *                          consumes no input (missing or non-positive "len").
 */
function python_tokenize($code, $debug = false) {
    $rest = python_sanitize($code);
    $tokens = array();

    while (strlen($rest) > 0) {
        $token = python_tokenize_next($rest);

        if ($debug) {
            python_dump_token($token);
        }

        // A token that consumes nothing would make this loop spin forever,
        // so treat it as a hard error and report where scanning stopped.
        if (!isset($token["len"]) || $token["len"] <= 0) {
            throw new RuntimeException(
                'python_tokenize: unable to tokenize input near "' . substr($rest, 0, 20) . '"'
            );
        }

        $tokens[] = $token;
        // Cast guards against substr() returning false at end of input on old PHP.
        $rest = (string) substr($rest, $token["len"]);
    }

    $tokens[] = python_token("NEWLINE");
    $tokens[] = python_token("ENDMARKER");
    return $tokens;
}
/**
 * Read one token from the very beginning of $c.
 *
 * Recognition order:
 *   1. a run of spaces/tabs  -> INDENT, or BLANKLINE when a CR and/or LF
 *      follows immediately (the line break is consumed as well);
 *   2. fixed punctuation and keywords (first matching table entry wins);
 *   3. comment, number, triple-quoted string, double-quoted string, name.
 *
 * Tokens are built with python_token($type, $value, $length); $length is the
 * number of bytes of $c that the token covers (presumably what ends up as the
 * token's "len" entry -- confirm against python_token()).
 *
 * NOTE(review): step 1 fires whenever $c starts with blanks, not only at the
 * beginning of a source line -- confirm that callers expect INDENT tokens
 * between ordinary tokens on the same line.
 * NOTE(review): comments are returned as a "COMMENT" token so that their text
 * is consumed; confirm that downstream consumers skip or handle that type.
 *
 * @param string $c Remaining source text; expected to be non-empty.
 * @return mixed The token built by python_token(), or null when nothing at
 *               the start of $c is recognised.
 */
function python_tokenize_next($c) {
    // Leading blanks: indentation, or a whitespace-only line when a line
    // break follows directly.
    if (re('/(^[ \t]+)/', $c, $m)) {
        $len = strlen($m[0]);
        $blank = false;
        if (isset($c[$len]) && ord($c[$len]) == 13) {
            $len++;
            $blank = true;
        }
        if (isset($c[$len]) && ord($c[$len]) == 10) {
            $len++;
            $blank = true;
        }
        return python_token($blank ? "BLANKLINE" : "INDENT", "", $len);
    }

    // Order matters: the first matching pattern wins, so longer operators
    // must precede their own prefixes ("..." before ".", "==" before "=").
    // Keywords end in \b so that identifiers such as "printer" or "define"
    // are left for the NAME rule below.
    // "TRIEPLEDOT" keeps its original spelling because it is a runtime value.
    $fixed = array(
        '/^\n/' => "NEWLINE",
        '/^,/' => "COMMA",
        '/^\.\.\./' => "TRIEPLEDOT",
        '/^\./' => "DOT",
        '/^:/' => "COLON",
        '/^;/' => "SEMICOLON",
        '/^==/' => "EQUAL",
        '/^=/' => "EQ",
        '/^\+/' => "PLUS",
        '/^%/' => "MOD",
        '/^\(/' => "RBO",
        '/^\)/' => "RBC",
        '/^\[/' => "SBO",
        '/^\]/' => "SBC",
        '/^{/' => "BO",
        '/^}/' => "BC",
        '/^print\b/' => "PRINT",
        '/^class\b/' => "CLASS",
        '/^def\b/' => "DEF",
        '/^return\b/' => "RETURN",
        '/^while\b/' => "WHILE",
        '/^True\b/' => "TRUE",
        '/^False\b/' => "FALSE",
        '/^None\b/' => "NONE",
    );
    foreach ($fixed as $pattern => $type) {
        if (re($pattern, $c, $m)) {
            return python_token($type, $m[0], strlen($m[0]));
        }
    }

    // Comment: runs up to, but not including, the end of the line.
    if (re('|^#.*|', $c, $m)) {
        python_parse_comment($m[0]);
        return python_token("COMMENT", $m[0], strlen($m[0]));
    }

    // Number: keep the fractional part instead of truncating it to an int.
    if (re('/^[0-9]+(\.[0-9]*)?/', $c, $m)) {
        $isFloat = strpos($m[0], '.') !== false;
        $value = $isFloat ? (float) $m[0] : (int) $m[0];
        return python_token("NUMBER", $value, strlen($m[0]));
    }

    // Triple-quoted string: lazy match so it stops at the first closing
    // delimiter, and the "s" modifier lets it span several lines.
    if (re('/^"""(.*?)"""/s', $c, $m)) {
        return python_token("STRING", $m[1], strlen($m[0]));
    }

    // Double-quoted string: any mix of ordinary characters and backslash
    // escapes, including the empty string. Escapes are kept undecoded.
    if (re('/^"((?:[^"\\\\]|\\\\.)*)"/s', $c, $m)) {
        return python_token("STRING", (string) $m[1], strlen($m[0]));
    }

    // Identifier: the value is an object whose "name" property holds the text.
    if (re('|^[A-Za-z_][A-Za-z0-9_]*|', $c, $m)) {
        $v = new stdClass;
        $v->name = $m[0];
        return python_token("NAME", $v, strlen($m[0]));
    }

    // Nothing recognised: hand back null so the caller can stop instead of
    // looping on input that is never consumed.
    return null;
}
Want the latest updates on software, tech news, and AI?
Get the latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.