<?php
/**
 * Turns Python source text into a flat list of tokens.
 *
 * The input is passed through python_sanitize() and then consumed from the
 * front. Each pass drops one leading line break (python_tokenize_newline),
 * drops leading whitespace (python_tokenize_whitespace) and asks
 * python_tokenize_next() for the token at the head of what remains. A NEWLINE
 * and an ENDMARKER token are appended once the input is used up.
 *
 * NOTE(review): debug scaffolding is still active. Every token is handed to
 * python_dump_token(), the script die()s once a fourth token has been read,
 * and a token without a positive "len" is var_dump()ed before die().
 *
 * @param string $code Python source text.
 * @return array Tokens in source order, followed by NEWLINE and ENDMARKER.
 */
function python_tokenize($code) {
    $c = python_sanitize($code);
    $o = array();
    # Number of DEDENT tokens owed at end of input. Nothing in this function
    # changes it yet, so the closing loop below is currently inert.
    $i = 0;
    # 1-based counter of tokens read; only used by the debug cap.
    $z = 1;
    while (strlen($c)) {
        $n = python_tokenize_newline($c);
        if ($n > 0) {
            $c = substr($c, $n);
        }
        $w = python_tokenize_whitespace($c);
        if ($w > 0) {
            # Skip the whitespace that was just measured ($w). The previous
            # code reused the newline length ($n) here.
            $c = substr($c, $w);
        }
        if (!strlen($c)) {
            # The input ended with a line break or whitespace: there is no
            # token left to read, so finish normally instead of falling into
            # the "no progress" abort below.
            break;
        }
        $t = python_tokenize_next($c);
        # Debug trace and hard stop -- see NOTE(review) in the header.
        python_dump_token($t);
        if ($z > 3) {
            die();
        }
        # TODO: indentation is not tracked yet. The abandoned draft turned a
        # leading BLANKSPACE token into python_token("INDENT").
        $o[] = $t;
        if (isset($t["len"]) && $t["len"] > 0) {
            $c = substr($c, $t["len"]);
        } else {
            # No progress was made on the input; abort rather than loop forever.
            var_dump($t);
            die();
        }
        $z++;
    }
    while ($i > 0) {
        $o[] = python_token("NEWLINE");
        $o[] = python_token("DEDENT");
        $i--;
    }
    $o[] = python_token("NEWLINE");
    $o[] = python_token("ENDMARKER");
    return $o;
}
/**
 * Measures the line break at the start of $c.
 *
 * A line break is an optional CR (byte 13) followed by an optional LF
 * (byte 10), with at least one of the two present. If $c does not start with
 * one, a leading run of spaces/tabs directly followed by a line break is
 * accepted as well and counted together with it.
 *
 * @param string $c Remaining source text.
 * @return int Number of bytes to skip, or 0 when $c does not start with a
 *             line break (optionally preceded by spaces/tabs).
 */
function python_tokenize_newline($c) {
    # Length of the CR / LF / CRLF sequence beginning at $offset (0 if none).
    $lineBreakLength = function ($text, $offset) {
        $end = $offset;
        foreach (array(13, 10) as $byte) {
            if (isset($text[$end]) && ord($text[$end]) == $byte) {
                $end++;
            }
        }
        return $end - $offset;
    };

    $breakLen = $lineBreakLength($c, 0);
    if ($breakLen > 0) {
        return $breakLen;
    }

    # Blank-looking line: spaces/tabs that run straight into a line break.
    if (re('/(^[ \t]+)/', $c, $s)) {
        $indentLen = strlen($s[0]);
        $breakLen = $lineBreakLength($c, $indentLen);
        if ($breakLen > 0) {
            return $indentLen + $breakLen;
        }
    }

    return 0;
}
/**
 * Placeholder for measuring leading whitespace.
 *
 * The function declares no parameters and performs no work, so any argument
 * passed by a caller is ignored and the result is always null. A caller that
 * tests the result with "> 0" therefore never skips anything.
 *
 * @return null
 */
function python_tokenize_whitespace() {
    return null;
}
/**
 * Recognises the single token at the very start of $c.
 *
 * Fixed-text tokens (punctuation, operators, keywords) are tried first, in
 * table order; after that numbers, double-quoted strings and names. Every
 * recognised token is built with python_token(name, value, length), where
 * length is the number of bytes of $c the token covers.
 *
 * re() is defined elsewhere; it is used here as "does $c match this pattern,
 * with the captures stored in $s" -- presumably a preg_match() wrapper.
 *
 * NOTE(review): a '#' comment is handed to python_parse_comment() but yields
 * no token, so the result is null for comments as well as for unrecognised
 * input. Decide whether comments should produce a token the caller can skip.
 *
 * @param string $c Remaining source text.
 * @return mixed The value returned by python_token(), or null when no token
 *               was produced.
 */
function python_tokenize_next($c) {
    # Order matters: a longer operator must come before any operator that is
    # a prefix of it ('...' before '.', '==' before '='). Keywords end in \b
    # so that identifiers such as "define" or "printer" fall through to NAME.
    $h = array(
        '/^,/' => "COMMA",
        '/^\.\.\./' => "TRIEPLEDOT",
        '/^\./' => "DOT",
        '/^:/' => "COLON",
        '/^;/' => "SEMICOLON",
        '/^==/' => "EQUAL",
        '/^=/' => "EQ",
        '/^\+/' => "PLUS",
        '/^%/' => "MOD",
        '/^\(/' => "RBO",
        '/^\)/' => "RBC",
        '/^\[/' => "SBO",
        '/^\]/' => "SBC",
        '/^{/' => "BO",
        '/^}/' => "BC",
        '/^print\b/' => "PRINT",
        '/^class\b/' => "CLASS",
        '/^def\b/' => "DEF",
        '/^return\b/' => "RETURN",
        '/^while\b/' => "WHILE",
        '/^True\b/' => "TRUE",
        '/^False\b/' => "FALSE",
        '/^None\b/' => "NONE",
    );
    foreach ($h as $r => $t) {
        if (re($r, $c, $s)) {
            return python_token($t, $s[0], strlen($s[0]));
        }
    }

    if (re('|^#.*|', $c, $s)) {
        python_parse_comment($s[0]);
        return null;
    }

    if (re('/^[0-9]+(\.[0-9]*)?/', $c, $s)) {
        # The pattern accepts a fractional part, so only cast to int when
        # there is none; (int) would silently drop the decimals.
        $value = (strpos($s[0], '.') === false) ? (int)$s[0] : (float)$s[0];
        return python_token("NUMBER", $value, strlen($s[0]));
    }

    # Triple-quoted string: non-greedy so it stops at the first closing """,
    # and /s so the body may span several lines.
    if (re('/^"""(.*?)"""/s', $c, $s)) {
        return python_token("STRING", $s[1], strlen($s[0]));
    }

    # Double-quoted string: any run of non-quote, non-backslash bytes or
    # backslash escapes. Also matches "" and one-character strings.
    if (re('/^"((?:[^"\\\\]|\\\\.)*)"/s', $c, $s)) {
        return python_token("STRING", (string)$s[1], strlen($s[0]));
    }

    if (re('|^[A-Za-z_][A-Za-z0-9_]*|', $c, $s)) {
        $v = new stdClass;
        $v->name = $s[0];
        return python_token("NAME", $v, strlen($s[0]));
    }

    # Nothing recognised at the start of $c.
    return null;
}