<?php
/**
 * Split Python source text into a flat list of tokens.
 *
 * The input is first passed through python_sanitize(). The scanner then
 * repeatedly matches a pattern at the start of the remaining text, appends the
 * corresponding token() to the output and drops the matched text.
 *
 * Matching order:
 *   1. the fixed table of punctuation, operators and keywords;
 *   2. runs of spaces (skipped), runs of tabs (indentation), comments,
 *      numbers, string literals and identifiers;
 *   3. any other single character is dropped without producing a token.
 *
 * At end of input every still-open indentation level is closed with a
 * NEWLINE/DEDENT pair, followed by a final NEWLINE and ENDMARKER.
 *
 * NOTE(review): re() and token() are defined outside this block. The code
 * relies on re($pattern, $subject, $matches) filling $matches like
 * preg_match() does, and on token($type[, $value]) building one token —
 * confirm against their definitions.
 *
 * NOTE(review): indentation is only evaluated when a run of tabs is seen, so
 * a line that returns to column 0 produces no DEDENT until end of input, and
 * a change of more than one level at once aborts via die().
 *
 * @param mixed $code Python source; handed to python_sanitize() unchanged.
 * @return array List of the values returned by token().
 */
function python_tokenize($code) {
    $c = python_sanitize($code);

    // Fixed-text tokens, tried in order; the first match wins. Longer
    // operators therefore have to come before their own prefixes
    // ("==" before "=", "..." before "."), and keywords end in \b so that
    // identifiers such as "printer" or "define" are not split in two.
    $h = array(
        '/^\n/' => "NEWLINE",
        '/^,/' => "COMMA",
        '/^\.\.\./' => "TRIEPLEDOT",
        '/^\./' => "DOT",
        '/^:/' => "COLON",
        '/^;/' => "SEMICOLON",
        '/^==/' => "EQUAL",
        '/^=/' => "EQ",
        '/^\+/' => "PLUS",
        '/^%/' => "MOD",
        '/^\(/' => "RBO",
        '/^\)/' => "RBC",
        '/^\[/' => "SBO",
        '/^\]/' => "SBC",
        '/^{/' => "BO",
        '/^}/' => "BC",
        '/^print\b/' => "PRINT",
        '/^class\b/' => "CLASS",
        '/^def\b/' => "DEF",
        '/^return\b/' => "RETURN",
        '/^while\b/' => "WHILE",
        '/^True\b/' => "TRUE",
        '/^False\b/' => "FALSE",
        '/^None\b/' => "NONE",
    );

    $o = array();   // tokens produced so far
    $i = 0;         // current indentation depth, in tabs

    while (strlen($c)) {
        $f = true;      // stays true while no table entry has matched
        $s = array();   // match groups of the pattern that matched
        foreach ($h as $r => $t) {
            if (re($r, $c, $s)) {
                $o[] = token($t, $s[0]);
                $f = false;
                break;
            }
        }

        if ($f) {
            if (re('/^[ ]+/', $c, $s)) {
                // Spaces carry no meaning here; they are only consumed below.
            } else if (re('/^[\t]+/', $c, $s)) {
                // Depth change = length of the matched tab run minus the
                // current depth. (Measured on the match $s[0], not on the
                // leftover loop variable $r.)
                $d = strlen($s[0]) - $i;
                switch ($d) {
                    case 0: break;
                    case +1: $o[] = token("INDENT"); $i++; break;
                    case -1: $o[] = token("DEDENT"); $i--; break;
                    default: die("Expectend indented block");
                }
            } else if (re('/^#.*/', $c, $s)) {
                // Comments produce no token; they are handed to the comment hook.
                python_parse_comment($s[0]);
            } else if (re('/^[0-9]+(\.[0-9]*)?/', $c, $s)) {
                // Keep the fractional part of decimal literals instead of
                // truncating everything to an integer.
                $n = (strpos($s[0], '.') === false) ? (int)$s[0] : (float)$s[0];
                $o[] = token("NUMBER", $n);
            } else if (re('/^"""(.*?)"""/s', $c, $s)) {
                // Triple-quoted string: lazy so it stops at the first closing
                // delimiter, /s so the body may span several lines.
                $o[] = token("STRING", $s[1]);
            } else if (re('/^"((?:[^"\\\\]|\\\\.)*)"/', $c, $s)) {
                // Double-quoted string of any length (including empty); a
                // backslash escapes the following character. The raw,
                // still-escaped body is stored.
                $o[] = token("STRING", (string)$s[1]);
            } else if (re('/^[A-Za-z_][A-Za-z0-9_]*/', $c, $s)) {
                $v = new stdClass;
                $v->name = $s[0];
                $o[] = token("NAME", $v);
            } else {
                // Unrecognised character: consume exactly one byte, emit nothing.
                $s[0] = $c[0];
            }
        }

        // Drop whatever was matched (or skipped) from the front of the input.
        $c = substr($c, strlen($s[0]));
    }

    // Close every indentation level that is still open.
    if ($i > 0) {
        while ($i--) {
            $o[] = token("NEWLINE");
            $o[] = token("DEDENT");
        }
    }
    $o[] = token("NEWLINE");
    $o[] = token("ENDMARKER");
    return $o;
}