<?php
# Words "for" and "in". python_tokenize() has no dedicated branch for them,
# so they come out of the tokenizer as NAME tokens.
# NOTE(review): no reader of this list is visible in this part of the file.
$python_tokens = array('for', 'in');
/**
 * Split Python source text into a flat list of tokens.
 *
 * The input is first passed through python_sanitize(); the result is then
 * consumed from the front, one pattern match per iteration, until nothing is
 * left. Characters that match no rule are skipped silently. After the input
 * is exhausted, one NEWLINE/DEDENT pair is appended for every indentation
 * level still open, followed by a final NEWLINE.
 *
 * @param string $code Python source code.
 * @return array List of values produced by token().
 */
function python_tokenize($code) {
    $c = python_sanitize($code);
    $tokens = array();
    $indent = 0;

    while (strlen($c)) {
        if (re('/^[ ]+/', $c, $r)) {
            # spaces carry no meaning here; only tabs are treated as indentation
        } else if (re('/^[\t]+/', $c, $r)) {
            # indent/dedent: compare the length of the tab run with the current depth
            # NOTE(review): tab runs are matched wherever they occur, not only at
            # the start of a line, and a line without any leading tab never
            # reaches this branch, so no DEDENT is produced for it here —
            # confirm whether python_sanitize() compensates for that.
            $d = strlen($r[0]) - $indent;
            switch ($d) {
                case 0:
                    break;
                case +1:
                    $tokens[] = token("INDENT");
                    $indent++;
                    break;
                case -1:
                    $tokens[] = token("DEDENT");
                    $indent--;
                    break;
                default:
                    # any jump of more than one level in either direction is rejected
                    die("Expected indented block");
            }
        } else if (re('/^\n/', $c, $r)) {
            # newline
            $tokens[] = token("NEWLINE");
        } else if (re('/^#.*/', $c, $r)) {
            # sharp comment, handed to python_parse_comment(); no token is emitted
            python_parse_comment($r[0]);
        } else if (re('/^,/', $c, $r)) {
            # comma
            $tokens[] = token("COMMA");
        } else if (re('/^:/', $c, $r)) {
            # colon
            $tokens[] = token("COLON");
        } else if (re('/^;/', $c, $r)) {
            # semicolon
            $tokens[] = token("SEMICOLON");
        } else if (re('/^==/', $c, $r)) {
            # comparison operator; must be tested before the single "=" below,
            # otherwise "==" is consumed as two assignment tokens
            $tokens[] = token("EQUAL");
        } else if (re('/^=/', $c, $r)) {
            # assignment operator (same token name as "==", but with value "=")
            $tokens[] = token("EQUAL", "=");
        } else if (re('/^\+/', $c, $r)) {
            # plus operator
            $tokens[] = token("PLUS");
        } else if (re('/^%/', $c, $r)) {
            # modulo operator
            $tokens[] = token("MOD", "%");
        } else if (re('/^\(/', $c, $r)) {
            # round bracket open
            $tokens[] = token("RBO");
        } else if (re('/^\)/', $c, $r)) {
            # round bracket close
            $tokens[] = token("RBC");
        } else if (re('/^\[/', $c, $r)) {
            # square bracket open
            $tokens[] = token("SBO");
        } else if (re('/^\]/', $c, $r)) {
            # square bracket close
            $tokens[] = token("SBC");
        } else if (re('/^{/', $c, $r)) {
            # brace open
            $tokens[] = token("BO");
        } else if (re('/^}/', $c, $r)) {
            # brace close
            $tokens[] = token("BC");
        } else if (re('/^print\b/', $c, $r)) {
            # print keyword; \b keeps identifiers such as "printer" intact
            $tokens[] = token("PRINT");
        } else if (re('/^class\b/', $c, $r)) {
            # class keyword
            $tokens[] = token("CLASS");
        } else if (re('/^while\b/', $c, $r)) {
            # while keyword
            $tokens[] = token("WHILE");
        } else if (re('/^True\b/', $c, $r)) {
            # boolean literal True
            $tokens[] = token("TRUE");
        } else if (re('/^False\b/', $c, $r)) {
            # boolean literal False
            $tokens[] = token("FALSE");
        } else if (re('/^None\b/', $c, $r)) {
            # None literal
            $tokens[] = token("NONE");
        } else if (re('/^\.\.\./', $c, $r)) {
            # ellipsis (token name spelling kept as-is for existing consumers)
            $tokens[] = token("TRIEPLEDOT");
        } else if (re('/^[0-9]+(\.[0-9]*)?/', $c, $r)) {
            # number: keep the fractional part instead of truncating it to an int
            $isFloat = strpos($r[0], '.') !== false;
            $tokens[] = token("NUMBER", $isFloat ? (float)$r[0] : (int)$r[0]);
        } else if (re('/^"""([\s\S]*?)"""/', $c, $r)) {
            # triple double quoted string: may span lines, ends at the first closing """
            $tokens[] = token("STRING", $r[1]);
        } else if (re('/^"((?:[^"\\\\]|\\\\[\s\S])*)"/', $c, $r)) {
            # double quoted string, including "" and one-character strings;
            # a backslash escapes the next character, which is kept unprocessed
            $tokens[] = token("STRING", (string)$r[1]);
        } else if (re('/^[A-Za-z_][A-Za-z0-9_]*/', $c, $r)) {
            # name, or any keyword without a dedicated branch above
            $value = new stdClass;
            $value->name = $r[0];
            $tokens[] = token("NAME", $value);
        } else {
            # anything else: drop exactly one character and carry on
            $r = array($c[0]);
        }

        # consume what was just matched
        $c = substr($c, strlen($r[0]));
    }

    # close every indentation level that is still open at the end of the input
    while ($indent > 0) {
        $tokens[] = token("NEWLINE");
        $tokens[] = token("DEDENT");
        $indent--;
    }

    # the stream always ends with a NEWLINE; no ENDMARKER token is emitted
    $tokens[] = token("NEWLINE");
    return $tokens;
}