phpython Code
A Python interpreter written in PHP
Status: Pre-Alpha
Brought to you by:
francescobianco
--- a/trunk/python/python-tokenizer.php +++ b/trunk/python/python-tokenizer.php @@ -2,6 +2,38 @@ function python_tokenize($code) { $c = python_sanitize($code); + $o = array(); + $i = 0; + $z = 1; + while (strlen($c)) { + $t = python_tokenize_next($c); + + python_dump_token($t); + if ($z>3) { + die(); + } + + $o[] = $t; + if ($t["len"]>0) { + $c = substr($c,$t["len"]); + } else { + var_dump($t); + die(); + } + $z++; + } + if ($i>0) { + while($i--) { + $o[] = python_token("NEWLINE"); + $o[] = python_token("DEDENT"); + } + } + $o[] = python_token("NEWLINE"); + $o[] = python_token("ENDMARKER"); + return $o; +} + +function python_tokenize_next($c) { $h = array( '/^\n/' => "NEWLINE", '/^,/' => "COMMA", @@ -28,53 +60,43 @@ '/^None/' => "NONE", '/^\.\.\./' => "TRIEPLEDOT", ); - $o = array(); - $i = 0; - while (strlen($c)) { + if (re('/(^[ \t]+)/',$c,$s)) { + $l = strlen($s[0]); + $k = true; + if(isset($c[$l]))if(ord($c[$l])==13){$l++;$k=false;} + if(isset($c[$l]))if(ord($c[$l])==10){$l++;$k=false;} + if ($k) { + return python_token("INDENT","",$l); + } else { + return python_token("BLANKLINE","",$l); + } + } else { $f = true; - $s = array(); foreach($h as $r=>$t) { if (re($r,$c,$s)) { - $o[] = token($t,$s[0]); + $l = strlen($s[0]); + return python_token($t,$s[0],$l); $f = false; break; } } if ($f) { - if (re('/^[ ]+/',$c,$s)) { - } else if (re('/^[\t]+/',$c,$s)) { - $d = strlen($r[0]) - $i; - switch($d) { - case 0: break; - case +1: $o[] = token("INDENT"); $i++; break; - case -1: $o[] = token("DEDENT"); $i--; break; - default: die("Expectend indented block"); - } - } else if (re('/^#.*/',$c,$s)) { + if (re('|^#.*|',$c,$s)) { python_parse_comment($s[0]); } else if (re('/^[0-9]+(\.[0-9]*)?/',$c,$s)) { - $o[] = token("NUMBER",(int)$s[0]); + $o[] = python_token("NUMBER",(int)$s[0]); } else if (re('/^"""(.*)"""/',$c,$s)) { - $o[] = token("STRING",$s[1]); + $o[] = python_token("STRING",$s[1]); } else if (re('/^"([^"]+((\\\\")*[^"]+))"/',$c,$s)) { - $o[] = 
token("STRING",(string)$s[1]); - } else if (re('/^[A-Za-z_][A-Za-z0-1_]*/',$c,$s)) { + $o[] = python_token("STRING",(string)$s[1]); + } else if (re('|^[A-Za-z_][A-Za-z0-1_]*|',$c,$s)) { $v = new stdClass; $v-> name = $s[0]; - $o[] = token("NAME",$v); + $o[] = python_token("NAME",$v); } else { $s[0] = $c[0]; } - } - $c = substr($c,strlen($s[0])); - } - if ($i>0) { - while($i--) { - $o[] = token("NEWLINE"); - $o[] = token("DEDENT"); - } - } - $o[] = token("NEWLINE"); - $o[] = token("ENDMARKER"); - return $o; -} \ No newline at end of file + } + } + +}