phpython Code
a python interpreter written in php
Status: Pre-Alpha
Brought to you by:
francescobianco
--- a +++ b/trunk/python/python-tokenizer.php @@ -0,0 +1,153 @@ +<!-- + +http://svn.python.org/view/python/trunk/Lib/tokenize.py?view=markup + + +Whitespace = r'[ \f\t]*' +Comment = r'#[^\r\n]*' +Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) +Name = r'[a-zA-Z_]\w*' + +Hexnumber = r'0[xX][\da-fA-F]+[lL]?' +Octnumber = r'(0[oO][0-7]+)|(0[0-7]*)[lL]?' +Binnumber = r'0[bB][01]+[lL]?' +Decnumber = r'[1-9]\d*[lL]?' +Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) +Exponent = r'[eE][-+]?\d+' +Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent) +Expfloat = r'\d+' + Exponent +Floatnumber = group(Pointfloat, Expfloat) +Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]') +Number = group(Imagnumber, Floatnumber, Intnumber) + +# Tail end of ' string. +Single = r"[^'\\]*(?:\\.[^'\\]*)*'" +# Tail end of " string. +Double = r'[^"\\]*(?:\\.[^"\\]*)*"' +# Tail end of ''' string. +Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" +# Tail end of """ string. +Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' +Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""') +# Single-line ' or " string. +String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'", + r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"') + +# Because of leftmost-then-longest match semantics, be sure to put the +# longest operators first (e.g., if = came before ==, == would get +# recognized as two instances of =). +Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", + r"//=?", + r"[+\-*/%&|^=<>]=?", + r"~") + +Bracket = '[][(){}]' +Special = group(r'\r?\n', r'[:;.,`@]') +Funny = group(Operator, Bracket, Special) + +PlainToken = group(Number, Funny, String, Name) +Token = Ignore + PlainToken + +# First (or only) line of ' or " string. +ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + + group("'", r'\\\r?\n'), + r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + + group('"', r'\\\r?\n')) +PseudoExtras = group(r'\\\r?\n', Comment, Triple) +PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) + +tokenprog, pseudoprog, single3prog, double3prog = map( + re.compile, (Token, PseudoToken, Single3, Double3)) +endprogs = {"'": re.compile(Single), '"': re.compile(Double), + "'''": single3prog, '"""': double3prog, + "r'''": single3prog, 'r"""': double3prog, + "u'''": single3prog, 'u"""': double3prog, + "ur'''": single3prog, 'ur"""': double3prog, + "R'''": single3prog, 'R"""': double3prog, + "U'''": single3prog, 'U"""': double3prog, + "uR'''": single3prog, 'uR"""': double3prog, + "Ur'''": single3prog, 'Ur"""': double3prog, + "UR'''": single3prog, 'UR"""': double3prog, + "b'''": single3prog, 'b"""': double3prog, + "br'''": single3prog, 'br"""': double3prog, + "B'''": single3prog, 'B"""': double3prog, + "bR'''": single3prog, 'bR"""': double3prog, + "Br'''": single3prog, 'Br"""': double3prog, + "BR'''": single3prog, 'BR"""': double3prog, + 'r': None, 'R': None, 'u': None, 'U': None, + 'b': None, 'B': None} + +triple_quoted = {} +for t in ("'''", '"""', + "r'''", 'r"""', "R'''", 'R"""', + "u'''", 'u"""', "U'''", 'U"""', + "ur'''", 'ur"""', "Ur'''", 'Ur"""', + "uR'''", 'uR"""', "UR'''", 'UR"""', + "b'''", 'b"""', "B'''", 'B"""', + "br'''", 'br"""', "Br'''", 'Br"""', + "bR'''", 'bR"""', "BR'''", 'BR"""'): + triple_quoted[t] = t +single_quoted = {} +for t in ("'", '"', + "r'", 'r"', "R'", 'R"', + "u'", 'u"', "U'", 'U"', + "ur'", 'ur"', "Ur'", 'Ur"', + "uR'", 'uR"', "UR'", 'UR"', + "b'", 'b"', "B'", 'B"', + "br'", 'br"', "Br'", 'Br"', + "bR'", 'bR"', "BR'", 'BR"' ): + single_quoted[t] = t + +tabsize = 8 +--> + +<?php + function group($re) { + if (is_array($re)) { + return "(".implode("|",$re).")"; + } else { + return "(".$re.")"; + } + } + function any($re) { + return group($re)."*"; + } + function maybe($re) { + return group($re)."?"; + } + + $preg_whitespace = "[ \f\t]*"; + $preg_comment = "#[^\r\n]*"; + $preg_ignore = $preg_whitespace + any("\\\r?\n" + $preg_whitespace) + maybe($preg_comment); + $preg_name = "[a-zA-Z_]\w*"; + + + function python_tokenize($pycode) { + $line = $pycode; + + $out = array(); + while (strlen($line)) { + if (preg_match('/^[ \t]+(\.[0-9]*)?/', $line, $regs)) { + # Ignored + $out[] = $regs[0]; + $line = substr($line, strlen($regs[0])); + + } else if (preg_match('/^[0-9]+(\.[0-9]*)?/', $line, $regs)) { + # It's a number + $out[] = $regs[0]; + $line = substr($line, strlen($regs[0])); + + } else if (preg_match('/^[A-Za-z]+/', $line, $regs)) { + # It's a variable name + $out[] = $regs[0]; + $line = substr($line, strlen($regs[0])); + + } else { + # It's some other character + $out[] = $line[0]; + $line = substr($line, 1); + } + } + return $out; + } +?>