Menu

Diff of /trunk/python/python-tokenizer.php [r35] .. [r36]  Maximize  Restore

Switch to side-by-side view

--- a/trunk/python/python-tokenizer.php
+++ b/trunk/python/python-tokenizer.php
@@ -2,6 +2,38 @@
 
 function python_tokenize($code) {		
 	$c = python_sanitize($code);	
+	$o = array();	// output token list: filled by the loop below from the sanitized code in $c, returned at the end
+	$i = 0; // count of NEWLINE/DEDENT pairs to emit after the loop; nothing before that check changes it from 0
+	$z = 1; // 1-based iteration counter, used only by the cutoff inside the loop
+	while (strlen($c)) { // keep consuming from the front of $c until it is empty
+		$t = python_tokenize_next($c);				// token found at the start of the remaining input
+		
+		python_dump_token($t); // helper defined elsewhere; presumably debug output -- confirm
+		if ($z>3) { // NOTE(review): looks like temporary debugging scaffolding
+			die(); // terminates the script on the 4th iteration, before that token is stored
+		}
+		
+		$o[] = $t;
+		if ($t["len"]>0) {
+			$c = substr($c,$t["len"]);		// drop the bytes this token consumed
+		} else { // no positive "len": $c would never shrink, so the loop could not terminate
+			var_dump($t); // show the offending value ...
+			die(); // ... and halt instead of looping forever
+		}
+		$z++;
+	}
+	if ($i>0) { // NOTE(review): $i is still 0 here as written, so this branch does not run
+		while($i--) { // one NEWLINE + DEDENT pair per count left in $i
+			$o[] = python_token("NEWLINE");	
+			$o[] = python_token("DEDENT");		
+		}
+	}
+	$o[] = python_token("NEWLINE");	// the token list always ends with NEWLINE ...
+	$o[] = python_token("ENDMARKER"); // ... followed by ENDMARKER
+	return $o;
+}
+
+function python_tokenize_next($c) {
 	$h = array(
 		'/^\n/'		=> "NEWLINE",		
 		'/^,/'		=> "COMMA",
@@ -28,53 +60,43 @@
 		'/^None/'	=> "NONE",
 		'/^\.\.\./'	=> "TRIEPLEDOT",
 	);	
-	$o = array();
-	$i = 0;	
-	while (strlen($c)) {
+	if (re('/(^[ \t]+)/',$c,$s)) {
+		$l = strlen($s[0]);	
+		$k = true;
+		if(isset($c[$l]))if(ord($c[$l])==13){$l++;$k=false;}
+		if(isset($c[$l]))if(ord($c[$l])==10){$l++;$k=false;}
+		if ($k) {
+			return python_token("INDENT","",$l);				
+		} else {
+			return python_token("BLANKLINE","",$l);
+		}
+	} else {
 		$f = true;
-		$s = array();	
 		foreach($h as $r=>$t) {
 			if (re($r,$c,$s)) {
-				$o[] = token($t,$s[0]);
+				$l = strlen($s[0]);
+				return python_token($t,$s[0],$l);
 				$f = false;
 				break;
 			}
 		}
 		if ($f) {
-			if (re('/^[ ]+/',$c,$s)) {				
-			} else if (re('/^[\t]+/',$c,$s)) {
-				$d = strlen($r[0]) - $i;
-				switch($d) {
-					case  0: break;
-					case +1: $o[] = token("INDENT"); $i++; break;
-					case -1: $o[] = token("DEDENT"); $i--; break;
-					default: die("Expectend indented block");
-				}
-			} else if (re('/^#.*/',$c,$s)) {					
+			if (re('|^#.*|',$c,$s)) {					
 				python_parse_comment($s[0]);	
 			} else if (re('/^[0-9]+(\.[0-9]*)?/',$c,$s)) {
-				$o[] = token("NUMBER",(int)$s[0]);
+				$o[] = python_token("NUMBER",(int)$s[0]);
 			} else if (re('/^"""(.*)"""/',$c,$s)) {
-				$o[] = token("STRING",$s[1]);
+				$o[] = python_token("STRING",$s[1]);
 			} else if (re('/^"([^"]+((\\\\")*[^"]+))"/',$c,$s)) {
-				$o[] = token("STRING",(string)$s[1]);
-			} else if (re('/^[A-Za-z_][A-Za-z0-1_]*/',$c,$s)) {
+				$o[] = python_token("STRING",(string)$s[1]);
+			} else if (re('|^[A-Za-z_][A-Za-z0-1_]*|',$c,$s)) {
 				$v = new stdClass;
 				$v-> name = $s[0];
-				$o[] = token("NAME",$v);				
+				$o[] = python_token("NAME",$v);				
 			} else {
 				$s[0] = $c[0];
 			}
-		} 	
-		$c = substr($c,strlen($s[0]));		
-	}
-	if ($i>0) {
-		while($i--) {
-			$o[] = token("NEWLINE");	
-			$o[] = token("DEDENT");		
-		}
-	}
-	$o[] = token("NEWLINE");	
-	$o[] = token("ENDMARKER");
-	return $o;
-}
\ No newline at end of file
+		} 
+	}	
+
+}
Want the latest updates on software, tech news, and AI?
Get the latest updates on software, tech news, and AI from SourceForge, delivered directly to your inbox once a month.