PHPCheckstyle Code

Brought to you by: hkodungallur, tchule
[r161]: / trunk / PhpCheckstyle / src / Tokenizer.php Maximize Restore History

530 lines (463 with data), 13.3 kB

<?php
if (!defined("PHPCHECKSTYLE_HOME_DIR")) {
	define("PHPCHECKSTYLE_HOME_DIR", dirname(__FILE__)."/..");
}
require_once PHPCHECKSTYLE_HOME_DIR."/src/TokenInfo.php";

/**
 * Lexical Analysis.
 *
 * Class that stores the tokens for a particular class and provide
 * utility functions like getting the next/previous token,
 * checking whether the token is of particular type etc.
 *
 * Based on the internal PHP tokenizer but separate the NEW_LINE and the TAB tokens.
 *
 * @see http://www.php.net/manual/en/tokens.php
 * @author Hari Kodungallur <hkodungallur@spikesource.com>
 */
class Tokenizer {

	/**
	 * The array of tokens in a file.
	 * @var Array[TokenInfo]
	 */
	private $tokens;

	/**
	 * Position of the index in the current file.
	 * @var Integer
	 */
	private $index = 0;

	/**
	 * Constructor
	 */
	public function Tokenizer() {
		$this->reset();
	}

	/**
	 * Tokenizes the input php file and stores all the tokens in the
	 * $this->tokens variable.
	 *
	 * @param String $filename the line where the token is found
	 */
	public function tokenize($filename) {
		$contents = "";
		if (filesize($filename)) {
			$fp = fopen($filename, "rb");
			$contents = fread($fp, filesize($filename));
			fclose($fp);
		}
		$this->tokens = $this->_getAllTokens($contents);
	}

	/**
	 * Dump the tokens of the file.
	 *
	 * @return String
	 */
	public function dumpTokens() {
		$result = "";
		foreach ($this->tokens as $token) {
			$result .= $token->toString().PHP_EOL;
		}
		return $result;
	}

	/**
	 * Gets the next token.
	 *
	 * In the process moves the index to the next position.
	 *
	 * @return TokenInfo
	 */
	public function getNextToken() {
		if ($this->index < (count($this->tokens) - 1)) {

			// Increment the index
			$this->index++;

			// Return the new token
			return $this->tokens[$this->index];
		} else {
			return false;
		}
	}

	/**
	 * Gets the current token.
	 *
	 * @return TokenInfo
	 */
	public function getCurrentToken() {
		return $this->tokens[$this->index];
	}

	/**
	 * Get the token at a given position.
	 *
	 * @param Integer $position the position of the token
	 * @return TokenInfo the token found
	 */
	public function peekTokenAt($position) {
		if ($position < count($this->tokens)) {
			return $this->tokens[$position];
		} else {
			return null;
		}
	}

	/**
	 * Gives the current position in the tokenizer.
	 *
	 * @return current position of the Tokenizer
	 */
	public function getCurrentPosition() {
		return $this->index;
	}

	/**
	 * Returns the next token without moving
	 * the index.
	 *
	 * @return TokenInfo if the token is found (and update the line value)
	 */
	public function peekNextToken() {
		if ($this->index < (count($this->tokens) - 1)) {
			return $this->tokens[$this->index + 1];
		} else {
			return null;
		}
	}

	/**
	 * Peeks at the previous token. That is it returns the previous token
	 * without moving the index.
	 *
	 * @return TokenInfo
	 */
	public function peekPrvsToken() {
		if ($this->index > 0) {
			return $this->tokens[$this->index - 1];
		} else {
			return null;
		}
	}

	/**
	 * Peeks at the next valid token.
	 * A valid token is one that is neither a whitespace or a comment
	 *
	 * @param Integer $startPos the start position for the search (if the item on this position is valid, it will be returned)
	 * @param Boolean $stopOnNewLine Indicate if we need to stop when we meet a new line
	 * @return TokenInfo the info about the token found
	 */
	public function peekNextValidToken($startPos = null, $stopOnNewLine = false) {

		// define the start position
		$pos = $this->getCurrentPosition() + 1;  // defaut position for the search
		if ($startPos != null) {
			$pos = $startPos; // if defined, set the start position
		}

		// search for the next valid token
		$token = null;
		$nbTokens = count($this->tokens);
		while ($pos < $nbTokens) {
			$token = $this->tokens[$pos];
			$pos++;

			if ($token->id == T_WHITESPACE || $token->id == T_TAB || $token->id == T_COMMENT || $token->id == T_ML_COMMENT || $token->id == T_DOC_COMMENT) {
				continue;
			} else if ($token->id == T_NEW_LINE) {
				if ($stopOnNewLine) {
					break;
				} else {
					continue;
				}
			} else {
				break;
			}

		}

		return $token;

	}

	/**
	 * Peeks at the previous valid token.
	 * A valid token is one that is neither a whitespace or a comment
	 *
	 * @return TokenInfo the info about the token found
	 */
	public function peekPrvsValidToken() {
		$pos = $this->index - 1;

		$token = null;
		while ($pos > 0) {

			$token = $this->tokens[$pos];
			$pos--;

			if ($token->id == T_WHITESPACE || $token->id == T_TAB || $token->id == T_COMMENT || $token->id == T_ML_COMMENT || $token->id == T_DOC_COMMENT) {
				continue;
			} else if ($token->id == T_NEW_LINE) {
				continue;
			} else {
				break;
			}
		}

		return $token;

	}

	/**
	 * Resets all local variables
	 *
	 * @return
	 * @access public
	 */
	public function reset() {
		$this->index = 0;
		$this->tokens = array();
	}

	/**
	 * Check if a token is equal to a given token ID
	 *
	 * @param TokenInfo $token the token to test
	 * @param Integer $id the token ID we're looking for
	 * @param String $text (optional) the text we're looking for
	 * @return Boolean true if the token correspond
	 */
	public function checkToken($token, $id, $text = false) {
		$result = false;
		if ($token->id == $id) {
			if ($text) {
				if ($token->text == $text) {
					$result = true;
				} else {
					$result = false;
				}
			} else {
				$result = true;
			}

		}
		return $result;
	}

	/**
	 * Check if the previous valid token (ignoring whitespace) correspond to the specified token.
	 *
	 * @param Integer $id the token ID we're looking for
	 * @param String $text (optional) the text we're looking for
	 * @return Boolean true if the token is found
	 */
	public function checkPreviousValidToken($id, $text = false) {
		$token = $this->peekPrvsValidToken();
		return $this->checkToken($token, $id, $text);
	}

	/**
	 * Check if the previous valid token (ignoring whitespace) correspond to the specified token.
	 *
	 * @param Integer $id the token ID we're looking for
	 * @param String $text (optional) the text we're looking for
	 * @return Boolean true if the token is found
	 */
	public function checkPreviousToken($id, $text = false) {
		$token = $this->peekPrvsToken();
		return $this->checkToken($token, $id, $text);
	}

	/**
	 * Check if a the next token exists (and if its value correspond to what is expected).
	 *
	 * @param Integer $id the token we're looking for
	 * @param String $text (optional) the text we're looking for
	 * @return Boolean true if the token is found
	 */
	public function checkNextToken($id, $text = false) {
		$token = $this->peekNextToken();
		return $this->checkToken($token, $id, $text);
	}

	/**
	 * Check if a the next token exists (and if its value correspond to what is expected).
	 *
	 * @param Integer $id the token we're looking for
	 * @param String $text (optional) the text we're looking for
	 * @return Boolean true if the token is found
	 */
	public function checkCurrentToken($id, $text = false) {
		$token = $this->getCurrentToken();
		return $this->checkToken($token, $id, $text);
	}

	/**
	 * Find the next position of the string.
	 *
	 * @param String $text the text we're looking for
	 * @param Integer $apos the position to start from (by defaut will use current position)
	 * @return Integer the position, null if not found
	 */
	public function findNextStringPosition($text, $apos = null) {

		if ($apos == null) {
			$pos = $this->getCurrentPosition();
		} else {
			$pos = $apos;
		}
		$pos += 1; // Start from the token following the current position

		$nbTokens = count($this->tokens);
		while  ($pos < $nbTokens) {
			$token = $this->tokens[$pos];

			if ($text == $token->text) {
				return $pos;
			}

			$pos++;
		}

		return null;
	}

	/**
	 * Check if the next valid token (ignoring whitespaces) correspond to the specified token.
	 *
	 * @param Integer $id the id of the token we're looking for
	 * @param String $text the text we're looking for
	 * @param Integer $startPos the start position
	 * @return true if the token is found
	 */
	public function checkNextValidToken($id, $text = false, $startPos = null) {

		$tokenInfo = $this->peekNextValidToken($startPos);

		if ($tokenInfo != null) {
			return $this->checkToken($tokenInfo, $id, $text);
		} else {
			return false;
		}
	}

	/**
	 * Tokenize a string and separate the newline token.
	 *
	 * Found here : http://php.net/manual/function.token-get-all.php
	 *
	 * @param String $source The source code to analyse
	 * @return an array of tokens
	 */
	private function _getAllTokens($source) {
		$newTokens = array();

		$lineNumber = 1;
		$position = 0;

		// Get the tokens
		$tokens = token_get_all($source);

		// Split newlines into their own tokens
		foreach ($tokens as $token) {

			$tokenID = is_array($token) ? $token[0] : T_UNKNOWN;
			$tokenText = is_array($token) ? $token[1] : $token;

			// Split the data up by newlines
			// To correctly handle T_NEW_LINE inside comments and HTML
			$splitData = preg_split('#(\r\n|\n|\r)#', $tokenText, null, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
			foreach ($splitData as $data) {

				$tokenInfo = new TokenInfo();
				$tokenInfo->text = $data;
				$tokenInfo->position = $position;
				$tokenInfo->line = $lineNumber;

				if ($data == "\r\n" || $data == "\n" || $data == "\r") {
					// This is a new line token
					$tokenInfo->id = T_NEW_LINE;
					$lineNumber++;
				} else if ($data == "\t") {
					// This is a tab token
					$tokenInfo->id = T_TAB;
				} else {
					// Any other token
					$tokenInfo->id = $tokenID;
				}

				$position++;

				// Added detections
				if ($tokenInfo->id == T_UNKNOWN) {
					switch ($tokenInfo->text) {
						case ";" :
							$tokenInfo->id = T_SEMICOLON;
							break;
						case "{":
							$tokenInfo->id = T_BRACES_OPEN;
							break;
						case "}":
							$tokenInfo->id = T_BRACES_CLOSE;
							break;
						case "(":
							$tokenInfo->id = T_PARENTHESIS_OPEN;
							break;
						case ")":
							$tokenInfo->id = T_PARENTHESIS_CLOSE;
							break;
						case ",":
							$tokenInfo->id = T_COMMA;
							break;
						case "=":
							$tokenInfo->id = T_EQUAL;
							break;
						case ".":
							$tokenInfo->id = T_CONCAT;
							break;
						case ":" :
							$tokenInfo->id = T_COLON;
							break;
						case "-" :
							$tokenInfo->id = T_MINUS;
							break;
						case "+" :
							$tokenInfo->id = T_PLUS;
							break;
						case ">" :
							$tokenInfo->id = T_IS_GREATER;
							break;
						case "<" :
							$tokenInfo->id = T_IS_SMALLER;
							break;
						case "*" :
							$tokenInfo->id = T_MULTIPLY;
							break;
						case "/" :
							$tokenInfo->id = T_DIVIDE;
							break;
						case "?" :
							$tokenInfo->id = T_QUESTION_MARK;
							break;
						case "%" :
							$tokenInfo->id = T_MODULO;
							break;
						case "!" :
							$tokenInfo->id = T_EXCLAMATION_MARK;
							break;
						case "&" :
							$tokenInfo->id = T_AMPERSAND;
							break;
						case "[" :
							$tokenInfo->id = T_SQUARE_BRACKET_OPEN;
							break;
						case "]" :
							$tokenInfo->id = T_SQUARE_BRACKET_CLOSE;
							break;
						case "@" :
							$tokenInfo->id = T_AROBAS;
							break;
						default:
					}
				}

				$newTokens[] = $tokenInfo;
			}
		}

		return $newTokens;
	}


	/**
	 * Find the position of the closing parenthesis corresponding to the current position opening parenthesis.
	 *
	 * @param Integer $startPos
	 * @return Integer $closing position
	 */
	public function findClosingParenthesisPosition($startPos) {

		// Find the opening parenthesis after current position
		$pos = $this->findNextStringPosition('(', $startPos);
		$parenthesisCount = 1;

		$pos += 1; // Skip the opening  parenthesis

		$nbTokens = count($this->tokens);
		while ($parenthesisCount > 0 && $pos < $nbTokens) {
			// Look for the next token
			$token = $this->peekTokenAt($pos);

			// Increment or decrement the parenthesis count
			if ($token->id == T_PARENTHESIS_OPEN) {
				$parenthesisCount += 1;
			} else if ($token->id == T_PARENTHESIS_CLOSE) {
				$parenthesisCount -= 1;
			}

			// Increment the position
			$pos += 1;
		}

		return $pos - 1;
	}

	/**
	 * Checks if a token is in the type of token list.
	 *
	 * @param TokenInfo $tokenToCheck 	the token to check.
	 * @param Array[Integer] $tokenList 		an array of token ids, e.g. T_NEW_LINE, T_DOC_COMMENT, etc.
	 * @return Boolean true if the token is found, false if it is not.
	 */
	public function isTokenInList($tokenToCheck, $tokenList) {
		foreach ($tokenList as $tokenInList) {
			if ($this->checkToken($tokenToCheck, $tokenInList)) {
				return true;
			}
		}
		return false;
	}


}