diff options
author | Jeremy Dorn <jeremy@jeremydorn.com> | 2013-07-11 06:48:17 -0700 |
---|---|---|
committer | Jeremy Dorn <jeremy@jeremydorn.com> | 2013-07-11 06:48:17 -0700 |
commit | e1aa999f64b222bdd990be6408f832c616ac57cf (patch) | |
tree | 3bdebc81d1d207d1d0ac5b0225afca12489a80fc | |
parent | 7dc6794a53571d5ccbe10716d20e2e3a7101c00c (diff) | |
download | sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.zip sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.tar.gz sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.tar.bz2 |
Formatting and naming changes in order to merge @adityagoyal pull request.
-rw-r--r-- | examples/examples.php | 4 | ||||
-rw-r--r-- | lib/SqlFormatter.php | 431 |
2 files changed, 195 insertions, 240 deletions
diff --git a/examples/examples.php b/examples/examples.php index 97771ad..f54cfb0 100644 --- a/examples/examples.php +++ b/examples/examples.php @@ -43,15 +43,13 @@ $statements = array( DATE_FORMAT((DATE_SUB(NOW(),INTERVAL 1 DAY)),'%Y-%c-%d') AND t_create < DATE_FORMAT(NOW(), '%Y-%c-%d') ORDER BY d.id LIMIT 2,10) a, orc_scheme_detail b WHERE a.id = b.id", - - "SELECT * from Table1 LEFT OUTER JOIN Table2 on Table1.id = Table2.id", "SELECT * from Table1 LEFT OUTER JOIN Table2 on Table1.id = Table2.id", "SELECT * FROM MyTable WHERE id = 46", - "SELECT count(*),`Column1`,`Testing`, `Testing Three` FROM `Table1` + "SELECT count(*),`Column1` as count,`Testing`, `Testing Three` FROM `Table1` WHERE Column1 = 'testing' AND ( (`Column2` = `Column3` OR Column4 >= NOW()) ) GROUP BY Column1 ORDER BY Column3 DESC LIMIT 5,10", diff --git a/lib/SqlFormatter.php b/lib/SqlFormatter.php index 5908003..cbd23f3 100644 --- a/lib/SqlFormatter.php +++ b/lib/SqlFormatter.php @@ -26,12 +26,11 @@ class SqlFormatter const TOKEN_TYPE_BLOCK_COMMENT = 9; const TOKEN_TYPE_NUMBER = 10; const TOKEN_TYPE_ERROR = 11; - const TOKEN_TYPE_FUNCTION = 12; - + // Constants for different components of a token const TOKEN_TYPE = 0; const TOKEN_VALUE = 1; - + // Reserved words (for syntax highlighting) protected static $reserved = array( 'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AS', 'ASC', @@ -47,7 +46,7 @@ class SqlFormatter 'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE', 'MATCH','MAX_CONNECTIONS_PER_HOUR', 'MAX_QUERIES_PER_HOUR', 'MAX_ROWS', 'MAX_UPDATES_PER_HOUR', 'MAX_USER_CONNECTIONS', 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY', - 'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', + 'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW()','NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', 'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE', 'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE','RAID_TYPE', 'READ', 'READ_ONLY', 'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT', @@ -72,35 +71,35 @@ class SqlFormatter 'LEFT OUTER JOIN', 'RIGHT OUTER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'OUTER JOIN', 'INNER JOIN', 'JOIN', 'XOR', 'OR', 'AND' ); - protected static $mysql_function = array ( - 'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2', - 'AVG', 'BDMPOLYFROMTEXT', 'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH', - 'BIT_OR', 'BIT_XOR', 'BOUNDARY', 'BUFFER', 'CAST', 'CEIL', 'CEILING', 'CENTROID', 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH', - 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ', - 'CONVEXHULL', 'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', - 'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK', - 'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE', - 'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR', - 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT', - 'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT', - 'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR', - 'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS', 'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK', - 'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB', - 'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET', - 'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND', - 'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB', - 'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT', 'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON', - 'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NOW', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS', 'NUMPOINTS', 'OCT', 'OCTET_LENGTH', - 'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE', - 'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND', - 'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME', - 'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP', - 'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN', - 'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE', - 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP', - 'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK' -); - + protected static $functions = array ( + 'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2', + 'AVG', 'BDMPOLYFROMTEXT', 'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH', + 'BIT_OR', 'BIT_XOR', 'BOUNDARY', 'BUFFER', 'CAST', 'CEIL', 'CEILING', 'CENTROID', 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH', + 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ', + 'CONVEXHULL', 'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', + 'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK', + 'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE', + 'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR', + 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT', + 'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT', + 'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR', + 'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS', 'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK', + 'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB', + 'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET', + 'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND', + 'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB', + 'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT', 'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON', + 'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS', 'NUMPOINTS', 'OCT', 'OCTET_LENGTH', + 'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE', + 'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND', + 'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME', + 'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP', + 'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN', + 'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE', + 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP', + 'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK' + ); + // Punctuation that can be used as a boundary between other tokens protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#'); @@ -115,13 +114,12 @@ class SqlFormatter public static $error_attributes = 'style="background-color: red;"'; public static $comment_attributes = 'style="color: #aaa;"'; public static $pre_attributes = 'style="color: black; background-color: white;"'; - public static $mysql_functions = 'style="color: red;"'; - + // Boolean - whether or not the current environment is the CLI // This affects the type of syntax highlighting // If not defined, it will be determined automatically public static $cli; - + // For CLI syntax highlighting public static $cli_quote = "\x1b[34;1m"; public static $cli_backtick_quote = "\x1b[35;1m"; @@ -138,16 +136,16 @@ class SqlFormatter // This flag tells us if queries need to be enclosed in <pre> tags public static $use_pre = true; - + // This flag tells us if SqlFormatted has been initialized protected static $init; - + // Regular expressions for tokenizing protected static $regex_boundaries; protected static $regex_reserved; protected static $regex_reserved_newline; protected static $regex_reserved_toplevel; - protected static $regex_mysql_function; + protected static $regex_function; // Cache variables // Only tokens shorter than this size will be cached. Somewhere between 10 and 20 seems to work well for most cases. @@ -155,12 +153,13 @@ class SqlFormatter protected static $token_cache = array(); protected static $cache_hits = 0; protected static $cache_misses = 0; - + /** * Get stats about the token cache * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes */ - public static function getCacheStats() { + public static function getCacheStats() + { return array( 'hits'=>self::$cache_hits, 'misses'=>self::$cache_misses, @@ -168,13 +167,14 @@ class SqlFormatter 'size'=>strlen(serialize(self::$token_cache)) ); } - + /** * Stuff that only needs to be done once. Builds regular expressions and sorts the reserved words. */ - protected static function init() { + protected static function init() + { if(self::$init) return; - + // Sort reserved word list from longest word to shortest usort(self::$reserved, array('SqlFormatter', 'sortLength')); @@ -184,17 +184,17 @@ class SqlFormatter self::$regex_reserved_toplevel = str_replace(' ','\\s+','('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$reserved_toplevel)).')'); self::$regex_reserved_newline = str_replace(' ','\\s+','('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$reserved_newline)).')'); - self::$regex_mysql_function = '('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$mysql_function)).')'; - + self::$regex_function = '('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$functions)).')'; + self::$init = true; } - + /** * Return the next token and token type in a SQL string. * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens. * - * @param String $string The SQL string - * @param array $previous The result of the previous getNextToken() call + * @param String $string The SQL string + * @param array $previous The result of the previous getNextToken() call * * @return Array An associative array containing the type and value of the token. */ @@ -207,7 +207,7 @@ class SqlFormatter self::TOKEN_TYPE=>self::TOKEN_TYPE_WHITESPACE ); } - + // Comment if ($string[0] === '#' || (isset($string[1])&&($string[0]==='-'&&$string[1]==='-') || ($string[0]==='/'&&$string[1]==='*'))) { // Comment until end of line @@ -230,25 +230,25 @@ class SqlFormatter } // Quoted String - if($string[0]==='"' || $string[0]==='\'' || $string[0]==='`') { + if ($string[0]==='"' || $string[0]==='\'' || $string[0]==='`') { $return = array( self::TOKEN_TYPE => ($string[0]==='`'? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE), self::TOKEN_VALUE => $string ); - + // This checks for the following patterns: // 1. backtick quoted string using `` to escape // 2. double quoted string using "" or \" to escape // 3. single quoted string using '' or \' to escape - if( preg_match('/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) { + if ( preg_match('/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) { $return[self::TOKEN_VALUE] = $matches[1]; } - + return $return; } - + // Number - if(preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|"\'`|'.self::$regex_boundaries.')/',$string,$matches)) { + if (preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|"\'`|'.self::$regex_boundaries.')/',$string,$matches)) { return array( self::TOKEN_VALUE => $matches[1], self::TOKEN_TYPE=>self::TOKEN_TYPE_NUMBER @@ -256,7 +256,7 @@ class SqlFormatter } // Boundary Character (punctuation and symbols) - if(preg_match('/^('.self::$regex_boundaries.')/',$string,$matches)) { + if (preg_match('/^('.self::$regex_boundaries.')/',$string,$matches)) { return array( self::TOKEN_VALUE => $matches[1], self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY @@ -268,21 +268,21 @@ class SqlFormatter if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') { $upper = strtoupper($string); // Top Level Reserved Word - if(preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { + if (preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { return array( self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_TOPLEVEL, self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])) ); } // Newline Reserved Word - if(preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { + if (preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { return array( self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_NEWLINE, self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])) ); } // Other Reserved Word - if(preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { + if (preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) { return array( self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED, self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])) @@ -291,19 +291,16 @@ class SqlFormatter } // A function must be suceeded by '(' - // this makes it so in "count(", which considers it as a mysql function. - - $upper = strtoupper($string); - // MySQL function - if(preg_match('/^('.self::$regex_mysql_function.'[(]|\s|[)])/', $upper,$matches)) { - - return array( - self::TOKEN_TYPE=>self::TOKEN_TYPE_FUNCTION, - self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])-1) - ); - } - - + // this makes it so "count(" is considered a function, but "count" alone is not + $upper = strtoupper($string); + // function + if (preg_match('/^('.self::$regex_function.'[(]|\s|[)])/', $upper,$matches)) { + return array( + self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED, + self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])-1) + ); + } + // Non reserved word preg_match('/^(.*?)($|\s|["\'`]|'.self::$regex_boundaries.')/',$string,$matches); @@ -324,7 +321,7 @@ class SqlFormatter protected static function tokenize($string) { self::init(); - + $tokens = array(); // Used for debugging if there is an error while tokenizing the string @@ -334,7 +331,7 @@ class SqlFormatter $old_string_len = strlen($string) + 1; $token = null; - + $current_length = strlen($string); // Keep processing the string until it is empty @@ -345,49 +342,47 @@ class SqlFormatter self::TOKEN_VALUE=>$string, self::TOKEN_TYPE=>self::TOKEN_TYPE_ERROR ); - + return $tokens; } $old_string_len = $current_length; // Determine if we can use caching - if($current_length >= self::$max_cachekey_size) { + if ($current_length >= self::$max_cachekey_size) { $cacheKey = substr($string,0,self::$max_cachekey_size); - } - else { + } else { $cacheKey = false; } // See if the token is already cached - if($cacheKey && isset(self::$token_cache[$cacheKey])) { + if ($cacheKey && isset(self::$token_cache[$cacheKey])) { // Retrieve from cache $token = self::$token_cache[$cacheKey]; $token_length = strlen($token[self::TOKEN_VALUE]); self::$cache_hits++; - } - else { + } else { // Get the next token and the token type - $token = self::getNextToken($string, $token); + $token = self::getNextToken($string, $token); $token_length = strlen($token[self::TOKEN_VALUE]); self::$cache_misses++; - + // If the token is shorter than the max length, store it in cache - if($cacheKey && $token_length < self::$max_cachekey_size) { + if ($cacheKey && $token_length < self::$max_cachekey_size) { self::$token_cache[$cacheKey] = $token; } } - + $tokens[] = $token; // Advance the string $string = substr($string, $token_length); - + $current_length -= $token_length; } return $tokens; } - + /** * Format the whitespace in a SQL string to make it easier to read. * @@ -396,7 +391,8 @@ class SqlFormatter * * @return String The SQL string with HTML styles and formatting wrapped in a <pre> tag */ - public static function format($string, $highlight=true) { + public static function format($string, $highlight=true) + { // This variable will be populated with formatted html $return = ''; @@ -412,10 +408,10 @@ class SqlFormatter $added_newline = false; $inline_count = 0; $inline_indented = false; - + // Tokenize String $original_tokens = self::tokenize($string); - + // Remove existing whitespace $tokens = array(); foreach ($original_tokens as $i=>$token) { @@ -426,7 +422,7 @@ class SqlFormatter } // Format token by token - foreach ($tokens as $i=>$token) { + foreach ($tokens as $i=>$token) { // Get highlighted token if doing syntax highlighting if ($highlight) { $highlighted = self::highlightToken($token); @@ -435,28 +431,27 @@ class SqlFormatter } // If we are increasing the special indent level now - if($increase_special_indent) { + if ($increase_special_indent) { $indent_level++; $increase_special_indent = false; array_unshift($indent_types,'special'); } // If we are increasing the block indent level now - if($increase_block_indent) { + if ($increase_block_indent) { $indent_level++; $increase_block_indent = false; array_unshift($indent_types,'block'); } - + // If we need a new line before the token if ($newline) { $return .= "\n" . str_repeat($tab, $indent_level); $newline = false; $added_newline = true; - } - else { + } else { $added_newline = false; } - + // Display comments directly where they appear in the source if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { @@ -469,31 +464,31 @@ class SqlFormatter $newline = true; continue; } - - if($inline_parentheses) { + + if ($inline_parentheses) { // End of inline parentheses - if($token[self::TOKEN_VALUE] === ')') { + if ($token[self::TOKEN_VALUE] === ')') { $return = rtrim($return,' '); - - if($inline_indented) { + + if ($inline_indented) { array_shift($indent_types); $indent_level --; $return .= "\n" . str_repeat($tab, $indent_level); } - + $inline_parentheses = false; - + $return .= $highlighted . ' '; continue; } - - if($token[self::TOKEN_VALUE] === ',') { - if($inline_count >= 30) { + + if ($token[self::TOKEN_VALUE] === ',') { + if ($inline_count >= 30) { $inline_count = 0; $newline = true; } } - + $inline_count += strlen($token[self::TOKEN_VALUE]); } @@ -503,105 +498,104 @@ class SqlFormatter // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2) // Allow up to 3 non-whitespace tokens inside inline parentheses $length = 0; - for($j=1;$j<=250;$j++) { + for ($j=1;$j<=250;$j++) { // Reached end of string if(!isset($tokens[$i+$j])) break; - + $next = $tokens[$i+$j]; - + // Reached closing parentheses, able to inline it - if($next[self::TOKEN_VALUE] === ')') { + if ($next[self::TOKEN_VALUE] === ')') { $inline_parentheses = true; $inline_count = 0; $inline_indented = false; break; } - + // Reached an invalid token for inline parentheses if ($next[self::TOKEN_VALUE]===';' || $next[self::TOKEN_VALUE]==='(') { break; } - + // Reached an invalid token type for inline parentheses if ($next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_BLOCK_COMMENT) { break; } - + $length += strlen($next[self::TOKEN_VALUE]); } - - if($inline_parentheses && $length > 30) { + + if ($inline_parentheses && $length > 30) { $increase_block_indent = true; $inline_indented = true; $newline = true; } - + // Take out the preceding space unless there was whitespace there in the original query if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { $return = rtrim($return,' '); } - - if(!$inline_parentheses) { + + if (!$inline_parentheses) { $increase_block_indent = true; // Add a newline after the parentheses $newline = true; } - + } - + // Closing parentheses decrease the block indent level elseif ($token[self::TOKEN_VALUE] === ')') { // Remove whitespace before the closing parentheses $return = rtrim($return,' '); - + $indent_level--; - + // Reset indent level - while($j=array_shift($indent_types)) { - if($j==='special') { + while ($j=array_shift($indent_types)) { + if ($j==='special') { $indent_level--; - } - else { + } else { break; } } - - if($indent_level < 0) { + + if ($indent_level < 0) { // This is an error $indent_level = 0; - + if ($highlight) { $return .= "\n".self::highlightError($token[self::TOKEN_VALUE]); continue; } } - + // Add a newline before the closing parentheses (if not already added) - if(!$added_newline) { + if (!$added_newline) { $return .= "\n" . str_repeat($tab, $indent_level); - } + } } - + // Commas start a new line (unless within inline parentheses) elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) { $newline = true; } - + // Top level reserved words start a new line and increase the special indent level elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) { $increase_special_indent = true; - + // If the last indent type was 'special', decrease the special indent for this round reset($indent_types); - if(current($indent_types)==='special') { + if (current($indent_types)==='special') { $indent_level--; array_shift($indent_types); } - + // Add a newline after the top level reserved word $newline = true; // Add a newline before the top level reserved word (if not already added) - if(!$added_newline) { + if (!$added_newline) { $return .= "\n" . str_repeat($tab, $indent_level); } // If we already added a newline, redo the indentation since it may be different now @@ -618,7 +612,7 @@ class SqlFormatter // Newline reserved words start a new line elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) { // Add a newline before the reserved word (if not already added) - if(!$added_newline) { + if (!$added_newline) { $return .= "\n" . str_repeat($tab, $indent_level); } @@ -627,10 +621,10 @@ class SqlFormatter $highlighted = preg_replace('/\s+/',' ',$highlighted); } } - + // Multiple boundary characters in a row should not have spaces between them (not including parentheses) - elseif($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { - if($tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { + elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { + if ($tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { $return = rtrim($return,' '); } @@ -641,7 +635,7 @@ class SqlFormatter if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') { $return = rtrim($return, ' '); } - + $return .= $highlighted.' '; // If the token shouldn't have a space after it @@ -654,7 +648,7 @@ class SqlFormatter if ($highlight && array_search('block',$indent_types) !== false) { $return .= "\n".self::highlightError("WARNING: unclosed parentheses or section"); } - + // Replace tab characters with the configuration tab character $return = trim(str_replace("\t",self::$tab,$return)); @@ -662,7 +656,7 @@ class SqlFormatter $return = self::output($return); } - return $return; + return $return; } /** @@ -700,8 +694,8 @@ class SqlFormatter $empty = true; $tokens = self::tokenize($string); - - foreach ($tokens as $token) { + + foreach ($tokens as $token) { // If this is a query separator if ($token[self::TOKEN_VALUE] === ';') { if (!$empty) { @@ -711,9 +705,9 @@ class SqlFormatter $empty = true; continue; } - + // If this is a non-empty character - if($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) { + if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) { $empty = false; } @@ -749,6 +743,7 @@ class SqlFormatter $result .= $token[self::TOKEN_VALUE]; } $result = self::format( $result,false); + return $result; } @@ -765,26 +760,23 @@ class SqlFormatter $tokens = self::tokenize($string); - $whitespace = true; foreach ($tokens as $token) { // Skip comment tokens if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { continue; } - - if($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) { + + if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) { // If the last token was whitespace, don't add another one - if($whitespace) { + if ($whitespace) { continue; - } - else { + } else { $whitespace = true; // Convert all whitespace to a single space $token[self::TOKEN_VALUE] = ' '; } - } - else { + } else { $whitespace = false; } @@ -804,45 +796,33 @@ class SqlFormatter protected static function highlightToken($token) { $type = $token[self::TOKEN_TYPE]; - - if(self::is_cli()) { + + if (self::is_cli()) { $token = $token[self::TOKEN_VALUE]; - } - else { + } else { $token = htmlentities($token[self::TOKEN_VALUE],ENT_COMPAT,'UTF-8'); } - if($type===self::TOKEN_TYPE_BOUNDARY) { + if ($type===self::TOKEN_TYPE_BOUNDARY) { return self::highlightBoundary($token); - } - elseif($type===self::TOKEN_TYPE_WORD) { + } elseif ($type===self::TOKEN_TYPE_WORD) { return self::highlightWord($token); - } - elseif($type===self::TOKEN_TYPE_BACKTICK_QUOTE) { + } elseif ($type===self::TOKEN_TYPE_BACKTICK_QUOTE) { return self::highlightBacktickQuote($token); - } - elseif($type===self::TOKEN_TYPE_QUOTE) { + } elseif ($type===self::TOKEN_TYPE_QUOTE) { return self::highlightQuote($token); - } - elseif($type===self::TOKEN_TYPE_RESERVED) { + } elseif ($type===self::TOKEN_TYPE_RESERVED) { return self::highlightReservedWord($token); - } - elseif($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) { + } elseif ($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) { return self::highlightReservedWord($token); - } - elseif($type===self::TOKEN_TYPE_RESERVED_NEWLINE) { + } elseif ($type===self::TOKEN_TYPE_RESERVED_NEWLINE) { return self::highlightReservedWord($token); - } - elseif($type===self::TOKEN_TYPE_NUMBER) { + } elseif ($type===self::TOKEN_TYPE_NUMBER) { return self::highlightNumber($token); - } - elseif($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) { + } elseif ($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) { return self::highlightComment($token); } - elseif($type===self::TOKEN_TYPE_FUNCTION) { - return self::highlightMySQLFunction($token); - } - + return $token; } @@ -855,10 +835,9 @@ class SqlFormatter */ protected static function highlightQuote($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_quote . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$quote_attributes . '>' . $value . '</span>'; } } @@ -870,11 +849,11 @@ class SqlFormatter * * @return String HTML code of the highlighted token. */ - protected static function highlightBacktickQuote($value) { - if(self::is_cli()) { + protected static function highlightBacktickQuote($value) + { + if (self::is_cli()) { return self::$cli_backtick_quote . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>'; } } @@ -888,30 +867,12 @@ class SqlFormatter */ protected static function highlightReservedWord($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_reserved . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$reserved_attributes . '>' . $value . '</span>'; } } - - /** - * Highlights mysql functions - * - * @param String $value The token's value - * - * @return String HTML code of the highlighted token. - */ - protected static function highlightMySQLFunction($value) - { - if(self::is_cli()) { - return self::$cli_reserved . $value . "\x1b[0m";//not sure as doesn't have this environment for now.Need to check - } - else { - return '<span ' . self::$mysql_functions . '>' . $value . '</span>'; - } - } /** * Highlights a boundary token @@ -923,11 +884,10 @@ class SqlFormatter protected static function highlightBoundary($value) { if($value==='(' || $value===')') return $value; - - if(self::is_cli()) { + + if (self::is_cli()) { return self::$cli_boundary . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$boundary_attributes . '>' . $value . '</span>'; } } @@ -941,10 +901,9 @@ class SqlFormatter */ protected static function highlightNumber($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_number . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$number_attributes . '>' . $value . '</span>'; } } @@ -958,10 +917,9 @@ class SqlFormatter */ protected static function highlightError($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_error . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$error_attributes . '>' . $value . '</span>'; } } @@ -975,10 +933,9 @@ class SqlFormatter */ protected static function highlightComment($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_comment . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$comment_attributes . '>' . $value . '</span>'; } } @@ -992,10 +949,9 @@ class SqlFormatter */ protected static function highlightWord($value) { - if(self::is_cli()) { + if (self::is_cli()) { return self::$cli_word . $value . "\x1b[0m"; - } - else { + } else { return '<span ' . self::$word_attributes . '>' . $value . '</span>'; } } @@ -1012,7 +968,7 @@ class SqlFormatter { return strlen($b) - strlen($a); } - + /** * Helper function for building regular expressions for reserved words and boundary characters * @@ -1024,7 +980,7 @@ class SqlFormatter { return preg_quote($a,'/'); } - + /** * Helper function for building string output * @@ -1034,21 +990,22 @@ class SqlFormatter */ private static function output($string) { - if(self::is_cli()) { + if (self::is_cli()) { return $string."\n"; - } - else { + } else { $string=trim($string); - if(!self::$use_pre) { + if (!self::$use_pre) { return $string; } + return '<pre '.self::$pre_attributes.'>' . $string . '</pre>'; } } - - private static function is_cli() { + + private static function is_cli() + { if(isset(self::$cli)) return self::$cli; else return php_sapi_name() === 'cli'; } - + } |