summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jeremy Dorn <jeremy@jeremydorn.com> 2013-07-11 06:48:17 -0700
committer: Jeremy Dorn <jeremy@jeremydorn.com> 2013-07-11 06:48:17 -0700
commit: e1aa999f64b222bdd990be6408f832c616ac57cf (patch)
tree: 3bdebc81d1d207d1d0ac5b0225afca12489a80fc
parent: 7dc6794a53571d5ccbe10716d20e2e3a7101c00c (diff)
download: sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.zip
sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.tar.gz
sql-formatter-e1aa999f64b222bdd990be6408f832c616ac57cf.tar.bz2
Formatting and naming changes in order to merge @adityagoyal pull request.
-rw-r--r-- examples/examples.php | 4
-rw-r--r-- lib/SqlFormatter.php | 431
2 files changed, 195 insertions, 240 deletions
diff --git a/examples/examples.php b/examples/examples.php
index 97771ad..f54cfb0 100644
--- a/examples/examples.php
+++ b/examples/examples.php
@@ -43,15 +43,13 @@ $statements = array(
DATE_FORMAT((DATE_SUB(NOW(),INTERVAL 1 DAY)),'%Y-%c-%d') AND t_create
< DATE_FORMAT(NOW(), '%Y-%c-%d') ORDER BY d.id LIMIT 2,10) a,
orc_scheme_detail b WHERE a.id = b.id",
-
- "SELECT * from Table1 LEFT OUTER JOIN Table2 on Table1.id = Table2.id",
"SELECT * from Table1 LEFT
OUTER JOIN Table2 on Table1.id = Table2.id",
"SELECT * FROM MyTable WHERE id = 46",
- "SELECT count(*),`Column1`,`Testing`, `Testing Three` FROM `Table1`
+ "SELECT count(*),`Column1` as count,`Testing`, `Testing Three` FROM `Table1`
WHERE Column1 = 'testing' AND ( (`Column2` = `Column3` OR Column4 >= NOW()) )
GROUP BY Column1 ORDER BY Column3 DESC LIMIT 5,10",
diff --git a/lib/SqlFormatter.php b/lib/SqlFormatter.php
index 5908003..cbd23f3 100644
--- a/lib/SqlFormatter.php
+++ b/lib/SqlFormatter.php
@@ -26,12 +26,11 @@ class SqlFormatter
const TOKEN_TYPE_BLOCK_COMMENT = 9;
const TOKEN_TYPE_NUMBER = 10;
const TOKEN_TYPE_ERROR = 11;
- const TOKEN_TYPE_FUNCTION = 12;
-
+
// Constants for different components of a token
const TOKEN_TYPE = 0;
const TOKEN_VALUE = 1;
-
+
// Reserved words (for syntax highlighting)
protected static $reserved = array(
'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AS', 'ASC',
@@ -47,7 +46,7 @@ class SqlFormatter
'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
'MATCH','MAX_CONNECTIONS_PER_HOUR', 'MAX_QUERIES_PER_HOUR', 'MAX_ROWS', 'MAX_UPDATES_PER_HOUR', 'MAX_USER_CONNECTIONS',
'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
- 'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY',
+ 'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW()','NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY',
'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE','RAID_TYPE', 'READ', 'READ_ONLY',
'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
@@ -72,35 +71,35 @@ class SqlFormatter
'LEFT OUTER JOIN', 'RIGHT OUTER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'OUTER JOIN', 'INNER JOIN', 'JOIN', 'XOR', 'OR', 'AND'
);
- protected static $mysql_function = array (
- 'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2',
- 'AVG', 'BDMPOLYFROMTEXT', 'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH',
- 'BIT_OR', 'BIT_XOR', 'BOUNDARY', 'BUFFER', 'CAST', 'CEIL', 'CEILING', 'CENTROID', 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH',
- 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ',
- 'CONVEXHULL', 'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
- 'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK',
- 'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE',
- 'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR',
- 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT',
- 'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT',
- 'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR',
- 'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS', 'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK',
- 'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB',
- 'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET',
- 'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND',
- 'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB',
- 'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT', 'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON',
- 'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NOW', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS', 'NUMPOINTS', 'OCT', 'OCTET_LENGTH',
- 'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE',
- 'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND',
- 'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME',
- 'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP',
- 'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN',
- 'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE',
- 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP',
- 'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK'
-);
-
+ protected static $functions = array (
+ 'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2',
+ 'AVG', 'BDMPOLYFROMTEXT', 'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH',
+ 'BIT_OR', 'BIT_XOR', 'BOUNDARY', 'BUFFER', 'CAST', 'CEIL', 'CEILING', 'CENTROID', 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH',
+ 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ',
+ 'CONVEXHULL', 'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
+ 'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK',
+ 'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE',
+ 'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR',
+ 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT',
+ 'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT',
+ 'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR',
+ 'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS', 'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK',
+ 'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB',
+ 'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET',
+ 'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND',
+ 'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB',
+ 'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT', 'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON',
+ 'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS', 'NUMPOINTS', 'OCT', 'OCTET_LENGTH',
+ 'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE',
+ 'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND',
+ 'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME',
+ 'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP',
+ 'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN',
+ 'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE',
+ 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP',
+ 'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK'
+ );
+
// Punctuation that can be used as a boundary between other tokens
protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#');
@@ -115,13 +114,12 @@ class SqlFormatter
public static $error_attributes = 'style="background-color: red;"';
public static $comment_attributes = 'style="color: #aaa;"';
public static $pre_attributes = 'style="color: black; background-color: white;"';
- public static $mysql_functions = 'style="color: red;"';
-
+
// Boolean - whether or not the current environment is the CLI
// This affects the type of syntax highlighting
// If not defined, it will be determined automatically
public static $cli;
-
+
// For CLI syntax highlighting
public static $cli_quote = "\x1b[34;1m";
public static $cli_backtick_quote = "\x1b[35;1m";
@@ -138,16 +136,16 @@ class SqlFormatter
// This flag tells us if queries need to be enclosed in <pre> tags
public static $use_pre = true;
-
+
// This flag tells us if SqlFormatted has been initialized
protected static $init;
-
+
// Regular expressions for tokenizing
protected static $regex_boundaries;
protected static $regex_reserved;
protected static $regex_reserved_newline;
protected static $regex_reserved_toplevel;
- protected static $regex_mysql_function;
+ protected static $regex_function;
// Cache variables
// Only tokens shorter than this size will be cached. Somewhere between 10 and 20 seems to work well for most cases.
@@ -155,12 +153,13 @@ class SqlFormatter
protected static $token_cache = array();
protected static $cache_hits = 0;
protected static $cache_misses = 0;
-
+
/**
* Get stats about the token cache
* @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
*/
- public static function getCacheStats() {
+ public static function getCacheStats()
+ {
return array(
'hits'=>self::$cache_hits,
'misses'=>self::$cache_misses,
@@ -168,13 +167,14 @@ class SqlFormatter
'size'=>strlen(serialize(self::$token_cache))
);
}
-
+
/**
* Stuff that only needs to be done once. Builds regular expressions and sorts the reserved words.
*/
- protected static function init() {
+ protected static function init()
+ {
if(self::$init) return;
-
+
// Sort reserved word list from longest word to shortest
usort(self::$reserved, array('SqlFormatter', 'sortLength'));
@@ -184,17 +184,17 @@ class SqlFormatter
self::$regex_reserved_toplevel = str_replace(' ','\\s+','('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$reserved_toplevel)).')');
self::$regex_reserved_newline = str_replace(' ','\\s+','('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$reserved_newline)).')');
- self::$regex_mysql_function = '('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$mysql_function)).')';
-
+ self::$regex_function = '('.implode('|',array_map(array('SqlFormatter', 'quote_regex'),self::$functions)).')';
+
self::$init = true;
}
-
+
/**
* Return the next token and token type in a SQL string.
* Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
*
- * @param String $string The SQL string
- * @param array $previous The result of the previous getNextToken() call
+ * @param String $string The SQL string
+ * @param array $previous The result of the previous getNextToken() call
*
* @return Array An associative array containing the type and value of the token.
*/
@@ -207,7 +207,7 @@ class SqlFormatter
self::TOKEN_TYPE=>self::TOKEN_TYPE_WHITESPACE
);
}
-
+
// Comment
if ($string[0] === '#' || (isset($string[1])&&($string[0]==='-'&&$string[1]==='-') || ($string[0]==='/'&&$string[1]==='*'))) {
// Comment until end of line
@@ -230,25 +230,25 @@ class SqlFormatter
}
// Quoted String
- if($string[0]==='"' || $string[0]==='\'' || $string[0]==='`') {
+ if ($string[0]==='"' || $string[0]==='\'' || $string[0]==='`') {
$return = array(
self::TOKEN_TYPE => ($string[0]==='`'? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE),
self::TOKEN_VALUE => $string
);
-
+
// This checks for the following patterns:
// 1. backtick quoted string using `` to escape
// 2. double quoted string using "" or \" to escape
// 3. single quoted string using '' or \' to escape
- if( preg_match('/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) {
+ if ( preg_match('/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) {
$return[self::TOKEN_VALUE] = $matches[1];
}
-
+
return $return;
}
-
+
// Number
- if(preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|"\'`|'.self::$regex_boundaries.')/',$string,$matches)) {
+ if (preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|"\'`|'.self::$regex_boundaries.')/',$string,$matches)) {
return array(
self::TOKEN_VALUE => $matches[1],
self::TOKEN_TYPE=>self::TOKEN_TYPE_NUMBER
@@ -256,7 +256,7 @@ class SqlFormatter
}
// Boundary Character (punctuation and symbols)
- if(preg_match('/^('.self::$regex_boundaries.')/',$string,$matches)) {
+ if (preg_match('/^('.self::$regex_boundaries.')/',$string,$matches)) {
return array(
self::TOKEN_VALUE => $matches[1],
self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY
@@ -268,21 +268,21 @@ class SqlFormatter
if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
$upper = strtoupper($string);
// Top Level Reserved Word
- if(preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
+ if (preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
return array(
self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_TOPLEVEL,
self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
);
}
// Newline Reserved Word
- if(preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
+ if (preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
return array(
self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_NEWLINE,
self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
);
}
// Other Reserved Word
- if(preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
+ if (preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
return array(
self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED,
self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
@@ -291,19 +291,16 @@ class SqlFormatter
}
// A function must be suceeded by '('
- // this makes it so in "count(", which considers it as a mysql function.
-
- $upper = strtoupper($string);
- // MySQL function
- if(preg_match('/^('.self::$regex_mysql_function.'[(]|\s|[)])/', $upper,$matches)) {
-
- return array(
- self::TOKEN_TYPE=>self::TOKEN_TYPE_FUNCTION,
- self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])-1)
- );
- }
-
-
+ // this makes it so "count(" is considered a function, but "count" alone is not
+ $upper = strtoupper($string);
+ // function
+ if (preg_match('/^('.self::$regex_function.'[(]|\s|[)])/', $upper,$matches)) {
+ return array(
+ self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED,
+ self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])-1)
+ );
+ }
+
// Non reserved word
preg_match('/^(.*?)($|\s|["\'`]|'.self::$regex_boundaries.')/',$string,$matches);
@@ -324,7 +321,7 @@ class SqlFormatter
protected static function tokenize($string)
{
self::init();
-
+
$tokens = array();
// Used for debugging if there is an error while tokenizing the string
@@ -334,7 +331,7 @@ class SqlFormatter
$old_string_len = strlen($string) + 1;
$token = null;
-
+
$current_length = strlen($string);
// Keep processing the string until it is empty
@@ -345,49 +342,47 @@ class SqlFormatter
self::TOKEN_VALUE=>$string,
self::TOKEN_TYPE=>self::TOKEN_TYPE_ERROR
);
-
+
return $tokens;
}
$old_string_len = $current_length;
// Determine if we can use caching
- if($current_length >= self::$max_cachekey_size) {
+ if ($current_length >= self::$max_cachekey_size) {
$cacheKey = substr($string,0,self::$max_cachekey_size);
- }
- else {
+ } else {
$cacheKey = false;
}
// See if the token is already cached
- if($cacheKey && isset(self::$token_cache[$cacheKey])) {
+ if ($cacheKey && isset(self::$token_cache[$cacheKey])) {
// Retrieve from cache
$token = self::$token_cache[$cacheKey];
$token_length = strlen($token[self::TOKEN_VALUE]);
self::$cache_hits++;
- }
- else {
+ } else {
// Get the next token and the token type
- $token = self::getNextToken($string, $token);
+ $token = self::getNextToken($string, $token);
$token_length = strlen($token[self::TOKEN_VALUE]);
self::$cache_misses++;
-
+
// If the token is shorter than the max length, store it in cache
- if($cacheKey && $token_length < self::$max_cachekey_size) {
+ if ($cacheKey && $token_length < self::$max_cachekey_size) {
self::$token_cache[$cacheKey] = $token;
}
}
-
+
$tokens[] = $token;
// Advance the string
$string = substr($string, $token_length);
-
+
$current_length -= $token_length;
}
return $tokens;
}
-
+
/**
* Format the whitespace in a SQL string to make it easier to read.
*
@@ -396,7 +391,8 @@ class SqlFormatter
*
* @return String The SQL string with HTML styles and formatting wrapped in a <pre> tag
*/
- public static function format($string, $highlight=true) {
+ public static function format($string, $highlight=true)
+ {
// This variable will be populated with formatted html
$return = '';
@@ -412,10 +408,10 @@ class SqlFormatter
$added_newline = false;
$inline_count = 0;
$inline_indented = false;
-
+
// Tokenize String
$original_tokens = self::tokenize($string);
-
+
// Remove existing whitespace
$tokens = array();
foreach ($original_tokens as $i=>$token) {
@@ -426,7 +422,7 @@ class SqlFormatter
}
// Format token by token
- foreach ($tokens as $i=>$token) {
+ foreach ($tokens as $i=>$token) {
// Get highlighted token if doing syntax highlighting
if ($highlight) {
$highlighted = self::highlightToken($token);
@@ -435,28 +431,27 @@ class SqlFormatter
}
// If we are increasing the special indent level now
- if($increase_special_indent) {
+ if ($increase_special_indent) {
$indent_level++;
$increase_special_indent = false;
array_unshift($indent_types,'special');
}
// If we are increasing the block indent level now
- if($increase_block_indent) {
+ if ($increase_block_indent) {
$indent_level++;
$increase_block_indent = false;
array_unshift($indent_types,'block');
}
-
+
// If we need a new line before the token
if ($newline) {
$return .= "\n" . str_repeat($tab, $indent_level);
$newline = false;
$added_newline = true;
- }
- else {
+ } else {
$added_newline = false;
}
-
+
// Display comments directly where they appear in the source
if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
@@ -469,31 +464,31 @@ class SqlFormatter
$newline = true;
continue;
}
-
- if($inline_parentheses) {
+
+ if ($inline_parentheses) {
// End of inline parentheses
- if($token[self::TOKEN_VALUE] === ')') {
+ if ($token[self::TOKEN_VALUE] === ')') {
$return = rtrim($return,' ');
-
- if($inline_indented) {
+
+ if ($inline_indented) {
array_shift($indent_types);
$indent_level --;
$return .= "\n" . str_repeat($tab, $indent_level);
}
-
+
$inline_parentheses = false;
-
+
$return .= $highlighted . ' ';
continue;
}
-
- if($token[self::TOKEN_VALUE] === ',') {
- if($inline_count >= 30) {
+
+ if ($token[self::TOKEN_VALUE] === ',') {
+ if ($inline_count >= 30) {
$inline_count = 0;
$newline = true;
}
}
-
+
$inline_count += strlen($token[self::TOKEN_VALUE]);
}
@@ -503,105 +498,104 @@ class SqlFormatter
// Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
// Allow up to 3 non-whitespace tokens inside inline parentheses
$length = 0;
- for($j=1;$j<=250;$j++) {
+ for ($j=1;$j<=250;$j++) {
// Reached end of string
if(!isset($tokens[$i+$j])) break;
-
+
$next = $tokens[$i+$j];
-
+
// Reached closing parentheses, able to inline it
- if($next[self::TOKEN_VALUE] === ')') {
+ if ($next[self::TOKEN_VALUE] === ')') {
$inline_parentheses = true;
$inline_count = 0;
$inline_indented = false;
break;
}
-
+
// Reached an invalid token for inline parentheses
if ($next[self::TOKEN_VALUE]===';' || $next[self::TOKEN_VALUE]==='(') {
break;
}
-
+
// Reached an invalid token type for inline parentheses
if ($next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_BLOCK_COMMENT) {
break;
}
-
+
$length += strlen($next[self::TOKEN_VALUE]);
}
-
- if($inline_parentheses && $length > 30) {
+
+ if ($inline_parentheses && $length > 30) {
$increase_block_indent = true;
$inline_indented = true;
$newline = true;
}
-
+
// Take out the preceding space unless there was whitespace there in the original query
if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
$return = rtrim($return,' ');
}
-
- if(!$inline_parentheses) {
+
+ if (!$inline_parentheses) {
$increase_block_indent = true;
// Add a newline after the parentheses
$newline = true;
}
-
+
}
-
+
// Closing parentheses decrease the block indent level
elseif ($token[self::TOKEN_VALUE] === ')') {
// Remove whitespace before the closing parentheses
$return = rtrim($return,' ');
-
+
$indent_level--;
-
+
// Reset indent level
- while($j=array_shift($indent_types)) {
- if($j==='special') {
+ while ($j=array_shift($indent_types)) {
+ if ($j==='special') {
$indent_level--;
- }
- else {
+ } else {
break;
}
}
-
- if($indent_level < 0) {
+
+ if ($indent_level < 0) {
// This is an error
$indent_level = 0;
-
+
if ($highlight) {
$return .= "\n".self::highlightError($token[self::TOKEN_VALUE]);
continue;
}
}
-
+
// Add a newline before the closing parentheses (if not already added)
- if(!$added_newline) {
+ if (!$added_newline) {
$return .= "\n" . str_repeat($tab, $indent_level);
- }
+ }
}
-
+
// Commas start a new line (unless within inline parentheses)
elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) {
$newline = true;
}
-
+
// Top level reserved words start a new line and increase the special indent level
elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
$increase_special_indent = true;
-
+
// If the last indent type was 'special', decrease the special indent for this round
reset($indent_types);
- if(current($indent_types)==='special') {
+ if (current($indent_types)==='special') {
$indent_level--;
array_shift($indent_types);
}
-
+
// Add a newline after the top level reserved word
$newline = true;
// Add a newline before the top level reserved word (if not already added)
- if(!$added_newline) {
+ if (!$added_newline) {
$return .= "\n" . str_repeat($tab, $indent_level);
}
// If we already added a newline, redo the indentation since it may be different now
@@ -618,7 +612,7 @@ class SqlFormatter
// Newline reserved words start a new line
elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) {
// Add a newline before the reserved word (if not already added)
- if(!$added_newline) {
+ if (!$added_newline) {
$return .= "\n" . str_repeat($tab, $indent_level);
}
@@ -627,10 +621,10 @@ class SqlFormatter
$highlighted = preg_replace('/\s+/',' ',$highlighted);
}
}
-
+
// Multiple boundary characters in a row should not have spaces between them (not including parentheses)
- elseif($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
- if($tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
+ elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
+ if ($tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
$return = rtrim($return,' ');
}
@@ -641,7 +635,7 @@ class SqlFormatter
if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') {
$return = rtrim($return, ' ');
}
-
+
$return .= $highlighted.' ';
// If the token shouldn't have a space after it
@@ -654,7 +648,7 @@ class SqlFormatter
if ($highlight && array_search('block',$indent_types) !== false) {
$return .= "\n".self::highlightError("WARNING: unclosed parentheses or section");
}
-
+
// Replace tab characters with the configuration tab character
$return = trim(str_replace("\t",self::$tab,$return));
@@ -662,7 +656,7 @@ class SqlFormatter
$return = self::output($return);
}
- return $return;
+ return $return;
}
/**
@@ -700,8 +694,8 @@ class SqlFormatter
$empty = true;
$tokens = self::tokenize($string);
-
- foreach ($tokens as $token) {
+
+ foreach ($tokens as $token) {
// If this is a query separator
if ($token[self::TOKEN_VALUE] === ';') {
if (!$empty) {
@@ -711,9 +705,9 @@ class SqlFormatter
$empty = true;
continue;
}
-
+
// If this is a non-empty character
- if($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) {
+ if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) {
$empty = false;
}
@@ -749,6 +743,7 @@ class SqlFormatter
$result .= $token[self::TOKEN_VALUE];
}
$result = self::format( $result,false);
+
return $result;
}
@@ -765,26 +760,23 @@ class SqlFormatter
$tokens = self::tokenize($string);
-
$whitespace = true;
foreach ($tokens as $token) {
// Skip comment tokens
if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
continue;
}
-
- if($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) {
+
+ if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) {
// If the last token was whitespace, don't add another one
- if($whitespace) {
+ if ($whitespace) {
continue;
- }
- else {
+ } else {
$whitespace = true;
// Convert all whitespace to a single space
$token[self::TOKEN_VALUE] = ' ';
}
- }
- else {
+ } else {
$whitespace = false;
}
@@ -804,45 +796,33 @@ class SqlFormatter
protected static function highlightToken($token)
{
$type = $token[self::TOKEN_TYPE];
-
- if(self::is_cli()) {
+
+ if (self::is_cli()) {
$token = $token[self::TOKEN_VALUE];
- }
- else {
+ } else {
$token = htmlentities($token[self::TOKEN_VALUE],ENT_COMPAT,'UTF-8');
}
- if($type===self::TOKEN_TYPE_BOUNDARY) {
+ if ($type===self::TOKEN_TYPE_BOUNDARY) {
return self::highlightBoundary($token);
- }
- elseif($type===self::TOKEN_TYPE_WORD) {
+ } elseif ($type===self::TOKEN_TYPE_WORD) {
return self::highlightWord($token);
- }
- elseif($type===self::TOKEN_TYPE_BACKTICK_QUOTE) {
+ } elseif ($type===self::TOKEN_TYPE_BACKTICK_QUOTE) {
return self::highlightBacktickQuote($token);
- }
- elseif($type===self::TOKEN_TYPE_QUOTE) {
+ } elseif ($type===self::TOKEN_TYPE_QUOTE) {
return self::highlightQuote($token);
- }
- elseif($type===self::TOKEN_TYPE_RESERVED) {
+ } elseif ($type===self::TOKEN_TYPE_RESERVED) {
return self::highlightReservedWord($token);
- }
- elseif($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
+ } elseif ($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
return self::highlightReservedWord($token);
- }
- elseif($type===self::TOKEN_TYPE_RESERVED_NEWLINE) {
+ } elseif ($type===self::TOKEN_TYPE_RESERVED_NEWLINE) {
return self::highlightReservedWord($token);
- }
- elseif($type===self::TOKEN_TYPE_NUMBER) {
+ } elseif ($type===self::TOKEN_TYPE_NUMBER) {
return self::highlightNumber($token);
- }
- elseif($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) {
+ } elseif ($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) {
return self::highlightComment($token);
}
- elseif($type===self::TOKEN_TYPE_FUNCTION) {
- return self::highlightMySQLFunction($token);
- }
-
+
return $token;
}
@@ -855,10 +835,9 @@ class SqlFormatter
*/
protected static function highlightQuote($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_quote . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$quote_attributes . '>' . $value . '</span>';
}
}
@@ -870,11 +849,11 @@ class SqlFormatter
*
* @return String HTML code of the highlighted token.
*/
- protected static function highlightBacktickQuote($value) {
- if(self::is_cli()) {
+ protected static function highlightBacktickQuote($value)
+ {
+ if (self::is_cli()) {
return self::$cli_backtick_quote . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>';
}
}
@@ -888,30 +867,12 @@ class SqlFormatter
*/
protected static function highlightReservedWord($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_reserved . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$reserved_attributes . '>' . $value . '</span>';
}
}
-
- /**
- * Highlights mysql functions
- *
- * @param String $value The token's value
- *
- * @return String HTML code of the highlighted token.
- */
- protected static function highlightMySQLFunction($value)
- {
- if(self::is_cli()) {
- return self::$cli_reserved . $value . "\x1b[0m";//not sure as doesn't have this environment for now.Need to check
- }
- else {
- return '<span ' . self::$mysql_functions . '>' . $value . '</span>';
- }
- }
/**
* Highlights a boundary token
@@ -923,11 +884,10 @@ class SqlFormatter
protected static function highlightBoundary($value)
{
if($value==='(' || $value===')') return $value;
-
- if(self::is_cli()) {
+
+ if (self::is_cli()) {
return self::$cli_boundary . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$boundary_attributes . '>' . $value . '</span>';
}
}
@@ -941,10 +901,9 @@ class SqlFormatter
*/
protected static function highlightNumber($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_number . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$number_attributes . '>' . $value . '</span>';
}
}
@@ -958,10 +917,9 @@ class SqlFormatter
*/
protected static function highlightError($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_error . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$error_attributes . '>' . $value . '</span>';
}
}
@@ -975,10 +933,9 @@ class SqlFormatter
*/
protected static function highlightComment($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_comment . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$comment_attributes . '>' . $value . '</span>';
}
}
@@ -992,10 +949,9 @@ class SqlFormatter
*/
protected static function highlightWord($value)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return self::$cli_word . $value . "\x1b[0m";
- }
- else {
+ } else {
return '<span ' . self::$word_attributes . '>' . $value . '</span>';
}
}
@@ -1012,7 +968,7 @@ class SqlFormatter
{
return strlen($b) - strlen($a);
}
-
+
/**
* Helper function for building regular expressions for reserved words and boundary characters
*
@@ -1024,7 +980,7 @@ class SqlFormatter
{
return preg_quote($a,'/');
}
-
+
/**
* Helper function for building string output
*
@@ -1034,21 +990,22 @@ class SqlFormatter
*/
private static function output($string)
{
- if(self::is_cli()) {
+ if (self::is_cli()) {
return $string."\n";
- }
- else {
+ } else {
$string=trim($string);
- if(!self::$use_pre) {
+ if (!self::$use_pre) {
return $string;
}
+
return '<pre '.self::$pre_attributes.'>' . $string . '</pre>';
}
}
-
- private static function is_cli() {
+
+ private static function is_cli()
+ {
if(isset(self::$cli)) return self::$cli;
else return php_sapi_name() === 'cli';
}
-
+
}