// Version: 0.8 // ANTLR Version: 2.7.2 // Date: 2003.08.25 // // Description: This is a MS SQL Server 2000 SELECT statement grammar. // // ======================================================================================= // Author: Tomasz Jastrzebski // Contact: tdjastrzebski@yahoo.com // Working parser/lexer generated based on this grammar will available for some time at: // http://jastrzebski.europe.webmatrixhosting.net/mssqlparser.aspx options { language = "CSharp"; } // PARSER ******************************************************************************** class SqlParser extends Parser; options { k = 2; } // starting rule statement : selectStatement (SEMICOLON)? EOF ; selectStatement : queryExpression (computeClause)? (forClause)? (optionClause)? ; queryExpression : subQueryExpression (unionOperator subQueryExpression)* (orderByClause)? ; subQueryExpression : querySpecification | LPAREN queryExpression RPAREN ; querySpecification : selectClause (fromClause)? (whereClause)? (groupByClause (havingClause)? )? ; selectClause : SELECT (ALL | DISTINCT)? (TOP Integer (PERCENT)? (WITH TIES)? )? selectList ; whereClause : WHERE searchCondition ; orderByClause : ORDER BY expression (ASC | DESC)? (COMMA expression (ASC | DESC)? )* ; groupByClause : GROUP BY (ALL)? expression (COMMA expression)* (WITH (CUBE | ROLLUP) )? ; havingClause : HAVING searchCondition ; optionClause : OPTION LPAREN queryHint (COMMA queryHint)* RPAREN ; queryHint : (HASH | ORDER) GROUP | (CONCAT | HASH | MERGE) UNION | (LOOP | MERGE | HASH) JOIN | FAST Integer | FORCE ORDER | MAXDOP Integer | ROBUST PLAN | KEEP PLAN | KEEPFIXED PLAN | EXPAND VIEWS ; forClause : FOR ( BROWSE | XML (RAW | AUTO | EXPLICIT) (COMMA XMLDATA)? (COMMA ELEMENTS)? (COMMA BINARY BASE64) ) ; computeClause : COMPUTE // only allowed functions are: AVG, COUNT, MAX, MIN, STDEV, STDEVP, VAR, VARP, SUM identifier LPAREN expression RPAREN (COMMA identifier LPAREN expression RPAREN)* (BY expression (COMMA expression)* )? ; searchCondition : subSearchCondition ( (AND | OR) subSearchCondition )* ; subSearchCondition : (NOT)? ( (LPAREN searchCondition RPAREN) => LPAREN searchCondition RPAREN | predicate ) ; predicate : ( expression ( // expression comparisonOperator expression comparisonOperator ( expression | (ALL | SOME | ANY) LPAREN selectStatement RPAREN ) | IS (NOT)? NULL | (NOT)? ( LIKE expression (ESCAPE expression)? // only single char | BETWEEN expression AND expression | IN LPAREN ( (selectStatement) => selectStatement | expression (COMMA expression)* ) RPAREN ) | CONTAINS LPAREN (dbObject | STAR) COMMA (stringLiteral | Variable) RPAREN | FREETEXT LPAREN (dbObject | STAR) COMMA (stringLiteral | Variable) RPAREN ) | EXISTS LPAREN selectStatement RPAREN ) ; selectList : selectItem ( COMMA selectItem )* ; selectItem : STAR // "*, *" is a valid select list | ( // starts with: "alias = column_name" (alias2) => ( (alias2 dbObject COMMA) => alias2 column | (alias2 dbObject (binaryOperator | LPAREN)) => alias2 expression | (alias2 column) => alias2 column | (alias2 expression) => alias2 expression ) // all table columns: "table.*" | (tableColumns) => tableColumns // some shortcuts: | (dbObject (alias1)? COMMA) => column (alias1)? | (dbObject (binaryOperator | LPAREN) ) => expression (alias1)? // less obvious cases: | (column) => column (alias1)? | (expression) => expression (alias1)? ) ; fromClause : FROM tableSource (COMMA tableSource)* ; tableSource : subTableSource (joinedTable)* ; subTableSource : ( LPAREN ( (joinedTables) => joinedTables RPAREN | (queryExpression) => queryExpression RPAREN alias1 // "derived table", mandatory alias ) | (function) => function (alias1)? | dbObject (alias1)? ( (WITH)? LPAREN tableHint (COMMA tableHint)* RPAREN )? | Variable (alias1)? | (CONTAINSTABLE | FREETEXTTABLE) LPAREN dbObject COMMA (dbObject | STAR) COMMA (stringLiteral | Variable) (COMMA Integer)? RPAREN (alias1)? | COLON COLON function (alias1)? // built-in function ) ; joinedTable : CROSS JOIN subTableSource // "joinHint JOIN" is invalid join expression | ( (INNER | (LEFT | RIGHT | FULL) (OUTER)? ) (joinHint)? )? JOIN tableSource ON searchCondition ; joinedTables : subTableSource (joinedTable)+ ; joinHint : LOOP | HASH | MERGE | REMOTE ; tableHint : INDEX ( LPAREN (identifier | Integer) ( COMMA (identifier | Integer) )* RPAREN | ASSIGNEQUAL identifier // old index hint syntax ) | FASTFIRSTROW | HOLDLOCK | NOLOCK | PAGLOCK | READCOMMITED | READPAST | READUNCOMMITED | REPEATABLEREAD | ROWLOCK | SERIALIZABLE | TABLOCK | TABLOCKX | UPDLOCK | XLOCK ; collate : COLLATE identifier ; alias1 : // alias name can also be single-quoted literal (but not for table names) (AS)? ( identifier | stringLiteral | keywordAsIdentifier ) ; alias2 : ( identifier | stringLiteral | keywordAsIdentifier ) ASSIGNEQUAL ; tableColumns : o:dbObject DOT_STAR ; column : (PLUS)* // "++column_name" is valid and updatable column name ( dbObject // for expression like "(column)" SQL Server returns updatable column | LPAREN column RPAREN ) (collate)? // it is not well documented but COLLATE can be used almost anywhere ... ; expression : // current definition ignores operator precedence subExpression (binaryOperator subExpression)* ; subExpression : (unaryOperator)? ( constant | Variable | (function) => function | LPAREN ( (selectStatement) => selectStatement // select statement returning a single value | expression ) RPAREN | dbObject // column | parameterlessFunction | caseFunction | castFunction ) (collate)? // it is not well documented but COLLATE can be used almost everywhere ... ; // todo: create a separate rule for aggregate functions function : // LEFT and RIGHT keywords are also function names (dbObject | LEFT | RIGHT) LPAREN ( expression (COMMA expression)* | STAR // aggregate functions like Count(), Checksum() accept "*" as a parameter | (ALL | DISTINCT) (STAR | expression) // aggregate function | Variable ASSIGNEQUAL expression (COMMA Variable ASSIGNEQUAL expression)* )? RPAREN ; caseFunction : CASE ( expression (WHEN expression THEN expression)+ | (WHEN searchCondition THEN expression)+ // boolean expression ) (ELSE expression)? END ; castFunction : CAST LPAREN expression AS identifier (LPAREN Integer (COMMA Integer)? RPAREN)? RPAREN ; dbObject // server.catalog.schema.object // server.catalog..object : (identifier | IDENTITYCOL | ROWGUIDCOL | keywordAsIdentifier) ( DOT (identifier | IDENTITYCOL | ROWGUIDCOL | keywordAsIdentifier) | (DOT DOT) => DOT DOT (identifier | IDENTITYCOL | ROWGUIDCOL | keywordAsIdentifier) )* ; parameterlessFunction : // any others ? CURRENT_TIMESTAMP | CURRENT_USER | SESSION_USER | SYSTEM_USER ; systemVariable : F_CONNECTIONS | F_CPU_BUSY | F_CURSOR_ROWS | F_DATEFIRST | F_DBTS | F_ERROR | F_FETCH_STATUS | F_IDENTITY | F_IDLE | F_IO_BUSY | F_LANGID | F_LANGUAGE | F_LOCK_TIMEOUT | F_MAX_CONNECTIONS | F_MAX_PRECISION | F_NESTLEVEL | F_OPTIONS | F_PACK_RECEIVED | F_PACK_SENT | F_PACKET_ERRORS | F_PROCID | F_REMSERVER | F_ROWCOUNT | F_SERVERNAME | F_SERVICENAME | F_SPID | F_TEXTSIZE | F_TIMETICKS | F_TOTAL_ERRORS | F_TOTAL_READ | F_TOTAL_WRITE | F_TRANCOUNT | F_VERSION ; keywordAsIdentifier : ( AUTO | BASE64 | BINARY | CAST | CONCAT | CUBE | ELEMENTS | EXPAND | EXPLICIT | FAST | FASTFIRSTROW | FORCE | HASH | KEEP | KEEPFIXED | LOOP | MAXDOP | MERGE | NOLOCK | PAGLOCK | RAW | READCOMMITED | READPAST | READUNCOMMITED | REMOTE | REPEATABLEREAD | ROBUST | ROLLUP | ROWLOCK | SERIALIZABLE | TABLOCK | TABLOCKX | TIES | UPDLOCK | VIEWS | XLOCK | XML | XMLDATA ) ; stringLiteral : UnicodeStringLiteral | ASCIIStringLiteral ; identifier : NonQuotedIdentifier | QuotedIdentifier ; constant : Integer | Real | NULL | stringLiteral | HexLiteral | Currency | ODBCDateTime | systemVariable ; unaryOperator : PLUS | MINUS | TILDE ; binaryOperator : arithmeticOperator | bitwiseOperator ; arithmeticOperator : PLUS | MINUS | STAR | DIVIDE | MOD ; bitwiseOperator : AMPERSAND | TILDE | BITWISEOR | BITWISEXOR ; comparisonOperator : ASSIGNEQUAL | NOTEQUAL1 | NOTEQUAL2 | LESSTHANOREQUALTO1 | LESSTHANOREQUALTO2 | LESSTHAN | GREATERTHANOREQUALTO1 | GREATERTHANOREQUALTO2 | GREATERTHAN ; logicalOperator : ALL | AND | ANY | BETWEEN | EXISTS | IN | LIKE | NOT | OR | SOME ; unionOperator : UNION (ALL)? ; // LEXER ********************************************************************************* class SqlLexer extends Lexer; options { testLiterals = false; k = 2; caseSensitive = false; caseSensitiveLiterals = false; charVocabulary='\u0000'..'\uFFFE'; } tokens { ADD = "add" ; ALL = "all" ; ALTER = "alter" ; AND = "and" ; ANY = "any" ; AS = "as" ; ASC = "asc" ; AUTHORIZATION = "authorization" ; AUTO = "auto" ; BACKUP = "backup" ; BASE64 = "base64" ; BEGIN = "begin" ; BETWEEN = "between" ; BINARY = "binary" ; BREAK = "break" ; BROWSE = "browse" ; BULK = "bulk" ; BY = "by" ; CASCADE = "cascade" ; CASE = "case" ; CAST = "cast" ; CHECK = "check" ; CHECKPOINT = "checkpoint" ; CLOSE = "close" ; CLUSTERED = "clustered" ; // COALESCE = "coalesce" ; COLLATE = "collate" ; COLUMN = "column" ; COMMIT = "commit" ; COMPUTE = "compute" ; CONCAT = "concat" ; CONSTRAINT = "constraint" ; CONTAINS = "contains" ; CONTAINSTABLE = "containstable" ; CONTINUE = "continue" ; // CONVERT = "convert" ; CREATE = "create" ; CROSS = "cross" ; CUBE = "cube" ; CURRENT = "current" ; CURRENT_DATE = "current_date" ; CURRENT_TIME = "current_time" ; CURRENT_TIMESTAMP = "current_timestamp" ; CURRENT_USER = "current_user" ; CURSOR = "cursor" ; DATABASE = "database" ; DBCC = "dbcc" ; DEALLOCATE = "deallocate" ; DECLARE = "declare" ; DEFAULT = "default" ; DELETE = "delete" ; DENY = "deny" ; DESC = "desc" ; DISK = "disk" ; DISTINCT = "distinct" ; DISTRIBUTED = "distributed" ; DOUBLE = "double" ; DROP = "drop" ; // DUMMY = "dummy" ; DUMP = "dump" ; ELEMENTS = "elements" ; ELSE = "else" ; END = "end" ; ERRLVL = "errlvl" ; ESCAPE = "escape" ; EXCEPT = "except" ; EXEC = "exec" ; EXECUTE = "execute" ; EXISTS = "exists" ; EXIT = "exit" ; EXPAND = "expand" ; EXPLICIT = "explicit" ; FAST = "fast" ; FASTFIRSTROW = "fastfirstrow" ; FETCH = "fetch" ; FILE = "file" ; FILLFACTOR = "fillfactor" ; FOR = "for" ; FORCE = "force" ; FOREIGN = "foreign" ; FREETEXT = "freetext" ; FREETEXTTABLE = "freetexttable" ; FROM = "from" ; FULL = "full" ; FUNCTION = "function" ; GOTO = "goto" ; GRANT = "grant" ; GROUP = "group" ; HASH = "hash" ; HAVING = "having" ; HOLDLOCK = "holdlock" ; IDENTITY = "identity" ; IDENTITY_INSERT = "identity_insert" ; IDENTITYCOL = "identitycol" ; IF = "if" ; IN = "in" ; INDEX = "index" ; INNER = "inner" ; INSERT = "insert" ; INTERSECT = "intersect" ; INTO = "into" ; IS = "is" ; JOIN = "join" ; KEEP = "keep" ; KEEPFIXED = "keepfixed" ; KEY = "key" ; KILL = "kill" ; LEFT = "left" ; LIKE = "like" ; LINENO = "lineno" ; LOAD = "load" ; LOOP = "loop" ; MAXDOP = "maxdop" ; MERGE = "merge" ; NATIONAL = "national" ; NOCHECK = "nocheck" ; NOLOCK = "nolock" ; NONCLUSTERED = "nonclustered" ; NOT = "not" ; NULL = "null" ; // NULLIF = "nullif" ; OF = "of" ; OFF = "off" ; OFFSETS = "offsets" ; ON = "on" ; OPEN = "open" ; OPENDATASOURCE = "opendatasource" ; OPENQUERY = "openquery" ; OPENROWSET = "openrowset" ; OPENXML = "openxml" ; OPTION = "option" ; OR = "or" ; ORDER = "order" ; OUTER = "outer" ; OVER = "over" ; PAGLOCK = "paglock" ; PERCENT = "percent" ; PLAN = "plan" ; PRECISION = "precision" ; PRIMARY = "primary" ; PRINT = "print" ; PROC = "proc" ; PROCEDURE = "procedure" ; PUBLIC = "public" ; RAISERROR = "raiserror" ; RAW = "raw" ; READ = "read" ; READCOMMITED = "readcommited" ; READPAST = "readpast" ; READTEXT = "readtext" ; READUNCOMMITED = "readuncommited" ; RECONFIGURE = "reconfigure" ; REFERENCES = "references" ; REMOTE = "remote" ; REPEATABLEREAD = "repeatableread" ; REPLICATION = "replication" ; RESTORE = "restore" ; RESTRICT = "restrict" ; RETURN = "return" ; REVOKE = "revoke" ; RIGHT = "right" ; ROBUST = "robust" ; ROLLBACK = "rollback" ; ROLLUP = "rollup" ; ROWCOUNT = "rowcount" ; ROWGUIDCOL = "rowguidcol" ; ROWLOCK = "rowlock" ; RULE = "rule" ; SAVE = "save" ; SCHEMA = "schema" ; SELECT = "select" ; SERIALIZABLE = "serializable" ; SESSION_USER = "session_user" ; SET = "set" ; SETUSER = "setuser" ; SHUTDOWN = "shutdown" ; SOME = "some" ; STATISTICS = "statistics" ; SYSTEM_USER = "system_user" ; TABLE = "table" ; TABLOCK = "tablock" ; TABLOCKX = "tablockx" ; TEXTSIZE = "textsize" ; THEN = "then" ; TIES = "ties" ; TO = "to" ; TOP = "top" ; TRAN = "tran" ; TRANSACTION = "transaction" ; TRIGGER = "trigger" ; TRUNCATE = "truncate" ; TSEQUAL = "tsequal" ; UNION = "union" ; UNIQUE = "unique" ; UPDATE = "update" ; UPDATETEXT = "updatetext" ; UPDLOCK = "updlock" ; USE = "use" ; USER = "user" ; VALUES = "values" ; VARYING = "varying" ; VIEW = "view" ; VIEWS = "views" ; WAITFOR = "waitfor" ; WHEN = "when" ; WHERE = "where" ; WHILE = "while" ; WITH = "with" ; WRITETEXT = "writetext" ; XLOCK = "xlock" ; XML = "xml" ; XMLDATA = "xmldata" ; // system variables F_CONNECTIONS = "@@connections" ; F_CPU_BUSY = "@@cpu_busy" ; F_CURSOR_ROWS = "@@cursor_rows" ; F_DATEFIRST = "@@datefirst" ; F_DBTS = "@@dbts" ; F_ERROR = "@@error" ; F_FETCH_STATUS = "@@fetch_status" ; F_IDENTITY = "@@identity" ; F_IDLE = "@@idle" ; F_IO_BUSY = "@@io_busy" ; F_LANGID = "@@langid" ; F_LANGUAGE = "@@language" ; F_LOCK_TIMEOUT = "@@lock_timeout" ; F_MAX_CONNECTIONS = "@@max_connections" ; F_MAX_PRECISION = "@@max_precision" ; F_NESTLEVEL = "@@nestlevel" ; F_OPTIONS = "@@options" ; F_PACK_RECEIVED = "@@pack_received" ; F_PACK_SENT = "@@pack_sent" ; F_PACKET_ERRORS = "@@packet_errors" ; F_PROCID = "@@procid" ; F_REMSERVER = "@@remserver" ; F_ROWCOUNT = "@@rowcount" ; F_SERVERNAME = "@@servername" ; F_SERVICENAME = "@@servicename" ; F_SPID = "@@spid" ; F_TEXTSIZE = "@@textsize" ; F_TIMETICKS = "@@timeticks" ; F_TOTAL_ERRORS = "@@total_errors" ; F_TOTAL_READ = "@@total_read" ; F_TOTAL_WRITE = "@@total_write" ; F_TRANCOUNT = "@@trancount" ; F_VERSION = "@@version" ; } // Operators protected DOT:; // generated as a part of Number rule COLON : ':' ; COMMA : ',' ; SEMICOLON : ';' ; LPAREN : '(' ; RPAREN : ')' ; //LSQUARE : '[' ; //RSQUARE : ']' ; ASSIGNEQUAL : '=' ; NOTEQUAL1 : "<>" ; NOTEQUAL2 : "!=" ; LESSTHANOREQUALTO1 : "<=" ; LESSTHANOREQUALTO2 : "!>" ; LESSTHAN : "<" ; GREATERTHANOREQUALTO1 : ">=" ; GREATERTHANOREQUALTO2 : "!<" ; GREATERTHAN : ">" ; DIVIDE : '/' ; PLUS : '+' ; MINUS : '-' ; STAR : '*' ; MOD : '%' ; AMPERSAND : '&' ; TILDE : '~' ; BITWISEOR : '|' ; BITWISEXOR : '^' ; DOT_STAR : ".*" ; Whitespace : (' ' | '\t' | '\n' | '\r') { _ttype = Token.SKIP; } ; // COMMENTS SingleLineComment : "--"( ~('\r' | '\n') )* { _ttype = Token.SKIP; } ; MultiLineComment : "/*" (~'*')* '*' ('*' | ( ~('*' | '/') (~'*')* '*') )* '/' { _ttype = Token.SKIP; } ; // LITERALS protected Letter : 'a'..'z' | '_' | '#' | '@' | '\u0080'..'\ufffe' ; protected Digit : '0'..'9' ; protected Integer :; protected Real :; protected Exponent : 'e' ( '+' | '-' )? (Digit)+ ; Number : ( (Digit)+ ('.' | 'e') ) => (Digit)+ ( '.' (Digit)* (Exponent)? | Exponent) { _ttype = Real; } | '.' { _ttype = DOT; } ( (Digit)+ (Exponent)? { _ttype = Real; } )? | (Digit)+ { _ttype = Integer; } | "0x" ('a'..'f' | Digit)* { _ttype = HexLiteral; } // "0x" is valid hex literal ; protected Currency : // generated as a part of NonQuotedIdentifier rule ('$' | '\u00a3'..'\u00a5' | '\u09f2'..'\u09f3' | '\u0e3f' | '\u20a0'..'\u20a4' | '\u20a6'..'\u20ab') ((Digit)+ ('.' (Digit)* )? | '.' (Digit)+) ; ODBCDateTime : '{' (Whitespace)? ("ts" | 't' | 'd') (Whitespace)? ('n')? '\'' (~'\'')* '\'' ( '\'' (~'\'')* '\'' )* (Whitespace)? '}' ; NonQuotedIdentifier options { testLiterals = true; } : (Currency) => Currency { _ttype = Currency; } | ('a'..'z' | '_' | '#' | '\u0080'..'\ufffe') (Letter | Digit)* // first char other than '@' ; QuotedIdentifier : ( '[' (~']')* ']' (']' (~']')* ']')* | '"' (~'"')* '"' ('"' (~'"')* '"')* ) ; Variable // test for literals in case of a function begining with '@' (eg. "@@ERROR") options { testLiterals = true; } : '@' (Letter | Digit)+ ; ASCIIStringLiteral : '\'' (~'\'')* '\'' ( '\'' (~'\'')* '\'' )* ; UnicodeStringLiteral : 'n' '\'' (~'\'')* '\'' ( '\'' (~'\'')* '\'' )* ; // Numeric Constants protected HexLiteral // generated as a part of Number rule : // "0x" ('0'..'9' | 'a'..'f')* ;