Grammar
An external dependency for EQL is the Python library Lark. Lark is a parser generator: it builds a parser from the grammar below, which EQL uses to parse queries.
// Zero or more macro/constant definitions.
definitions: definition*
// A leading `?` asks Lark to inline this rule into its parent when it has a single child.
?definition: macro | constant
// macro <name>(<param>, <param>, ...) <expr>   -- the parameter list may be empty.
macro: "macro" name "(" [name ("," name)*] ")" expr
// const <name> = <literal>   -- EQUALS accepts either "=" or "==".
constant: "const" name EQUALS literal
// Definitions followed by exactly one piped query.
query_with_definitions: definitions piped_query
// A query is either a base query with optional trailing pipes, or pipes on their own.
piped_query: base_query [pipes]
| pipes
// The four base query forms.
base_query: sequence
| sample
| join
| event_query
// Optional "<name> where" prefix followed by a condition expression.
event_query: [name "where"] expr
// "sequence", then optional "by ..." and "with maxspan=..." clauses in either order,
// then one or more subqueries and an optional "until" subquery.
sequence: "sequence" [join_values with_params? | with_params join_values?] subquery_by+ [until_subquery_by]
// "sample", optional "by ...", then one or more subqueries.
sample: "sample" join_values? subquery_by+
// "join", optional "by ...", then at least two subqueries and an optional "until" subquery.
join: "join" join_values? subquery_by subquery_by+ until_subquery_by?
// The `.N` suffix on a rule name is a Lark priority.
until_subquery_by.2: "until" subquery_by
// One or more pipe stages.
pipes: pipe+
// "| <name>" followed by optional arguments: either two or more whitespace-separated
// single atoms, or a comma-separated expression list.
pipe: "|" name [single_atom single_atom+ | expressions]
// "by <expr>, <expr>, ..."
join_values.2: "by" expressions
// Only the `maxspan` parameter is accepted after "with" in this rule.
?with_params.2: "with" "maxspan" EQUALS time_range
// "with runs=<unsigned integer>"
repeated_sequence.2: "with" "runs" EQUALS UNSIGNED_INTEGER
// "as <name>"
sequence_alias.2: "as" name
// A number optionally followed by a name (presumably the time unit -- confirm against the parser code).
time_range: number name?
// A bracketed subquery followed by its optional modifiers, in this fixed order:
// fork, by, with runs, as.
subquery_by: subquery fork_param? join_values? repeated_sequence? sequence_alias?
// An event query enclosed in square brackets.
subquery: "[" event_query "]"
// "fork", optionally followed by "=<boolean>".
fork_param: "fork" (EQUALS boolean)?
// Expressions
// Comma-separated expression list; one trailing comma is accepted.
expressions: expr ("," expr)* [","]
// Precedence ladder, loosest to tightest binding:
//   or  ->  and  ->  not  ->  term (comparison / set membership / string predicate)
//   ->  sum_expr (SIGN)  ->  mul_expr (MULT_OP)
// A leading `?` asks Lark to inline the rule when it has exactly one child, so
// pass-through levels do not appear in the tree.
?expr: or_expr
?or_expr: and_expr ("or" and_expr)*
?and_expr: not_expr ("and" not_expr)*
// Zero or more stacked NOT_OP tokens before a term; `.3` is a Lark priority.
?not_expr.3: NOT_OP* term
// `-> label` names the tree node produced by that alternative; the final bare
// `sum_expr` alternative has no label.
// NOTE(review): `expressions` already accepts a trailing comma, so the extra
// `[","]?` in the two set alternatives looks redundant -- confirm before simplifying.
?term: sum_expr comp_op sum_expr -> comparison
| sum_expr "not" IN "(" expressions [","]? ")" -> not_in_set
| sum_expr IN "(" expressions [","]? ")" -> in_set
| sum_expr STRING_PREDICATE (literal | "(" literal ("," literal)* ")") -> string_predicate
| sum_expr
// The operators below are declared as named terminals rather than inline strings.
// Presumably this is so the matched operator text is kept in the parse tree
// (Lark filters out anonymous string tokens by default) -- confirm.
// The `~`-suffixed spellings are listed first within each terminal.
IN.3: "in~" | "in"
EQUALS: "==" | "="
STRING_PREDICATE.3: ":"
| "like~"
| "regex~"
| "like"
| "regex"
// Comparison operators other than equality; equality comes from EQUALS.
COMP_OP: "<=" | "<" | "!=" | ">=" | ">"
?comp_op: EQUALS | COMP_OP
MULT_OP: "*" | "/" | "%"
NOT_OP: "not"
// Additive level: operands joined by SIGN ("+" or "-").
?sum_expr: mul_expr (SIGN mul_expr)*
// Multiplicative level: operands joined by MULT_OP.
?mul_expr: named_subquery_test (MULT_OP named_subquery_test)*
?named_subquery_test: named_subquery
| method_chain
// "<name> of [ <event query> ]"
named_subquery.2: name "of" subquery
// A value followed by zero or more method calls.
?method_chain: value method*
// An optionally signed function call or atom.
?value: SIGN? function_call
| SIGN? atom
// Workaround for an ambiguity introduced by the colon operator: ":" is both a
// STRING_PREDICATE operator and the prefix of a method call.
//   x : length        -- colon used as the string-predicate operator
//   x : length( )     -- no longer accepted as a method call
// A method call must now be written in the contiguous `:length(` form, which is
// lexed as a single METHOD_START token (":" + NAME + "(" with nothing in between).
METHOD_START.3: ":" NAME "("
// Wraps the METHOD_START token; the method's name is embedded in that token's text.
method_name: METHOD_START
// The opening "(" is part of METHOD_START, so only the arguments and ")" follow here.
method: method_name [expressions] ")"
// <name>(<args>) where the name may carry a trailing "~" (INSENSITIVE_NAME).
function_call: (INSENSITIVE_NAME | NAME) "(" [expressions] ")"
// An atom is a single atom or a parenthesised expression.
?atom: single_atom
| "(" expr ")"
// Optionally signed single atom. Not referenced by any other rule in this grammar
// as shown; presumably used as a separate parser entry point -- confirm.
?signed_single_atom: SIGN? single_atom
// The indivisible operands: literals, "$"-prefixed variable paths and field references.
?single_atom: literal
| varpath
| field
| base_field
// A bare (unqualified) field: a plain name or a backtick-escaped name.
base_field: name | escaped_name
// A field with at least one attribute/index part (FIELD) or a "?"-prefixed field (OPTIONAL_FIELD).
field: FIELD
| OPTIONAL_FIELD
literal: number
| boolean
| null
| string
// A leading `!` tells Lark to keep all tokens of the rule, so the matched
// "true"/"false" keyword stays in the tree.
!boolean: "true"
| "false"
null: "null"
number: UNSIGNED_INTEGER
| DECIMAL
// All accepted string-literal forms; see the corresponding terminals for their syntax.
string: RAW_TQ_STRING
| DQ_STRING
| SQ_STRING
| RAW_DQ_STRING
| RAW_SQ_STRING
// "$" followed by a field or bare field.
varpath: "$" (field | base_field)
// Check against keyword usage
// NOTE(review): the grammar itself does not reject keywords here; presumably the
// check happens in the code that walks `name` nodes -- confirm.
name: NAME
escaped_name: ESCAPED_NAME
// Tokens
// Pin the first "." or "[" to the identifier to resolve token ambiguities.
// Without this,
//   sequence by pid [1] [true]
// would look identical to:
//   sequence by pid[1] [true]
// FIELD therefore requires at least one ATTR/INDEX part, and the "." or "[" must
// follow the preceding text directly; whitespace is only allowed after the "."
// and inside the brackets.
FIELD: FIELD_IDENT (ATTR | INDEX)+
// "?"-prefixed field; the attribute/index parts are optional here.
OPTIONAL_FIELD: "?" FIELD_IDENT (ATTR | INDEX)*
ATTR: "." WHITESPACE? FIELD_IDENT
INDEX: "[" WHITESPACE? UNSIGNED_INTEGER WHITESPACE? "]"
FIELD_IDENT: NAME | ESCAPED_NAME
// Non-conflicting helper rules that describe the same shape as FIELD using
// parser rules instead of one terminal. Presumably used to split the text of an
// already-matched FIELD token into its parts -- confirm against the parser code.
// The leading `!` keeps the matched tokens in the tree.
field_parts: field_ident ("." field_ident | "[" array_index "]")*
!array_index: UNSIGNED_INTEGER
!field_ident: NAME | ESCAPED_NAME
// Character classes (ASCII only).
LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
DIGIT: "0".."9"
LETTER: UCASE_LETTER | LCASE_LETTER
WORD: LETTER+
// Backtick-quoted name: one or more characters other than a backtick or line break.
ESCAPED_NAME: "`" /[^`\r\n]+/ "`"
// Same shape as NAME with a trailing "~"; given priority 2.
INSENSITIVE_NAME.2: ("_"|LETTER) ("_"|LETTER|DIGIT)* "~"
// Identifier: starts with "_" or a letter, continues with "_", letters or digits.
NAME: ("_"|LETTER) ("_"|LETTER|DIGIT)*
UNSIGNED_INTEGER: /[0-9]+/
// Exponent suffix: "e"/"E", optional sign, one or more digits.
EXPONENT: /[Ee][-+]?\d+/
// Either [digits] "." digits [exponent], or digits followed by an exponent.
DECIMAL: UNSIGNED_INTEGER? "." UNSIGNED_INTEGER+ EXPONENT?
| UNSIGNED_INTEGER EXPONENT
SIGN: "+" | "-"
// Double-quoted string on a single line. Accepted escapes: \b \t \n \f \r \" \' \\
// and \u{...} with 2 to 8 characters from [a-zA-Z0-9] (the pattern does not
// restrict them to hex digits).
DQ_STRING: /"(\\[btnfr"'\\]|\\u\{[a-zA-Z0-9]{2,8}\}|[^\r\n"\\])*"/
// Single-quoted string on a single line; same escapes as above except \u{...}.
SQ_STRING: /'(\\[btnfr"'\\]|[^\r\n'\\])*'/
// "?"-prefixed strings: only the escaped quote (\" or \') is matched specially;
// every other character, including a backslash, is matched as-is.
RAW_DQ_STRING: /\?"(\\\"|[^"\r\n])*"/
RAW_SQ_STRING: /\?'(\\\'|[^'\r\n])*'/
// Triple-quoted string on a single line; the content is matched lazily and up to
// two additional closing quotes are accepted after the closing """. Priority 2.
RAW_TQ_STRING.2: /"""[^\r\n]*?""""?"?/
%import common.NEWLINE
// Line comment: "//" up to (but not including) the next newline.
COMMENT: "//" /[^\n]*/
// Block comment: "/*" ... "*/", matched lazily so it ends at the first "*/"; may span lines.
ML_COMMENT: "/*" /(.|\n|\r)*?/ "*/"
// One or more spaces, carriage returns, newlines or tabs.
WHITESPACE: (" " | "\r" | "\n" | "\t" )+
// Comments and whitespace are dropped between tokens. WHITESPACE is still
// referenced explicitly inside the ATTR and INDEX terminals.
%ignore COMMENT
%ignore ML_COMMENT
%ignore WHITESPACE