regexper-static/src/js/parser/javascript/grammar.peg

89 lines
3.1 KiB
Plaintext
Raw Normal View History

2014-11-26 00:37:54 +00:00
grammar JavascriptRegexp

# Entry point. Accepts either a "/"-delimited pattern followed by any number
# of i/g/m flag characters, or a bare pattern with no delimiters.
root <- ( ( "/" regexp "/" _flags:[igm]* )
        / regexp ""? ) <Root>

# A pattern is one `match` followed by zero or more "|"-separated alternates.
regexp <- _match:match
          _alternates:( "|" match )* <Regexp>
# One alternative of a pattern: an optional start anchor, zero or more
# fragments, and an optional end anchor. The `!repeat` lookahead fails the
# rule when a repeat operator sits directly after the optional start anchor.
match <- _anchor_start:anchor_start? (!repeat)
         _parts:match_fragment*
         _anchor_end:anchor_end? <Match>

# A single unit of content (group, character set or terminal) together with
# its optional repeat operator.
match_fragment <- _content:( subexp / charset / terminal )
                  _repeat:repeat? <MatchFragment>
2014-11-26 00:37:54 +00:00
# Literal tokens for the optional leading "^" and trailing "$" used by `match`.
anchor_start <- "^"
anchor_end   <- "$"
# A repeat operator: one of the four forms below, optionally followed by the
# "?" suffix matched by repeat_greedy.
repeat <- _spec:( repeat_any
                / repeat_required
                / repeat_optional
                / repeat_spec )
          _greedy:repeat_greedy? <Repeat>

# Single-character repeat operators.
repeat_any      <- "*" <RepeatAny>
repeat_required <- "+" <RepeatRequired>
repeat_optional <- "?" <RepeatOptional>

# Brace-delimited counts, tried in this order:
#   {min,max}   {,max}   {min,}   {exact}
repeat_spec <- ( "{" _min:[0-9]+ "," _max:[0-9]+ "}"
               / "{," _max:[0-9]+ "}"
               / "{" _min:[0-9]+ ",}"
               / "{" _exact:[0-9]+ "}" ) <RepeatSpec>
2014-11-26 00:37:54 +00:00
# The "?" that may directly follow a repeat operator (see `repeat`).
repeat_greedy <- "?"

# Parenthesised sub-expression. An optional prefix ("?:", "?=" or "?!") may
# follow the opening parenthesis; the body is a full `regexp`.
subexp <- "("
          ( subexp_no_capture
          / subexp_positive_lookahead
          / subexp_negative_lookahead )?
          regexp ")" <Subexp>
2014-11-26 00:37:54 +00:00
# Prefix tokens accepted immediately after "(" in `subexp`.
subexp_no_capture         <- "?:"
subexp_positive_lookahead <- "?="
subexp_negative_lookahead <- "?!"
# Bracketed character set: "[", an optional "^", any number of members, "]".
# For each member a range is attempted before a single terminal.
charset <- "[" "^"?
           ( charset_range / charset_terminal )*
           "]" <Charset>
2014-11-26 00:37:54 +00:00
# Two set members joined by "-".
charset_range    <- charset_terminal "-" charset_terminal

# A single set member: an escape sequence, otherwise one literal character.
charset_terminal <- charset_escape / charset_literal
# Escape sequences recognised inside a character set. This is an ordered
# choice, so the sequence of alternatives is significant: backspace_esc is
# tried first, octal_esc is tried before null_esc, and literal_esc is the
# final fallback.
charset_escape <- ( backspace_esc / control_esc
                  / digit_esc / non_digit_esc
                  / form_feed_esc / line_feed_esc / carriage_return_esc
                  / white_space_esc / non_white_space_esc
                  / tab_esc / vertical_tab_esc
                  / word_esc / non_word_esc
                  / octal_esc / hex_esc / unicode_esc
                  / null_esc
                  / literal_esc )

# Any single character other than a backslash or "]".
charset_literal <- [^\\\]]
# A non-group, non-set unit of content: ".", an escape sequence, or a literal
# character, tried in that order.
# NOTE(review): unlike `root` and `repeat_spec`, the alternatives here are not
# wrapped in parentheses -- confirm whether <Terminal> is meant to apply to
# all three alternatives or only to `literal`.
terminal <- any_character
          / escape
          / literal <Terminal>
2014-11-26 00:37:54 +00:00
# The "." token.
any_character <- "."

# Escape sequences recognised outside a character set. This is an ordered
# choice, so the sequence of alternatives is significant: the word-boundary
# forms are tried first, back_reference is tried before octal_esc, octal_esc
# before null_esc, and literal_esc is the final fallback.
escape <- ( word_boundary_esc / non_word_boundary_esc
          / control_esc
          / digit_esc / non_digit_esc
          / form_feed_esc / line_feed_esc / carriage_return_esc
          / white_space_esc / non_white_space_esc
          / tab_esc / vertical_tab_esc
          / word_esc / non_word_esc
          / back_reference
          / octal_esc / hex_esc / unicode_esc
          / null_esc
          / literal_esc )

# Any single character that is not one of: | \ / . [ ( ) ? + * $ ^
literal <- [^|\\/.\[\(\)?+*$^]
# Backslash followed by a single digit 1-9.
back_reference        <- "\\" [1-9]

# "\b" and "\B" as used by `escape` (outside a character set).
word_boundary_esc     <- "\\b"
non_word_boundary_esc <- "\\B"

# "\b" as used by `charset_escape` (inside a character set). Same text as
# word_boundary_esc; the rule that references it decides which one applies.
backspace_esc         <- "\\b"
# Control-character escape: "\c" followed by one character (for example \cJ).
# The prefix used to be "\\b", which duplicated backspace_esc and
# word_boundary_esc. Those alternatives come earlier in both charset_escape
# and escape, so this rule could never be selected and "\cX" fell through to
# literal_esc. The trailing "." is kept as-is so the rule stays as permissive
# as before about the character that follows.
control_esc <- "\\c" .
# Fixed two-character escapes: a backslash followed by one letter.
digit_esc           <- "\\d"
non_digit_esc       <- "\\D"
form_feed_esc       <- "\\f"
line_feed_esc       <- "\\n"
carriage_return_esc <- "\\r"
white_space_esc     <- "\\s"
non_white_space_esc <- "\\S"
tab_esc             <- "\\t"
vertical_tab_esc    <- "\\v"
word_esc            <- "\\w"
non_word_esc        <- "\\W"

# Numeric escapes: "\0" plus one or more octal digits, "\x" plus exactly two
# hex digits, "\u" plus exactly four hex digits.
octal_esc           <- "\\0" [0-7]+
hex_esc             <- "\\x" [0-9a-fA-F] [0-9a-fA-F]
unicode_esc         <- "\\u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]

# A bare "\0"; reached only when octal_esc has not matched, because the
# rules that reference both list octal_esc first.
null_esc            <- "\\0"

# Fallback: a backslash followed by any single character.
literal_esc         <- "\\" .