Adding initial cut of JS regexp parser

This commit is contained in:
Jeff Avallone 2014-11-25 19:37:54 -05:00
parent 9136e3f37e
commit abd8a52f6b
3 changed files with 88 additions and 6 deletions

View File

@ -1,3 +1,3 @@
import parser from './parser.peg';
import parser from './parser/javascript.peg';
console.log('testing:', parser.parse('[0,1,2,3]'));
window.parser = parser;

View File

@ -1,4 +0,0 @@
grammar Lists
value <- list / number
list <- "[" ( value ("," value)* )? "]"
number <- [0-9]

View File

@ -0,0 +1,86 @@
grammar JavascriptRegexp
root <- regexp_literal / regexp
regexp_literal <- "/" regexp "/" [igm]*
regexp <- match ( "|" regexp )?
match <- anchor_start? ( ( subexp / charset / terminal ) repeat? )* anchor_end?
anchor_start <- "^"
anchor_end <- "$"
repeat <- ( repeat_any / repeat_required / repeat_optional / repeat_spec ) repeat_greedy?
repeat_any <- "*"
repeat_required <- "+"
repeat_optional <- "?"
repeat_spec <- "{" [0-9]+ "," [0-9]+ "}"
/ "{," [0-9]+ "}"
/ "{" [0-9]+ ",}"
/ "{" [0-9]+ "}"
repeat_greedy <- "?"
subexp <- "(" ( subexp_no_capture / subexp_positive_lookahead / subexp_negative_lookahead )? regexp ")"
subexp_no_capture <- "?:"
subexp_positive_lookahead <- "?="
subexp_negative_lookahead <- "?!"
charset <- "[" "^"? ( charset_range / charset_terminal )* "]"
charset_range <- charset_terminal "-" charset_terminal
charset_terminal <- charset_escape / charset_literal
charset_escape <- ( backspace_esc
/ control_esc
/ digit_esc
/ non_digit_esc
/ form_feed_esc
/ line_feed_esc
/ carriage_return_esc
/ white_space_esc
/ non_white_space_esc
/ tab_esc
/ vertical_tab_esc
/ word_esc
/ non_word_esc
/ octal_esc
/ hex_esc
/ unicode_esc
/ null_esc
/ literal_esc )
charset_literal <- [^\\\]]
terminal <- any_character / escape / literal
any_character <- "."
escape <- ( word_boundary_esc
/ non_word_boundary_esc
/ control_esc
/ digit_esc
/ non_digit_esc
/ form_feed_esc
/ line_feed_esc
/ carriage_return_esc
/ white_space_esc
/ non_white_space_esc
/ tab_esc
/ vertical_tab_esc
/ word_esc
/ non_word_esc
/ back_reference
/ octal_esc
/ hex_esc
/ unicode_esc
/ null_esc
/ literal_esc )
literal <- [^|\\/.\[\(\)?+*$^]
back_reference <- "\\" [1-9]
word_boundary_esc <- "\\b"
non_word_boundary_esc <- "\\B"
backspace_esc <- "\\b"
control_esc <- "\\b" .
digit_esc <- "\\d"
non_digit_esc <- "\\D"
form_feed_esc <- "\\f"
line_feed_esc <- "\\n"
carriage_return_esc <- "\\r"
white_space_esc <- "\\s"
non_white_space_esc <- "\\S"
tab_esc <- "\\t"
vertical_tab_esc <- "\\v"
word_esc <- "\\w"
non_word_esc <- "\\W"
octal_esc <- "\\0" [0-7]+
hex_esc <- "\\x" [0-9a-fA-F] [0-9a-fA-F]
unicode_esc <- "\\u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
null_esc <- "\\0"
literal_esc <- "\\" .