From abd8a52f6bcdb46e5dcd05b78228d1f4b9be886d Mon Sep 17 00:00:00 2001
From: Jeff Avallone
Date: Tue, 25 Nov 2014 19:37:54 -0500
Subject: [PATCH] Adding initial cut of JS regexp parser

---
Reviewer notes (this section is ignored when the patch is applied):

  * src/js/main.js now imports the grammar from ./parser/javascript.peg and
    assigns the imported parser to window.parser instead of logging a
    sample parse.
  * The placeholder "Lists" grammar in src/js/parser.peg is removed.
  * src/js/parser/javascript.peg defines grammar JavascriptRegexp. Its root
    accepts either a delimited literal ("/" regexp "/" followed by any of
    the flags i, g, m) or a bare regexp body.
  * Alternatives inside "escape" and "charset_escape" are ordered choices:
    the first alternative that matches wins, so specific escapes must be
    listed before the catch-all literal_esc.
  * control_esc matches "\\c" followed by one character (the \cX control
    escape). It must not start with "\\b": that prefix is already taken by
    word_boundary_esc / backspace_esc, which are tried earlier and would
    make control_esc unreachable.

 src/js/main.js               |  4 +-
 src/js/parser.peg            |  4 --
 src/js/parser/javascript.peg | 86 ++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 6 deletions(-)
 delete mode 100644 src/js/parser.peg
 create mode 100644 src/js/parser/javascript.peg

diff --git a/src/js/main.js b/src/js/main.js
index 8c08e82..ff636bc 100644
--- a/src/js/main.js
+++ b/src/js/main.js
@@ -1,3 +1,3 @@
-import parser from './parser.peg';
+import parser from './parser/javascript.peg';
 
-console.log('testing:', parser.parse('[0,1,2,3]'));
+window.parser = parser;
diff --git a/src/js/parser.peg b/src/js/parser.peg
deleted file mode 100644
index 2f31a84..0000000
--- a/src/js/parser.peg
+++ /dev/null
@@ -1,4 +0,0 @@
-grammar Lists
-  value <- list / number
-  list <- "[" ( value ("," value)* )? "]"
-  number <- [0-9]
diff --git a/src/js/parser/javascript.peg b/src/js/parser/javascript.peg
new file mode 100644
index 0000000..a684d42
--- /dev/null
+++ b/src/js/parser/javascript.peg
@@ -0,0 +1,86 @@
+grammar JavascriptRegexp
+  root <- regexp_literal / regexp
+  regexp_literal <- "/" regexp "/" [igm]*
+  regexp <- match ( "|" regexp )?
+  match <- anchor_start? ( ( subexp / charset / terminal ) repeat? )* anchor_end?
+  anchor_start <- "^"
+  anchor_end <- "$"
+  repeat <- ( repeat_any / repeat_required / repeat_optional / repeat_spec ) repeat_greedy?
+  repeat_any <- "*"
+  repeat_required <- "+"
+  repeat_optional <- "?"
+  repeat_spec <- "{" [0-9]+ "," [0-9]+ "}"
+               / "{," [0-9]+ "}"
+               / "{" [0-9]+ ",}"
+               / "{" [0-9]+ "}"
+  repeat_greedy <- "?"
+  subexp <- "(" ( subexp_no_capture / subexp_positive_lookahead / subexp_negative_lookahead )? regexp ")"
+  subexp_no_capture <- "?:"
+  subexp_positive_lookahead <- "?="
+  subexp_negative_lookahead <- "?!"
+  charset <- "[" "^"? ( charset_range / charset_terminal )* "]"
+  charset_range <- charset_terminal "-" charset_terminal
+  charset_terminal <- charset_escape / charset_literal
+  charset_escape <- ( backspace_esc
+                    / control_esc
+                    / digit_esc
+                    / non_digit_esc
+                    / form_feed_esc
+                    / line_feed_esc
+                    / carriage_return_esc
+                    / white_space_esc
+                    / non_white_space_esc
+                    / tab_esc
+                    / vertical_tab_esc
+                    / word_esc
+                    / non_word_esc
+                    / octal_esc
+                    / hex_esc
+                    / unicode_esc
+                    / null_esc
+                    / literal_esc )
+  charset_literal <- [^\\\]]
+  terminal <- any_character / escape / literal
+  any_character <- "."
+  escape <- ( word_boundary_esc
+            / non_word_boundary_esc
+            / control_esc
+            / digit_esc
+            / non_digit_esc
+            / form_feed_esc
+            / line_feed_esc
+            / carriage_return_esc
+            / white_space_esc
+            / non_white_space_esc
+            / tab_esc
+            / vertical_tab_esc
+            / word_esc
+            / non_word_esc
+            / back_reference
+            / octal_esc
+            / hex_esc
+            / unicode_esc
+            / null_esc
+            / literal_esc )
+  literal <- [^|\\/.\[\(\)?+*$^]
+  back_reference <- "\\" [1-9]
+  word_boundary_esc <- "\\b"
+  non_word_boundary_esc <- "\\B"
+  backspace_esc <- "\\b"
+  control_esc <- "\\c" .
+  digit_esc <- "\\d"
+  non_digit_esc <- "\\D"
+  form_feed_esc <- "\\f"
+  line_feed_esc <- "\\n"
+  carriage_return_esc <- "\\r"
+  white_space_esc <- "\\s"
+  non_white_space_esc <- "\\S"
+  tab_esc <- "\\t"
+  vertical_tab_esc <- "\\v"
+  word_esc <- "\\w"
+  non_word_esc <- "\\W"
+  octal_esc <- "\\0" [0-7]+
+  hex_esc <- "\\x" [0-9a-fA-F] [0-9a-fA-F]
+  unicode_esc <- "\\u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
+  null_esc <- "\\0"
+  literal_esc <- "\\" .