Fixing character set ranges

Certain escape sequences cannot be used as the endpoint of a range; when one
appears next to a hyphen, the hyphen is matched as a literal member of the set of characters.
This commit is contained in:
Jeff Avallone 2014-12-22 16:09:27 -05:00
parent d882ee8e08
commit b35dece228
2 changed files with 31 additions and 3 deletions

View File

@ -1,6 +1,7 @@
import javascript from 'src/js/parser/javascript/parser.js'; import javascript from 'src/js/parser/javascript/parser.js';
import util from 'src/js/util.js'; import util from 'src/js/util.js';
import Q from 'q'; import Q from 'q';
import _ from 'lodash';
describe('parser/javascript/charset_range.js', function() { describe('parser/javascript/charset_range.js', function() {
@ -17,6 +18,20 @@ describe('parser/javascript/charset_range.js', function() {
})); }));
}); });
// Table-driven negative cases: each input is a character-class escape
// (\d, \D, \s, \S, \w, \W) followed by "-a". One spec is generated per input.
_.each([
'\\d-a',
'\\D-a',
'\\s-a',
'\\S-a',
'\\w-a',
'\\W-a'
], str => {
it(`does not parse "${str}" as a CharsetRange`, function() {
var parser = new javascript.Parser(str);
// The charset_range rule is expected to produce no match (null) for this input.
expect(parser.__consume__charset_range()).toEqual(null);
});
});
it('throws an exception when the range is out of order', function() { it('throws an exception when the range is out of order', function() {
var parser = new javascript.Parser('z-a'); var parser = new javascript.Parser('z-a');
expect(() => { expect(() => {

View File

@ -17,9 +17,11 @@ grammar JavascriptRegexp
subexp_positive_lookahead <- "?=" subexp_positive_lookahead <- "?="
subexp_negative_lookahead <- "?!" subexp_negative_lookahead <- "?!"
charset <- "[" invert:"^"? parts:( charset_range / charset_terminal )* "]" <Charset> charset <- "[" invert:"^"? parts:( charset_range / charset_terminal )* "]" <Charset>
charset_range <- first:charset_terminal "-" last:charset_terminal <CharsetRange> charset_range <- first:charset_range_terminal "-" last:charset_range_terminal <CharsetRange>
charset_terminal <- charset_escape <CharsetEscape> charset_terminal <- charset_escape <CharsetEscape>
/ charset_literal <Literal> / charset_literal <Literal>
charset_range_terminal <- charset_range_escape <CharsetEscape>
/ charset_literal <Literal>
charset_escape <- "\\" esc:( charset_escape <- "\\" esc:(
code:[bdDfnrsStvwW] arg:""? code:[bdDfnrsStvwW] arg:""?
/ code:"c" arg:[a-zA-Z] / code:"c" arg:[a-zA-Z]
@ -27,7 +29,16 @@ grammar JavascriptRegexp
/ code:"x" arg:( [0-9a-fA-F] [0-9a-fA-F] ) / code:"x" arg:( [0-9a-fA-F] [0-9a-fA-F] )
/ code:"u" arg:( [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ) / code:"u" arg:( [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] )
/ code:"0" arg:""? ) / code:"0" arg:""? )
charset_literal <- ( ""? literal:[^\\\]] ) / ( literal:"\\" &"c" ) / ( "\\" literal:. ) charset_range_escape <- "\\" esc:(
code:[bfnrtv] arg:""?
/ code:"c" arg:[a-zA-Z]
/ code:"0" arg:[0-7]+
/ code:"x" arg:( [0-9a-fA-F] [0-9a-fA-F] )
/ code:"u" arg:( [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] )
/ code:"0" arg:""? )
charset_literal <- ( ""? literal:[^\\\]] )
/ ( literal:"\\" &"c" )
/ ( "\\" literal:[^bdDfnrsStvwW] )
terminal <- "." <AnyCharacter> terminal <- "." <AnyCharacter>
/ escape <Escape> / escape <Escape>
/ literal <Literal> / literal <Literal>
@ -38,4 +49,6 @@ grammar JavascriptRegexp
/ code:"x" arg:( [0-9a-fA-F] [0-9a-fA-F] ) / code:"x" arg:( [0-9a-fA-F] [0-9a-fA-F] )
/ code:"u" arg:( [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ) / code:"u" arg:( [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] )
/ code:"0" arg:""? ) / code:"0" arg:""? )
literal <- ( ""? literal:[^|\\/.\[\(\)?+*$^] ) / ( literal:"\\" &"c" ) / ( "\\" literal:. ) literal <- ( ""? literal:[^|\\/.\[\(\)?+*$^] )
/ ( literal:"\\" &"c" )
/ ( "\\" literal:. )