
// 182 lines
// 4.4 KiB
// (repository viewer links: Raw, Permalink, Normal View, History)

"use strict";
module.exports = parse;
// Matches a single whitespace character at the very start of the input.
var re_ws = /^\s/;

// Matches a name at the start of the input: one or more of
//  - a backslash followed by any character (an escape),
//  - a word character or "-",
//  - a character in the range U+00C0..U+FFFF.
var re_name = /^(?:\\.|[\w\-\u00c0-\uFFFF])+/;

// Matches one backslash escape anywhere in a string (global, case-insensitive).
// Group 1 is the escaped text: 1-6 hex digits with an optional trailing
// whitespace character, a single whitespace character, or any other character.
// Group 2 is only set when the escaped text is a single whitespace character.
var re_escape = /\\([\da-f]{1,6}\s?|(\s)|.)/ig;

// Matches the rest of an attribute selector up to and including the closing "]".
// Modified version of an upstream pattern (the source link is missing from the
// original comment).
//  group 1: attribute name
//  group 2: the optional character in front of "=" ("" for a bare "=",
//           undefined when there is no "=" at all)
//  group 3: the quote character of a quoted value
//  group 4: the quoted value
//  group 5: the unquoted value
//  group 6: the trailing "i" flag, when present
var re_attr = /^\s*((?:\\.|[\w\u00c0-\uFFFF\-])+)\s*(?:(\S?)=\s*(?:(['"])(.*?)\3|(#?(?:\\.|[\w\u00c0-\uFFFF\-])*)|)|)\s*(i)?\]/;
// Maps the operator character captured in front of "=" in an attribute
// selector (group 2 of re_attr) to the action name stored on the token.
// The "undefined" key is hit when the selector has no "=" at all, because the
// unmatched capture group is coerced to the string "undefined" on lookup.
// The prototype is null so only the keys listed here can ever be found.
var actionTypes = {
    __proto__: null,
    "undefined": "exists",
    "": "equals",
    "~": "element",
    "^": "start",
    "$": "end",
    "*": "any",
    "!": "not",
    "|": "hyphen"
};
// Maps a single combinator character to the type of the token pushed for it.
// The prototype is null so that `char in simpleSelectors` is only true for the
// characters listed here.
var simpleSelectors = {
    __proto__: null,
    ">": "child",
    "<": "parent",
    "~": "sibling",
    "+": "adjacent"
};
// Shorthand selectors that are tokenized as attribute selectors.
// Each entry is [attribute name, action]: "#x" becomes id/equals and
// ".x" becomes class/element. The prototype is null so that
// `char in attribSelectors` is only true for the characters listed here.
var attribSelectors = {
    __proto__: null,
    "#": ["id", "equals"],
    ".": ["class", "element"]
};
// Replacer callback used with re_escape to decode a single backslash escape.
// (The original comment credited an upstream source for this function, but
// the link is missing.)
//
//  _                 - the full match (unused)
//  escaped           - the text after the backslash (group 1 of re_escape)
//  escapedWhitespace - set when the escaped text is one whitespace character
//                      (group 2 of re_escape)
//
// Returns the decoded character(s): the escaped text itself when it is not a
// hexadecimal code point, otherwise the character for that code point.
function funescape( _, escaped, escapedWhitespace ) {
    // Interpret the escaped text as hex and rebase it so that BMP code points
    // become negative and supplementary-plane code points become >= 0.
    var high = "0x" + escaped - 0x10000;
    // NaN means non-codepoint
    // Support: Firefox
    // Workaround erroneous numeric interpretation of +"0x"
    return high !== high || escapedWhitespace ?
        escaped :
        // BMP codepoint
        high < 0 ?
            String.fromCharCode( high + 0x10000 ) :
            // Supplemental Plane codepoint (surrogate pair)
            String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 );
}
// Decodes every backslash escape in `str` by running funescape over each
// match of re_escape, and returns the resulting string.
function unescapeCSS(str){
    return str.replace(re_escape, funescape);
}
// Finds the end of a parenthesized group at the start of `selector`.
// The character at index 0 is not inspected; it is counted as the opening
// parenthesis (the nesting depth starts at 1 and scanning starts at index 1).
// Returns the index just past the ")" that brings the depth back to 0, or
// selector.length when the parentheses never balance.
function getClosingPos(selector){
    var pos = 1, counter = 1, len = selector.length;

    for(; counter > 0 && pos < len; pos++){
        if(selector.charAt(pos) === "(") counter++;
        else if(selector.charAt(pos) === ")") counter--;
    }

    return pos;
}
// Tokenizes a CSS selector string.
//
//  selector - the selector; it is coerced to a string and leading whitespace
//             is removed before parsing.
//  options  - optional object. Recognized keys:
//               lowerCaseTags           - lower-case tag names when truthy
//               lowerCaseAttributeNames - lower-case attribute names when truthy
//               xmlMode                 - when one of the keys above is absent,
//                                         names are lower-cased unless xmlMode
//                                         is truthy
//             Without an options object, names are always lower-cased.
//
// Returns an array with one entry per comma-separated sub-selector; each entry
// is an array of token objects ({type: "tag"|"descendant"|"child"|"parent"|
// "sibling"|"adjacent"|"universal"|"attribute"|"pseudo", ...}).
//
// Throws a SyntaxError for an empty sub-selector, a malformed attribute
// selector, or a character that starts no known selector. A "#" or "." that is
// not followed by a name makes getName() fail with a TypeError.
function parse(selector, options){
    selector = (selector + "").trimLeft();

    var subselects = [],
        tokens = [],
        sawWS = false,
        data, firstChar, name;

    // Consumes the name at the start of `selector` and returns it unescaped.
    function getName(){
        var sub = selector.match(re_name)[0];
        selector = selector.substr(sub.length);
        return unescapeCSS(sub);
    }

    while(selector !== ""){
        if(re_name.test(selector)){
            // A name directly at the current position is a tag name.
            if(sawWS){
                tokens.push({type: "descendant"});
                sawWS = false;
            }

            name = getName();

            if(!options || ("lowerCaseTags" in options ? options.lowerCaseTags : !options.xmlMode)){
                name = name.toLowerCase();
            }

            tokens.push({type: "tag", name: name});
        } else if(re_ws.test(selector)){
            // Only remember the whitespace: whether it is a descendant
            // combinator depends on what follows it.
            sawWS = true;
            selector = selector.trimLeft();
        } else {
            firstChar = selector.charAt(0);
            selector = selector.substr(1);

            if(firstChar in simpleSelectors){
                // An explicit combinator overrides any whitespace around it.
                tokens.push({type: simpleSelectors[firstChar]});
                selector = selector.trimLeft();
                sawWS = false;
                continue;
            } else if(firstChar === ","){
                if(tokens.length === 0){
                    throw new SyntaxError("empty sub-selector");
                }
                subselects.push(tokens);
                tokens = [];

                selector = selector.trimLeft();
                sawWS = false;
                continue;
            } else if(sawWS){
                tokens.push({type: "descendant"});
                sawWS = false;
            }

            if(firstChar === "*"){
                tokens.push({type: "universal"});
            } else if(firstChar in attribSelectors){
                // "#name" / ".name" shorthand
                tokens.push({
                    type: "attribute",
                    name: attribSelectors[firstChar][0],
                    action: attribSelectors[firstChar][1],
                    value: getName(),
                    ignoreCase: false
                });
            } else if(firstChar === "["){
                data = selector.match(re_attr);
                if(!data){
                    throw new SyntaxError("Malformed attribute selector: " + selector);
                }
                selector = selector.substr(data[0].length);
                name = unescapeCSS(data[1]);

                if(
                    !options || (
                        "lowerCaseAttributeNames" in options ?
                            options.lowerCaseAttributeNames :
                            !options.xmlMode
                    )
                ){
                    name = name.toLowerCase();
                }

                tokens.push({
                    type: "attribute",
                    name: name,
                    action: actionTypes[data[2]],
                    value: unescapeCSS(data[4] || data[5] || ""),
                    ignoreCase: !!data[6]
                });
            } else if(firstChar === ":"){
                //if(selector.charAt(0) === ":"){} //TODO pseudo-element
                name = getName().toLowerCase();
                data = null;

                if(selector.charAt(0) === "("){
                    // The argument is kept as raw text between the parentheses.
                    var pos = getClosingPos(selector);
                    data = selector.substr(1, pos - 2);
                    selector = selector.substr(pos);
                }

                tokens.push({type: "pseudo", name: name, data: data});
            } else {
                //otherwise, the parser needs to throw or it would enter an infinite loop
                throw new SyntaxError("Unmatched selector: " + firstChar + selector);
            }
        }
    }

    // A trailing comma leaves an empty token list behind.
    if(subselects.length > 0 && tokens.length === 0){
        throw new SyntaxError("empty sub-selector");
    }
    subselects.push(tokens);
    return subselects;
}