注意:之后的讲解通通以div > p + .aaron[type="checkbox"], #id:first-child
这个选择器为例
词法解析
拿到这个选择器之后首先要做的事情是把它分解成一个个单元,单元结构如下
{
value: '匹配到的字符串',
type: '对应的Token类型',
matches: '正则匹配到的一个结构'
}
上面的选择器拆分之后就是
[
[
{ value: "div", type: "TAG", matches: ["div"] },
{ value: " > ", type: ">" },
{ value: "p", type: "TAG", matches: ["p"] },
{ value: " + ", type: "+" },
{ value: ".aaron", type: "CLASS", matches: ["aaron"] },
{ value: "[type='checkbox']", type: "ATTR", matches: ["type", "=", "checkbox"] }
],
[
{ value: "#id", type: "ID", matches: ["id"] },
{ value: ":first-child", type: "CHILD", matches: Array }
]
]
这一个过程叫做词法解析
Sizzle.tokenize
Sizzle.tokenize()
就是词法解析的接口,将css选择器传入就能得到上面的Token序列
,接下来用代码去实现。
整体架构
首先搭建一个大体的框架,和jQuery整体架构一样,外部用一个即时函数包裹
(function(window, undefined) {
var Sizzle = function() {
}
window.Sizzle = Sizzle;
})(window);
主体
注释已经做了很详细的讲解了,剩下的就是在debug模式下一步步的调试,看代码是如何执行的。其实词法解析这一篇内容大可以跳过,因为它的任务只是将css选择器解析成Token序列
,不理解Sizzle.tokenize
的源码也不影响接下来的代码学习,只需要有解析这个步骤罢了。至于怎么解析,要是能力很强的话可以自己写一个,像我这种菜鸟选择照搬jQuery源码
Sizzle.tokenize = function (selector, parseOnly) {
var matched, // 用于判断每执行一次while循环是否解析成功,如果为false说明css选择器有错误,直接终止循环
match, // 正则匹配结果
tokens, // Token序列
soFar = selector, // 表示字符串未解析的部分
groups = [], // 存放已经解析好的Token序列
preFilters = Expr.preFilter, // 预处理用
cached = tokenCache[selector]; // 缓存
if (cached) {
return parseOnly ? 0 : cached;
}
while (soFar) {
// 检查是否有逗号,有逗号的话就是多个Token序列
// 比如像'div > p + .aaron[type="checkbox"], #id:first-child'这个选择器就是两个Token序列
// groups = [
// [序列一], // div > p + .aaron[type="checkbox"]
// [序列二] // #id:first-child
// ]
if (!matched || (match = rcomma.exec(soFar))) {
if (match) {
// 清除逗号
soFar = soFar.slice(match[0].length);
}
// 每有一个逗号,都要往groups压入一个Token序列
// 然后就是最开始的时候也要往groups压入一个Token
groups.push(tokens = []);
}
matched = false;
// 处理特殊的Token:>, +, 空格, ~
if (match = rcombinators.exec(soFar)) {
matched = match.shift();
tokens.push({
value: matched,
type: match[0].replace(rtrim, " ")
});
// 处理完之后将其从待处理的字符中删除
soFar = soFar.slice(matched.length);
}
// 这里开始分析这几种Token:TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME
// 如果通过正则匹配到了Token格式:match = matchExpr[ type ].exec( soFar )
// 需要预处理的Token:ATTR, CHILD, PSEUDO
// 交给预处理器处理:match = preFilters[ type ]( match )
for (var type in Expr.filter) {
if ( match = matchExpr[type].exec(soFar) ) {
// 预处理
if (preFilters[type]) {
match = preFilters[type](match);
}
matched = match.shift();
tokens.push({
value: matched,
type: type,
matches: match
})
// 处理完之后将其从待处理的字符中删除
soFar = soFar.slice(matched.length);
}
}
// 如果到了这里都还没matched到,那么说明这个选择器在这里有错误
// 直接中断词法分析过程
// 这就是Sizzle对词法分析的异常处理
if (!matched) {
break;
}
}
// 如果只需要这个接口检查选择器的合法性,直接就返回soFar的剩余长度,因为soFar长度大于零说明选择器不合法
// 其余情况,如果soFar不等于"",抛出异常;否则把groups记录在cache里边并返回
return parseOnly ?
soFar.length :
soFar ?
Sizzle.error(selector) :
// 这里对选择器(selector)做一个缓存
tokenCache(selector, groups);
}
其余代码
如果只有上面的代码是运行不了的,因为上面代码所用到的一些变量和方法是未定义的,接下来把那些缺失的变量和方法添上
定义变量
这些变量几乎都是正则表达式,用来匹配不同Token(TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME)
的。表达式不用去研究了,只需要明白每个表达式是匹配哪种Token的就行了。我之前尝试去弄懂为什么表达式要那样写,然而到现在我都没弄懂,而且研究源码也不是每个细节都需要弄懂,只要把整体思路理解了就行了。
var
// 选择器缓存
tokenCache = createCache(),
booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" +
"ismap|loop|multiple|open|readonly|required|scoped",
// 空白字符
whitespace = "[\\x20\\t\\r\\n\\f]",
// ClassName或ID Name
identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace +
"?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+",
// css选择器属性,如[type="phone"]
attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace +
// Operator (capture 2)
"*([*^$|!~]?=)" + whitespace +
// "Attribute values must be CSS identifiers [capture 5]
// or strings [capture 3 or capture 4]"
"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" +
whitespace + "*\\]",
pseudos = ":(" + identifier + ")(?:\\((" +
// To reduce the number of selectors needing tokenize in the preFilter, prefer arguments:
// 1. quoted (capture 3; capture 4 or capture 5)
"('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" +
// 2. simple (capture 6)
"((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" +
// 3. anything else (capture 2)
".*" +
")\\)|)",
// Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter
rwhitespace = new RegExp( whitespace + "+", "g" ),
// 去除左右空格用
rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" +
whitespace + "+$", "g" ),
// 逗号
rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ),
// >, +, 空格, ~
rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace +
"*" ),
rdescend = new RegExp( whitespace + "|>" ),
rpseudo = new RegExp( pseudos ),
ridentifier = new RegExp( "^" + identifier + "$" ),
matchExpr = {
"ID": new RegExp( "^#(" + identifier + ")" ),
"CLASS": new RegExp( "^\\.(" + identifier + ")" ),
"TAG": new RegExp( "^(" + identifier + "|[*])" ),
"ATTR": new RegExp( "^" + attributes ),
"PSEUDO": new RegExp( "^" + pseudos ),
"CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" +
whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" +
whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ),
"bool": new RegExp( "^(?:" + booleans + ")$", "i" ),
// For use in libraries implementing .is()
// We use this for POS matching in `select`
"needsContext": new RegExp( "^" + whitespace +
"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace +
"*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" )
},
rhtml = /HTML$/i,
rinputs = /^(?:input|select|textarea|button)$/i,
rheader = /^h\d$/i,
rnative = /^[^{]+\{\s*\[native \w/,
// Easily-parseable/retrievable ID or TAG or CLASS selectors
rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,
rsibling = /[+~]/,
// CSS escapes
// http://www.w3.org/TR/CSS21/syndata.html#escaped-characters
runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ),
funescape = function( escape, nonHex ) {
var high = "0x" + escape.slice( 1 ) - 0x10000;
return nonHex ?
// Strip the backslash prefix from a non-hex escape sequence
nonHex :
// Replace a hexadecimal escape sequence with the encoded Unicode code point
// Support: IE <=11+
// For values outside the Basic Multilingual Plane (BMP), manually construct a
// surrogate pair
high < 0 ?
String.fromCharCode( high + 0x10000 ) :
String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 );
};
定义方法
var Expr = Sizzle.selectors = {
// 选择器缓存长度
cacheLength: 50,
// 预处理
preFilter: {
"ATTR": function( match ) {
match[ 1 ] = match[ 1 ].replace( runescape, funescape );
// Move the given value to match[3] whether quoted or unquoted
match[ 3 ] = ( match[ 3 ] || match[ 4 ] ||
match[ 5 ] || "" ).replace( runescape, funescape );
if ( match[ 2 ] === "~=" ) {
match[ 3 ] = " " + match[ 3 ] + " ";
}
return match.slice( 0, 4 );
},
"CHILD": function( match ) {
/* matches from matchExpr["CHILD"]
1 type (only|nth|...)
2 what (child|of-type)
3 argument (even|odd|\d*|\d*n([+-]\d+)?|...)
4 xn-component of xn+y argument ([+-]?\d*n|)
5 sign of xn-component
6 x of xn-component
7 sign of y-component
8 y of y-component
*/
match[ 1 ] = match[ 1 ].toLowerCase();
if ( match[ 1 ].slice( 0, 3 ) === "nth" ) {
// nth-* requires argument
if ( !match[ 3 ] ) {
Sizzle.error( match[ 0 ] );
}
// numeric x and y parameters for Expr.filter.CHILD
// remember that false/true cast respectively to 0/1
match[ 4 ] = +( match[ 4 ] ?
match[ 5 ] + ( match[ 6 ] || 1 ) :
2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) );
match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" );
// other types prohibit arguments
} else if ( match[ 3 ] ) {
Sizzle.error( match[ 0 ] );
}
return match;
},
"PSEUDO": function( match ) {
var excess,
unquoted = !match[ 6 ] && match[ 2 ];
if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) {
return null;
}
// Accept quoted arguments as-is
if ( match[ 3 ] ) {
match[ 2 ] = match[ 4 ] || match[ 5 ] || "";
// Strip excess characters from unquoted arguments
} else if ( unquoted && rpseudo.test( unquoted ) &&
// Get excess from tokenize (recursively)
( excess = tokenize( unquoted, true ) ) &&
// advance to the next closing parenthesis
( excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length ) ) {
// excess is a negative index
match[ 0 ] = match[ 0 ].slice( 0, excess );
match[ 2 ] = unquoted.slice( 0, excess );
}
// Return only captures needed by the pseudo filter method (type and argument)
return match.slice( 0, 3 );
}
},
filter: {
"TAG": function( nodeNameSelector ) {
var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase();
return nodeNameSelector === "*" ?
function() {
return true;
} :
function( elem ) {
return elem.nodeName && elem.nodeName.toLowerCase() === nodeName;
};
},
"CLASS": function( className ) {
var pattern = classCache[ className + " " ];
return pattern ||
( pattern = new RegExp( "(^|" + whitespace +
")" + className + "(" + whitespace + "|$)" ) ) && classCache(
className, function( elem ) {
return pattern.test(
typeof elem.className === "string" && elem.className ||
typeof elem.getAttribute !== "undefined" &&
elem.getAttribute( "class" ) ||
""
);
} );
},
"ATTR": function( name, operator, check ) {
return function( elem ) {
var result = Sizzle.attr( elem, name );
if ( result == null ) {
return operator === "!=";
}
if ( !operator ) {
return true;
}
result += "";
/* eslint-disable max-len */
return operator === "=" ? result === check :
operator === "!=" ? result !== check :
operator === "^=" ? check && result.indexOf( check ) === 0 :
operator === "*=" ? check && result.indexOf( check ) > -1 :
operator === "$=" ? check && result.slice( -check.length ) === check :
operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 :
operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" :
false;
/* eslint-enable max-len */
};
},
"CHILD": function( type, what, _argument, first, last ) {
var simple = type.slice( 0, 3 ) !== "nth",
forward = type.slice( -4 ) !== "last",
ofType = what === "of-type";
return first === 1 && last === 0 ?
// Shortcut for :nth-*(n)
function( elem ) {
return !!elem.parentNode;
} :
function( elem, _context, xml ) {
var cache, uniqueCache, outerCache, node, nodeIndex, start,
dir = simple !== forward ? "nextSibling" : "previousSibling",
parent = elem.parentNode,
name = ofType && elem.nodeName.toLowerCase(),
useCache = !xml && !ofType,
diff = false;
if ( parent ) {
// :(first|last|only)-(child|of-type)
if ( simple ) {
while ( dir ) {
node = elem;
while ( ( node = node[ dir ] ) ) {
if ( ofType ?
node.nodeName.toLowerCase() === name :
node.nodeType === 1 ) {
return false;
}
}
// Reverse direction for :only-* (if we haven't yet done so)
start = dir = type === "only" && !start && "nextSibling";
}
return true;
}
start = [ forward ? parent.firstChild : parent.lastChild ];
// non-xml :nth-child(...) stores cache data on `parent`
if ( forward && useCache ) {
// Seek `elem` from a previously-cached index
// ...in a gzip-friendly way
node = parent;
outerCache = node[ expando ] || ( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
cache = uniqueCache[ type ] || [];
nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
diff = nodeIndex && cache[ 2 ];
node = nodeIndex && parent.childNodes[ nodeIndex ];
while ( ( node = ++nodeIndex && node && node[ dir ] ||
// Fallback to seeking `elem` from the start
( diff = nodeIndex = 0 ) || start.pop() ) ) {
// When found, cache indexes on `parent` and break
if ( node.nodeType === 1 && ++diff && node === elem ) {
uniqueCache[ type ] = [ dirruns, nodeIndex, diff ];
break;
}
}
} else {
// Use previously-cached element index if available
if ( useCache ) {
// ...in a gzip-friendly way
node = elem;
outerCache = node[ expando ] || ( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
cache = uniqueCache[ type ] || [];
nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
diff = nodeIndex;
}
// xml :nth-child(...)
// or :nth-last-child(...) or :nth(-last)?-of-type(...)
if ( diff === false ) {
// Use the same loop as above to seek `elem` from the start
while ( ( node = ++nodeIndex && node && node[ dir ] ||
( diff = nodeIndex = 0 ) || start.pop() ) ) {
if ( ( ofType ?
node.nodeName.toLowerCase() === name :
node.nodeType === 1 ) &&
++diff ) {
// Cache the index of each encountered element
if ( useCache ) {
outerCache = node[ expando ] ||
( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
uniqueCache[ type ] = [ dirruns, diff ];
}
if ( node === elem ) {
break;
}
}
}
}
}
// Incorporate the offset, then check against cycle size
diff -= last;
return diff === first || ( diff % first === 0 && diff / first >= 0 );
}
};
},
"PSEUDO": function( pseudo, argument ) {
// pseudo-class names are case-insensitive
// http://www.w3.org/TR/selectors/#pseudo-classes
// Prioritize by case sensitivity in case custom pseudos are added with uppercase letters
// Remember that setFilters inherits from pseudos
var args,
fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] ||
Sizzle.error( "unsupported pseudo: " + pseudo );
// The user may use createPseudo to indicate that
// arguments are needed to create the filter function
// just as Sizzle does
if ( fn[ expando ] ) {
return fn( argument );
}
// But maintain support for old signatures
if ( fn.length > 1 ) {
args = [ pseudo, pseudo, "", argument ];
return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ?
markFunction( function( seed, matches ) {
var idx,
matched = fn( seed, argument ),
i = matched.length;
while ( i-- ) {
idx = indexOf( seed, matched[ i ] );
seed[ idx ] = !( matches[ idx ] = matched[ i ] );
}
} ) :
function( elem ) {
return fn( elem, 0, args );
};
}
return fn;
},
"ID": function( id ) {
var attrId = id.replace( runescape, funescape );
return function( elem ) {
return elem.getAttribute( "id" ) === attrId;
};
}
},
}
// 异常
Sizzle.error = function( msg ) {
throw new Error( "Syntax error, unrecognized expression: " + msg );
}
// 选择器缓存
function createCache() {
var keys = [];
function cache(key, value) {
if (keys.push(key) > Expr.cacheLength) {
delete cache[keys.shift()];
}
return (cache[key] = value);
}
return cache;
}
代码下载
传送门