解析URL方案总结

对于一名前端工程来说,解析url这种操作是衡量是否合格的标准之一。今天,就来探讨几种神奇的解析方法。

(一)a标签自动解析URL

var a = document.createElement('a');
a.href = 'http://www.example.com/news.php?id=10#footer';

var div = document.createElement('div');
for (var key in a) {
    !(key in div) && !!a[key] && console.log(`${key} = ${a[key]}`);
}

输出结果如下所示:

relList = 
href = http://www.example.com/news.php?id=10#footer
origin = http://www.example.com
protocol = http:
host = www.example.com
hostname = www.example.com
pathname = /news.php
search = ?id=10
hash = #footer

这些都是a标签特有的属性,且a标签的该属性有值,也就是说,a标签自动完成了URL解析,对于前端来说,这曾经是解析URL最廉价的方式,如下为更加健壮的通用方法:

function parseURL(url) {
    var a =  document.createElement('a');
    a.href = url;
    return {
        source: url,
        protocol: a.protocol.replace(':',''),
        host: a.hostname,
        port: a.port,
        query: a.search,
        params: (function(){
            var ret = {},
                seg = a.search.replace(/^\?/,'').split('&'),
                len = seg.length, i = 0, s;
            for (;i

该方法绝对可靠

(二)JS URL API

var url = new URL('http://www.example.com:$88;9,[email protected]$/what??key=val?&{http://?query=2#45');
for (var key in url) {
    console.log(`${key} = ${url[key]}`);
}

Chrome下输出结果:

"href = http://www%2Eexample%2Ecom:$88%3B9,[email protected]$/what??key=val?&{http://?query=2#45"
"origin = http://www.abc.com$"
"protocol = http:"
username = www%2Eexample%2Ecom
password = $88%3B9,9
host = www.abc.com$
hostname = www.abc.com$
port = 
pathname = /what
search = ??key=val?&&
searchParams = %3Fkey=val%3F
hash = #123http://?query=2#45

如要获得hostname,有如下方法:

var getHostname = function(url) {
    return new URL(url).hostname;
};

该方法目前只有部分浏览器支持,兼容情况请查阅:https://developer.mozilla.org/zh-CN/docs/Web/API/URL/URL

(三)神级操作正则解析

var parseUrl = function(url) {
    var urlParseRE = /^\s*(((([^:\/#\?]+:)?(?:(\/\/)((?:(([^:@\/#\?]+)(?:\:([^:@\/#\?]+))?)@)?(([^:\/#\?\]\[]+|\[[^\/\]@#?]+\])(?:\:([0-9]+))?))?)?)?((\/?(?:[^\/\?#]+\/+)*)([^\?#]*)))?(\?[^#]+)?)(#.*)?/;

    var matches = urlParseRE.exec(url || "") || [];

    return {
        href:         matches[0] || "",
        hrefNoHash:   matches[1] || "",
        hrefNoSearch: matches[2] || "",
        domain:       matches[3] || "",
        protocol:     matches[4] || "",
        doubleSlash:  matches[5] || "",
        authority:    matches[6] || "",
        username:     matches[8] || "",
        password:     matches[9] || "",
        host:         matches[10] || "",
        hostname:     matches[11] || "",
        port:         matches[12] || "",
        pathname:     matches[13] || "",
        directory:    matches[14] || "",
        filename:     matches[15] || "",
        search:       matches[16] || "",
        hash:         matches[17] || ""
    };
};

解读:

/^                      #href
\s*
(                       #hrefNoHash
  (                     #hrefNoSearch
    (                   #domain
      ([^:\/#\?]+:)?    #protocol
      (?:
        (\/\/)          #doubleSlash
        (               #authority
          (?:
            (           #取结果时$7被跳过了,应该也用非捕获型括号(?:
              ([^:@\/#\?]+)     #username
              (?:
                \:
                ([^:@\/#\?]+)   #password
              )?
            )
            @
          )?
          (                     #host
            ([^:\/#\?\]\[]+|\[[^\/\]@#?]+\])    #hostname
            (?:
              \:
              ([0-9]+)  #port
            )?
          )
        )?
      )?
    )?
    (                   #pathname
      (\/?(?:[^\/\?#]+\/+)*)    #directory
      ([^\?#]*)         #filename
    )
  )?
  (\?[^#]+)?            #search
)
(#.*)?                  #hash
/

具体自行领会~

你可能感兴趣的:(解析URL方案总结)