如何在JavaScript中将一个URL解析为主机名(hostname)和路径(path)?

我想取一个字符串

var a = "http://example.com/aa/bb/" 

并将其处理成一个对象

 a.hostname == "example.com" 

 a.pathname == "/aa/bb" 
 /**
  * Parse a URL by assigning it to a detached anchor element and letting the
  * browser split it into components.
  *
  * @param {string} href - URL to parse.
  * @returns {HTMLAnchorElement} The anchor element created for `href`; the
  *   example below reads its `hostname` and `pathname` properties.
  */
 var getLocation = function(href) {
   var anchor = document.createElement("a");
   anchor.href = href;
   return anchor;
 };

 var l = getLocation("http://example.com/path");
 console.debug(l.hostname); // "example.com"
 console.debug(l.pathname); // "/path"

出处: https://gist.github.com/jlong/2428561

 // Let the browser parse the URL by assigning it to a detached anchor element.
 var parser = document.createElement('a');
 parser.href = "http://example.com:3000/pathname/?search=test#hash";

 // Each property is annotated with the value it holds for the URL above.
 parser.protocol; // => "http:"
 parser.host;     // => "example.com:3000"
 parser.hostname; // => "example.com"
 parser.port;     // => "3000"
 parser.pathname; // => "/pathname/"
 parser.hash;     // => "#hash"
 parser.search;   // => "?search=test"
 parser.origin;   // => "http://example.com:3000"

现代的方式:

 new URL("http://example.com/aa/bb/") 

返回一个对象,其中包含 hostname、pathname 以及其他一些属性。

第一个参数是相对或绝对URL; 如果它是相对的,那么你需要指定第二个参数(基本URL)。 例如,对于相对于当前页面的URL:

 new URL("/aa/bb/", location) 

除了浏览器之外, 这个API在 v7以后也可以在Node.js中使用 ,通过require('url').URL

这是一个简单的函数,使用正则表达式来模仿 <a> 标签的行为。

优点

  • 可预测的行为(没有跨浏览器问题)
  • 不需要DOM
  • 真的很短

缺点

  • 正则表达式有点难以阅读

 /**
  * Split an absolute http(s) URL into Location-like parts with a regular
  * expression; no DOM is required.
  *
  * @param {string} href - Absolute URL beginning with "http://" or "https://".
  * @returns {?Object} An object with `href`, `protocol`, `host`, `hostname`,
  *   `port` (undefined when the URL has no port), `pathname`, `search` and
  *   `hash`; or null when `href` is not a string or does not match.
  */
 function getLocation(href) {
   // Return null for non-strings, just like for non-matching strings,
   // instead of throwing on the missing `.match` method.
   if (typeof href !== "string") {
     return null;
   }

   var match = href.match(/^(https?:)\/\/(([^:\/?#]*)(?::([0-9]+))?)(\/?[^?#]*)(\?[^#]*|)(#.*|)$/);
   if (!match) {
     return null;
   }

   return {
     href: href,
     protocol: match[1],
     host: match[2],
     hostname: match[3],
     port: match[4],
     // A URL without a path ("http://example.com") has the root path "/",
     // which is what an anchor element reports for it.
     pathname: match[5] || "/",
     search: match[6],
     hash: match[7]
   };
 }

 getLocation("http://example.com/"); /* { "protocol": "http:", "host": "example.com", "hostname": "example.com", "port": undefined, "pathname": "/" "search": "", "hash": "", } */ getLocation("http://example.com:3000/pathname/?search=test#hash"); /* { "protocol": "http:", "host": "example.com:3000", "hostname": "example.com", "port": "3000", "pathname": "/pathname/", "search": "?search=test", "hash": "#hash" } */ 

编辑:

这里是正则表达式的细分

 // The URL pattern assembled from one string per URL component, so that each
 // capture group can be labelled.
 var reURLInformation = new RegExp([
   '^(https?:)//',               // protocol
   '(([^:/?#]*)(?::([0-9]+))?)', // host (hostname and port)
   '(/{0,1}[^?#]*)',             // pathname
   '(\\?[^#]*|)',                // search
   '(#.*|)$'                     // hash
 ].join(''));

 // `href` is the URL string to parse; it is not defined in this snippet and
 // must be supplied by the surrounding code.
 var match = href.match(reURLInformation);

freddiefujiwara的答案相当不错,但我还需要在Internet Explorer中支持相对URL。我想出了以下解决方案:

 /**
  * Parse an absolute or relative URL through an anchor element, including a
  * workaround for Internet Explorer's handling of relative URLs.
  *
  * @param {string} href - URL to parse.
  * @returns {HTMLAnchorElement} The anchor element holding the parsed URL;
  *   read `hostname`, `pathname`, etc. from it.
  */
 function getLocation(href) {
   // Named `anchor` rather than `location` so it does not shadow the global
   // `location` object.
   var anchor = document.createElement("a");
   anchor.href = href;

   // IE doesn't populate all link properties when setting .href with a relative URL,
   // however .href will return an absolute URL which then can be used on itself
   // to populate these additional fields.
   if (anchor.host === "") {
     anchor.href = anchor.href;
   }

   return anchor;
 }

现在使用它来获得所需的属性:

 var a = getLocation('http://example.com/aa/bb/'); document.write(a.hostname); document.write(a.pathname); 

JSFiddle示例: http://jsfiddle.net/6AEAB/

您不需要创建任何新的URI对象,也不需要调用createElement。

 window.location; // => "http://example.com:3000/pathname/?search=test#hash" 

返回你想要的对象:

 var loc = window.location; loc.protocol; // => "http:" loc.host; // => "example.com:3000" loc.hostname; // => "example.com" loc.port; // => "3000" loc.pathname; // => "/pathname/" loc.hash; // => "#hash" loc.search; // => "?search=test" 

js-uri (在Google Code上提供)接受一个字符串形式的URL,并将其解析为一个URI对象:

 var some_uri = new URI("http://www.example.com/foo/bar"); alert(some_uri.authority); // www.example.com alert(some_uri); // http://www.example.com/foo/bar var blah = new URI("blah"); var blah_full = blah.resolve(some_uri); alert(blah_full); // http://www.example.com/foo/blah 

简单的正则表达式呢?

 // Declared with `var`: undeclared assignments create implicit globals and
 // throw a ReferenceError in strict mode and in ES modules.
 var url = "http://www.example.com/path/to/somwhere";

 // Optional "scheme://" prefix, then everything up to the first "/" (group 1),
 // then the rest of the string (group 2).
 var urlParts = /^(?:\w+\:\/\/)?([^\/]+)(.*)$/.exec(url);
 var hostname = urlParts[1]; // www.example.com
 var path = urlParts[2];     // /path/to/somwhere

这里是我从 https://gist.github.com/1847816 复制的版本,但经过了重写,因此更容易阅读和调试。之所以把锚点(anchor)的数据复制到另一个名为“result”的变量中,是因为锚点对象的数据相当长,只复制有限数量的值有助于简化结果。

 /**
  * See: https://gist.github.com/1847816
  * Parse a URI, returning an object similar to Location
  * Usage: var uri = parseUri("hello?search#hash")
  *
  * @param {string} url - URI to parse; it is resolved by an anchor element.
  * @returns {Object} Plain object with `protocol`, `hostname`, `host`,
  *   `pathname`, `port`, `search`, `hash` and `href` copied from the anchor,
  *   plus `requestUri` (pathname + search) and a `toString()` that returns
  *   the anchor's href.
  */
 function parseUri(url) {
   var result = {};
   var anchor = document.createElement('a');
   anchor.href = url;

   var keys = 'protocol hostname host pathname port search hash href'.split(' ');
   // Indexed loop instead of for...in: for...in also visits enumerable
   // properties added to Array.prototype (e.g. by polyfills), which would
   // copy junk keys into the result.
   for (var keyIndex = 0; keyIndex < keys.length; keyIndex++) {
     var currentKey = keys[keyIndex];
     result[currentKey] = anchor[currentKey];
   }

   result.toString = function() { return anchor.href; };
   result.requestUri = result.pathname + result.search;
   return result;
 }

对于那些寻求在IE、Firefox和Chrome中都能运行的现代解决方案的用户:

这些使用超链接元素的解决方案在Chrome中的表现都不一致。如果您将无效的(或空白的)URL传递给Chrome,它将始终返回调用脚本所在的主机。所以在IE中你会得到空值,而在Chrome中你会得到localhost(或其他主机)。

如果你想检查引用页(referrer),这一点会带来麻烦。为了处理这个问题,你需要确认返回的主机确实出现在原始URL中:

  /**
   * Parses the domain/host from a given url.
   *
   * @param {string} url - URL to inspect.
   * @returns {string} The hostname reported by an anchor element for `url`,
   *   or '' when that hostname does not appear in `url` itself.
   */
  function getHostNameFromUrl(url) {
    var a = document.createElement("a");
    a.href = url;

    // Handle chrome which will default to domain where script is called from if invalid.
    // The check is case-insensitive because URL parsers lower-case the host,
    // so "http://EXAMPLE.com" is reported as "example.com".
    var hostname = a.hostname;
    return url.toLowerCase().indexOf(hostname.toLowerCase()) !== -1 ? hostname : '';
  }

您也可以使用php.js项目中的parse_url()函数。

代码:

 parse_url('http://username:password@hostname/path?arg=value#anchor'); 

结果:

 { scheme: 'http', host: 'hostname', user: 'username', pass: 'password', path: '/path', query: 'arg=value', fragment: 'anchor' } 

跨浏览器的URL解析,解决了IE 6、7、8和9的相对路径问题

 /**
  * Cross-browser URL parser built on an anchor element, with workarounds for
  * relative URLs in Internet Explorer 6-9. Call it with `new`.
  *
  * The instance receives `host`, `hostname`, `hash`, `href`, `port`,
  * `protocol`, `search` and `pathname` (always with a leading "/").
  *
  * @constructor
  * @param {string} url - Absolute or relative URL to parse.
  */
 function ParsedUrl(url) {
   var parser = document.createElement("a");
   parser.href = url;

   // IE 8 and 9 dont load the attributes "protocol" and "host" in case the source URL
   // is just a pathname, that is, "/example" and not "http://domain.com/example".
   parser.href = parser.href;

   // IE 7 and 6 wont load "protocol" and "host" even with the above workaround,
   // so we take the protocol/host from window.location and place them manually
   if (parser.host === "") {
     var newProtocolAndHost = window.location.protocol + "//" + window.location.host;
     // Test the FIRST character: a root-relative URL ("/example") is appended
     // to protocol+host as is. Testing index 1 sent it to the folder branch
     // below and produced a doubled slash ("http://host//example").
     if (url.charAt(0) === "/") {
       parser.href = newProtocolAndHost + url;
     } else {
       // the regex gets everything up to the last "/"
       // /path/takesEverythingUpToAndIncludingTheLastForwardSlash/thisIsIgnored
       // "/" is inserted before because IE takes it of from pathname
       var currentFolder = ("/" + parser.pathname).match(/.*\//)[0];
       parser.href = newProtocolAndHost + currentFolder + url;
     }
   }

   // copies all the properties to this object
   var properties = ['host', 'hostname', 'hash', 'href', 'port', 'protocol', 'search'];
   for (var i = 0, n = properties.length; i < n; i++) {
     this[properties[i]] = parser[properties[i]];
   }

   // pathname is special because IE takes the "/" of the starting of pathname
   this.pathname = (parser.pathname.charAt(0) !== "/" ? "/" : "") + parser.pathname;
 }

用法( 演示JSFiddle在这里 ):

 var myUrl = new ParsedUrl("http://www.example.com:8080/path?query=123#fragment"); 

结果:

 { hash: "#fragment" host: "www.example.com:8080" hostname: "www.example.com" href: "http://www.example.com:8080/path?query=123#fragment" pathname: "/path" port: "8080" protocol: "http:" search: "?query=123" } 

AngularJS的方式 – 在线示例(fiddle): http://jsfiddle.net/PT5BG/4/

 <!DOCTYPE html> <html> <head> <title>Parse URL using AngularJS</title> </head> <body ng-app ng-controller="AppCtrl" ng-init="init()"> <h3>Parse URL using AngularJS</h3> url: <input type="text" ng-model="url" value="" style="width:780px;"> <ul> <li>href = {{parser.href}}</li> <li>protocol = {{parser.protocol}}</li> <li>host = {{parser.host}}</li> <li>hostname = {{parser.hostname}}</li> <li>port = {{parser.port}}</li> <li>pathname = {{parser.pathname}}</li> <li>hash = {{parser.hash}}</li> <li>search = {{parser.search}}</li> </ul> <script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.0.6/angular.min.js"></script> <script> function AppCtrl($scope) { $scope.$watch('url', function() { $scope.parser.href = $scope.url; }); $scope.init = function() { $scope.parser = document.createElement('a'); $scope.url = window.location; } } </script> </body> </html> 

使用模块模式的简单而强大的解决方案。其中包含一个针对IE的修复:IE的pathname并不总是带有前导斜杠( / )。

我已经创建了一个JSFiddle,它提供了一个更动态的解析器。我建议你试用一下并提供反馈。

 /**
  * URL parser built with the module pattern around a single anchor element.
  *
  * `new URLParser(url)` creates the anchor and parses `url` immediately;
  * `parse(url)` may be called again to reuse the instance. After parsing, the
  * instance carries `protocol`, `hostname`, `host`, `pathname`, `port`,
  * `search`, `hash` (without its first character), `href` and `requestUri`.
  */
 var URLParser = (function (document) {
   var PROPS = 'protocol hostname host pathname port search hash href'.split(' ');

   var self = function (url) {
     this.aEl = document.createElement('a');
     this.parse(url);
   };

   /** Parse `url` and copy its components onto this instance. */
   self.prototype.parse = function (url) {
     var anchor = this.aEl;
     anchor.href = url;
     // Re-assign the resolved href when the host came back empty, so that the
     // remaining anchor properties get populated.
     if (anchor.host === "") {
       anchor.href = anchor.href;
     }

     for (var i = 0; i < PROPS.length; i++) {
       var name = PROPS[i];
       var value = anchor[name];
       // The hash is stored without its first character (the "#").
       this[name] = name === 'hash' ? value.slice(1) : value;
     }

     // Guarantee a leading slash on the pathname.
     if (this.pathname.charAt(0) !== '/') {
       this.pathname = '/' + this.pathname;
     }
     this.requestUri = this.pathname + this.search;
   };

   /** Return the parsed components (plus `requestUri`) as a plain object. */
   self.prototype.toObj = function () {
     var snapshot = {};
     for (var i = 0; i < PROPS.length; i++) {
       snapshot[PROPS[i]] = this[PROPS[i]];
     }
     snapshot.requestUri = this.requestUri;
     return snapshot;
   };

   /** Return the parsed href. */
   self.prototype.toString = function () {
     return this.href;
   };

   return self;
 })(document);

演示

 /**
  * URL parser built with the module pattern around a single anchor element.
  * After `parse(url)` the instance carries `protocol`, `hostname`, `host`,
  * `pathname`, `port`, `search`, `hash` (without its first character),
  * `href` and `requestUri`.
  */
 var URLParser = (function(document) {
   var PROPS = 'protocol hostname host pathname port search hash href'.split(' ');

   var self = function(url) {
     this.aEl = document.createElement('a');
     this.parse(url);
   };

   self.prototype.parse = function(url) {
     this.aEl.href = url;
     // Re-assign the resolved href when the host came back empty, so that the
     // remaining anchor properties get populated.
     if (this.aEl.host === "") {
       this.aEl.href = this.aEl.href;
     }
     PROPS.forEach(function(prop) {
       switch (prop) {
         case 'hash':
           // Stored without its first character (the "#").
           this[prop] = this.aEl[prop].substr(1);
           break;
         default:
           this[prop] = this.aEl[prop];
       }
     }, this);
     // Guarantee a leading slash on the pathname.
     if (this.pathname.indexOf('/') !== 0) {
       this.pathname = '/' + this.pathname;
     }
     this.requestUri = this.pathname + this.search;
   };

   self.prototype.toObj = function() {
     var obj = {};
     PROPS.forEach(function(prop) {
       obj[prop] = this[prop];
     }, this);
     obj.requestUri = this.requestUri;
     return obj;
   };

   self.prototype.toString = function() {
     return this.href;
   };

   return self;
 })(document);

 /* Main */
 var out = document.getElementById('out');
 var urls = [
   'https://www.example.org:5887/foo/bar?a=1&b=2#section-1',
   'ftp://www.files.com:22/folder?id=7'
 ];
 var parser = new URLParser();
 urls.forEach(function(url) {
   parser.parse(url);
   println(out, JSON.stringify(parser.toObj(), undefined, ' '), 0, '#0000A7');
 });

 /* Utility functions */

 /**
  * Append `text` to `el` inside a new <span>.
  *
  * @param {Element} el - Container that receives the span.
  * @param {string} text - Text to display.
  * @param {string} [bgColor] - Background colour; '#FFFFFF' when falsy.
  * @param {string} [fgColor] - Text colour; '#000000' when falsy.
  */
 function print(el, text, bgColor, fgColor) {
   var span = document.createElement('span');
   // textContent, not innerHTML: the text is data (URLs, JSON), and "<" or
   // entity-like sequences such as "&copy" must be shown literally rather
   // than interpreted as markup.
   span.textContent = text;
   span.style['backgroundColor'] = bgColor || '#FFFFFF';
   span.style['color'] = fgColor || '#000000';
   el.appendChild(span);
 }

 /** Same as print(), followed by a line break. */
 function println(el, text, bgColor, fgColor) {
   print(el, text, bgColor, fgColor);
   el.appendChild(document.createElement('br'));
 }
 body { background: #444; } span { background-color: #fff; border: thin solid black; display: inline-block; } #out { display: block; font-family: Consolas, Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, Courier New, monospace, serif; font-size: 12px; white-space: pre; } 
 <div id="out"></div> 

今天我遇到了这个问题,我发现: URL – MDN Web API

 var url = new URL("http://test.example.com/dir/subdir/file.html#hash"); 

返回结果:

 { hash:"#hash", host:"test.example.com", hostname:"test.example.com", href:"http://test.example.com/dir/subdir/file.html#hash", origin:"http://test.example.com", password:"", pathname:"/dir/subdir/file.html", port:"", protocol:"http:", search: "", username: "" } 

希望我的第一个贡献可以帮助你!

为此,请使用https://www.npmjs.com/package/uri-parse-lib

 var t = parserURI("http://user:pass@example.com:8080/directory/file.ext?query=1&next=4&sed=5#anchor"); 

停止重新发明轮子。 使用https://github.com/medialize/URI.js/

 var uri = new URI("http://example.org:80/foo/hello.html"); // get host uri.host(); // returns string "example.org:80" // set host uri.host("example.org:80"); 

只需使用url.js库(对于web和node.js)。

https://github.com/websanova/js-url

 url: http://example.com?param=test#param=again url('?param'); // test url('#param'); // again url('protocol'); // http url('port'); // 80 url('domain'); // example.com url('tld'); // com etc... 

为什么不使用它?

  /**
   * Parse a URL through an anchor element and return its components as a
   * plain object.
   *
   * @param {string} url_str - URL to parse,
   *   e.g. "http://example.com:3000/pathname/?search=test#hash".
   * @returns {Object} Object with `protocol`, `hostname`, `port`, `pathname`,
   *   `search`, `hash` and `host` read from the anchor.
   */
  $scope.get_location = function(url_str) {
    var parser = document.createElement('a');
    parser.href = url_str;

    // Sample values are for "http://example.com:3000/pathname/?search=test#hash".
    // No trailing comma after the last property: legacy IE rejects it.
    var info = {
      protocol: parser.protocol,
      hostname: parser.hostname, // => "example.com"
      port: parser.port,         // => "3000"
      pathname: parser.pathname, // => "/pathname/"
      search: parser.search,     // => "?search=test"
      hash: parser.hash,         // => "#hash"
      host: parser.host          // => "example.com:3000"
    };
    return info;
  };

  alert(JSON.stringify($scope.get_location("http://localhost:257/index.php/deploy/?asd=asd#asd"), null, 4));
 /**
  * Parse an absolute or relative URL with a regular expression (no DOM).
  *
  * @param {string} url - URL to parse.
  * @returns {?Object} Object with `hash`, `host`, `hostname`, `href`,
  *   `origin`, `pathname`, `port`, `protocol` and `search` (each "" when the
  *   part is absent), or null when `url` does not match the pattern.
  */
 function parseUrl(url) {
   var m = url.match(/^(([^:\/?#]+:)?(?:\/\/(([^\/?#:]*)(?::([^\/?#:]*))?)))?([^?#]*)(\?[^#]*)?(#.*)?$/);

   // Check the match BEFORE reading its groups. The pattern can fail (e.g. a
   // line break inside the fragment, which "." does not match), and indexing
   // a null match would throw.
   if (!m) {
     return null;
   }

   // Sample values are for "http://localhost:257/deploy/?asd=asd#asd".
   var r = {
     hash: m[8] || "",                    // #asd
     host: m[3] || "",                    // localhost:257
     hostname: m[4] || "",                // localhost
     href: m[0] || "",                    // http://localhost:257/deploy/?asd=asd#asd
     origin: m[1] || "",                  // http://localhost:257
     pathname: m[6] || (m[1] ? "/" : ""), // /deploy/
     port: m[5] || "",                    // 257
     protocol: m[2] || "",                // http:
     search: m[7] || ""                   // ?asd=asd
   };

   // A two-character protocol is a single character plus ":" (presumably a
   // Windows drive letter such as "c:"); it is rewritten as a file: URL.
   if (r.protocol.length === 2) {
     r.protocol = "file:///" + r.protocol.toUpperCase();
     r.origin = r.protocol + "//" + r.host;
   }

   // Rebuild href from the (possibly adjusted) parts.
   r.href = r.origin + r.pathname + r.search + r.hash;
   return r;
 }

它适用于绝对和相对的url