Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/url.js

Issue 5564089086509056: Issue 1801 - Use URL objects to process URLs in the background page (Closed)
Patch Set: Rebased and addressed comments Created Feb. 11, 2015, 5:06 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/basedomain.js ('k') | lib/whitelisting.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/url.js
===================================================================
--- a/lib/url.js
+++ b/lib/url.js
@@ -45,3 +45,116 @@
return URL;
})();
+
+/**
+ * Gets the IDN-decoded hostname from a URL object.
+ *
+ * A hostname without any "xn--" marker is returned as is; otherwise
+ * the hostname is converted with punycode.toUnicode().
+ *
+ * @param {URL} [url] The URL object whose "hostname" property is read
+ * @return {string} The hostname, decoded if necessary
+ */
+function getDecodedHostname(url)
+{
+ let hostname = url.hostname;
+
+ if (hostname.indexOf("xn--") == -1) // nothing to decode
+ return hostname;
+
+ return punycode.toUnicode(hostname);
+}
+exports.getDecodedHostname = getDecodedHostname;
+
+/**
+ * Gets the IDN-decoded hostname from the URL of a frame.
+ * If the URL doesn't have host information (like "about:blank"
+ * and "data:" URLs) it falls back to the parent frame, walking up
+ * the chain of parents until a non-empty hostname is found.
+ *
+ * @param {Frame} [frame] The frame to start from; its "url" and
+ *                        "parent" properties are read
+ * @return {string} The decoded hostname, or an empty string if no
+ *                  frame in the chain yields one
+ */
+function extractHostFromFrame(frame)
+{
+ for (; frame; frame = frame.parent)
+ {
+ let hostname = getDecodedHostname(frame.url);
+ if (hostname) // an empty hostname means: keep looking further up
+ return hostname;
+ }
+
+ return "";
+}
+exports.extractHostFromFrame = extractHostFromFrame;
+
+/**
+ * Converts a URL object into a string. For HTTP(S) URLs the hash and
+ * auth credentials are stripped, and the hostname gets IDN-decoded.
+ * URLs with any other protocol are returned as their unmodified "href".
+ *
+ * @param {URL} [url] The URL object to convert
+ * @return {string} The resulting URL string
+ */
+function stringifyURL(url)
+{
+ let protocol = url.protocol;
+ if (protocol != "http:" && protocol != "https:")
+ return url.href;
+
+ let host = getDecodedHostname(url);
+ if (url.port) // only append the port when the URL object reports one
+ host += ":" + url.port;
+ return protocol + "//" + host + url.pathname + url.search; // rebuilt without username, password and hash
+}
+exports.stringifyURL = stringifyURL;
+
+function isDomain(hostname) // Returns false for an empty hostname, an IPv4-style address or anything containing ":"; true otherwise.
+{
+ // No hostname or IPv4 address, also considering hexadecimal octets. The pattern accepts zero or more dot-separated groups of decimal or "0x"-prefixed hex digits, so the empty string matches too.
+ if (/^((0x[\da-f]+|\d+)(\.|$))*$/i.test(hostname))
+ return false;
+
+ // IPv6 address. Since there can't be colons in domains, we can
+ // just check whether there are any colons to exclude IPv6 addresses.
+ return hostname.indexOf(":") == -1;
+}
+
+function getBaseDomain(hostname) // Reduces a hostname to its trailing labels, using the publicSuffixes lookup table (defined outside this hunk) to decide where to cut.
+{
+ let bits = hostname.split(".");
+ let cutoff = bits.length - 2; // default when no suffix matches: keep the last two labels
+
+ for (let i = 0; i < bits.length; i++) // tries the longest candidate suffix first, then drops leading labels one by one
+ {
+ let offset = publicSuffixes[bits.slice(i).join(".")];
+
+ if (typeof offset != "undefined")
+ {
+ cutoff = i - offset; // NOTE(review): offset presumably is the number of labels to keep in front of the matched suffix; confirm against the publicSuffixes data
+ break;
+ }
+ }
+
+ if (cutoff <= 0) // nothing to cut off, the whole hostname is the result
+ return hostname;
+
+ return bits.slice(cutoff).join(".");
+}
+
+/**
+ * Checks whether the request's origin is different from the document's origin.
+ *
+ * Trailing dots are removed from both hostnames first. Equal hostnames are
+ * first-party. If they differ and either one isn't a domain according to
+ * isDomain(), the request is third-party. Otherwise the result depends on
+ * whether getBaseDomain() yields different values for the two hostnames.
+ *
+ * @param {URL} [url] The request URL
+ * @param {string} [documentHost] The IDN-decoded hostname of the document
+ * @return {Boolean} true if the request is considered third-party
+ */
+function isThirdParty(url, documentHost)
+{
+ let requestHost = getDecodedHostname(url).replace(/\.+$/, "");
+ documentHost = documentHost.replace(/\.+$/, "");
+
+ if (requestHost == documentHost)
+ return false;
+
+ if (!isDomain(requestHost) || !isDomain(documentHost)) // hosts that differ and aren't both domains (per isDomain) count as third-party
+ return true;
+
+ return getBaseDomain(requestHost) != getBaseDomain(documentHost);
+}
+exports.isThirdParty = isThirdParty;
« no previous file with comments | « lib/basedomain.js ('k') | lib/whitelisting.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld