/*******************************************************************************

    publicsuffixlist.js - an efficient javascript implementation to deal with
    Mozilla Foundation's Public Suffix List
    Copyright (C) 2013 Raymond Hill

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see {http://www.gnu.org/licenses/}.

*/

/*! Home: https://github.com/gorhill/publicsuffixlist.js */

'use strict';

/*
    This code is mostly dumb: I consider this to be lower-level code, thus
    in order to ensure efficiency, the caller is responsible for sanitizing
    the inputs.
*/

/******************************************************************************/

// A single instance of PublicSuffixList is enough.

;(function(root) {

/******************************************************************************/

// Both stores map a TLD (the last label of a rule) to the labels found to the
// left of that TLD in the rules of the list. See `crystallize()` for the
// exact representation of the values.
let exceptions = new Map();
let rules = new Map();

// This value dictates how the search will be performed:
//    < cutoffLength = indexOf()
//   >= cutoffLength = binary search
const cutoffLength = 256;

// Anything which is not a word character, a dot, a wildcard or a dash means
// the rule has to be converted to punycode.
const mustPunycode = /[^\w.*-]/;

/******************************************************************************/

// In the context of this code, a domain is defined as:
//   "{label}.{public suffix}".
// A single standalone label is a public suffix as per
// http://publicsuffix.org/list/:
//   "If no rules match, the prevailing rule is '*' "
// This means 'localhost' is not deemed a domain by this
// code, since according to the definition above, it would be
// evaluated as a public suffix. The caller is therefore responsible to
// decide how to further interpret such public suffix.
//
// `hostname` must be a valid ascii-based hostname.
//
// Returns the domain as a lower-cased string, or an empty string when
// `hostname` is empty, starts with a dot, or is itself a public suffix.

function getDomain(hostname) {
    // A hostname starting with a dot is not a valid hostname.
    if ( !hostname || hostname.charAt(0) === '.' ) {
        return '';
    }
    hostname = hostname.toLowerCase();
    const suffix = getPublicSuffix(hostname);
    if ( suffix === hostname ) {
        return '';
    }
    // Locate the dot which precedes the public suffix, then the dot which
    // precedes the label just before the public suffix.
    const suffixDot = hostname.lastIndexOf('.', hostname.length - suffix.length);
    const pos = hostname.lastIndexOf('.', suffixDot - 1);
    if ( pos <= 0 ) {
        return hostname;
    }
    return hostname.slice(pos + 1);
}

/******************************************************************************/

// Return longest public suffix.
//
// `hostname` must be a valid ascii-based string which respect hostname naming.
// No lower-casing is performed here: this is left to the caller.

function getPublicSuffix(hostname) {
    if ( !hostname ) {
        return '';
    }
    // Since we slice down the hostname with each pass, the first match
    // is the longest, so no need to find all the matching rules.
    while ( true ) {
        const pos = hostname.indexOf('.');
        // A single label is always a public suffix ("prevailing rule is '*'").
        if ( pos < 0 ) {
            return hostname;
        }
        // An exception rule means the public suffix is one label shorter.
        if ( search(exceptions, hostname) ) {
            return hostname.slice(pos + 1);
        }
        if ( search(rules, hostname) ) {
            return hostname;
        }
        // Wildcard rule: replace the leftmost label with '*'.
        if ( search(rules, '*' + hostname.slice(pos)) ) {
            return hostname;
        }
        hostname = hostname.slice(pos + 1);
    }
    // unreachable
}

/******************************************************************************/

// Look up a specific hostname.
//
// `store` is either `rules` or `exceptions`. Returns true if the labels at
// the left of the TLD of `hostname` are found in the store for that TLD.
// All callers in this file pass a hostname which contains at least one dot.

function search(store, hostname) {
    // Extract TLD
    let tld, remainder;
    const pos = hostname.lastIndexOf('.');
    if ( pos === -1 ) {
        tld = hostname;
        remainder = hostname;
    } else {
        tld = hostname.slice(pos + 1);
        remainder = hostname.slice(0, pos);
    }
    const substore = store.get(tld);
    if ( substore === undefined ) {
        return false;
    }
    // If substore is a string, use indexOf(): entries are space-delimited,
    // hence the needle is wrapped in spaces to match whole entries only.
    if ( typeof substore === 'string' ) {
        return substore.indexOf(' ' + remainder + ' ') >= 0;
    }
    // It is an array: use binary search.
    const l = remainder.length;
    if ( l >= substore.length ) {
        return false;
    }
    // All entries of length `l`, sorted and concatenated without separator.
    const haystack = substore[l];
    if ( haystack === null ) {
        return false;
    }
    let left = 0;
    let right = Math.floor(haystack.length / l + 0.5);
    while ( left < right ) {
        const i = (left + right) >> 1;
        const needle = haystack.slice(l * i, l * i + l);
        if ( remainder < needle ) {
            right = i;
        } else if ( remainder > needle ) {
            left = i + 1;
        } else {
            return true;
        }
    }
    return false;
}

/******************************************************************************/

// Notify listeners that a new list is in use, by dispatching a
// 'publicSuffixList' event on `window`. This is skipped where there is no
// `window` or no `CustomEvent` (e.g. outside of a browser page), so that
// the rest of the code stays usable there.

function notifyListChanged() {
    if ( typeof window === 'undefined' ) {
        return;
    }
    if ( typeof window.dispatchEvent !== 'function' ) {
        return;
    }
    if ( typeof CustomEvent !== 'function' ) {
        return;
    }
    window.dispatchEvent(new CustomEvent('publicSuffixList'));
}

/******************************************************************************/

// Parse and set a UTF-8 text-based suffix list. Format is same as found at:
// http://publicsuffix.org/list/
//
// `text` is the content of the list, one rule per line.
// `toAscii` is a converter from unicode to punycode. Required since the
// Public Suffix List contains unicode characters. It is called only for
// rules which contain characters needing conversion.
// Suggestion: use a dedicated punycode library (e.g. punycode.js).
//
// Any previously parsed list is discarded.

function parse(text, toAscii) {
    exceptions = new Map();
    rules = new Map();

    let lineBeg = 0;
    const textEnd = text.length;

    while ( lineBeg < textEnd ) {
        let lineEnd = text.indexOf('\n', lineBeg);
        if ( lineEnd < 0 ) {
            lineEnd = text.indexOf('\r', lineBeg);
            if ( lineEnd < 0 ) {
                lineEnd = textEnd;
            }
        }
        let line = text.slice(lineBeg, lineEnd).trim();
        lineBeg = lineEnd + 1;

        if ( line.length === 0 ) {
            continue;
        }

        // Ignore comments
        let pos = line.indexOf('//');
        if ( pos !== -1 ) {
            line = line.slice(0, pos);
        }

        // Ignore surrounding whitespaces
        line = line.trim();
        if ( line.length === 0 ) {
            continue;
        }

        // Is this an exception rule?
        let store;
        if ( line.charAt(0) === '!' ) {
            store = exceptions;
            line = line.slice(1);
        } else {
            store = rules;
        }

        if ( mustPunycode.test(line) ) {
            line = toAscii(line);
        }

        // http://publicsuffix.org/list/:
        // "... all rules must be canonicalized in the normal way
        // for hostnames - lower-case, Punycode ..."
        line = line.toLowerCase();

        // Extract TLD
        let tld;
        pos = line.lastIndexOf('.');
        if ( pos === -1 ) {
            // Single-label rule: there is nothing at the left of the TLD to
            // record. Recording the label itself would make the two-label
            // name "{tld}.{tld}" (e.g. "com.com") match as a public suffix.
            // A bare label is reported as a public suffix by
            // getPublicSuffix() without any lookup.
            tld = line;
            line = '';
        } else {
            tld = line.slice(pos + 1);
            line = line.slice(0, pos);
        }

        // Store suffix using tld as key
        let substore = store.get(tld);
        if ( substore === undefined ) {
            store.set(tld, substore = []);
        }
        if ( line ) {
            substore.push(line);
        }
    }
    crystallize(exceptions);
    crystallize(rules);

    notifyListChanged();
}

/******************************************************************************/

// Crystallize the storage of suffixes using optimal internal representation
// for future look up.
//
// On input, each value of `store` is an array of suffixes. On output, each
// value is either:
// - a string: all suffixes joined with, and surrounded by, a space (or the
//   empty string when there is no suffix);
// - an array indexed by suffix length: each item is null, or a string made
//   of all the sorted suffixes of that length, concatenated.
//
// Returns `store`.

function crystallize(store) {
    for ( const entry of store ) {
        const tld = entry[0];
        const suffixes = entry[1];

        // No suffix
        if ( suffixes.length === 0 ) {
            store.set(tld, '');
            continue;
        }

        // Concatenated list of suffixes less than cutoff length:
        //   Store as string, lookup using indexOf()
        const s = suffixes.join(' ');
        if ( s.length < cutoffLength ) {
            store.set(tld, ' ' + s + ' ');
            continue;
        }

        // Concatenated list of suffixes greater or equal to cutoff length
        //   Store as array keyed on suffix length, lookup using binary search.
        // I borrowed the idea to key on string length here:
        //   http://ejohn.org/blog/dictionary-lookups-in-javascript/#comment-392072
        const buckets = [];
        for ( const suffix of suffixes ) {
            const l = suffix.length;
            if ( buckets.length <= l ) {
                extendArray(buckets, l);
            }
            if ( buckets[l] === null ) {
                buckets[l] = [];
            }
            buckets[l].push(suffix);
        }
        for ( let i = 0; i < buckets.length; i++ ) {
            const bucket = buckets[i];
            if ( bucket !== null ) {
                // Default sort order is consistent with the `<` and `>`
                // string comparisons performed in search().
                buckets[i] = bucket.sort().join('');
            }
        }
        store.set(tld, buckets);
    }

    return store;
}

// Append null items to array `aa` until index `rb` is a valid index.
const extendArray = function(aa, rb) {
    for ( let i = aa.length; i <= rb; i++ ) {
        aa.push(null);
    }
};

/******************************************************************************/

// A selfie is a plain-object snapshot of the crystallized stores, which can
// be fed back to fromSelfie() in order to skip parsing the list.

const selfieMagic = 3;

// Returns a snapshot of the current state.
const toSelfie = function() {
    return {
        magic: selfieMagic,
        rules: Array.from(rules),
        exceptions: Array.from(exceptions)
    };
};

// Restore the state from a snapshot created by toSelfie().
// Returns false, and leaves the current state untouched, if `selfie` is not
// an object or does not carry the expected magic number; true otherwise.
const fromSelfie = function(selfie) {
    if ( selfie instanceof Object === false || selfie.magic !== selfieMagic ) {
        return false;
    }
    rules = new Map(selfie.rules);
    exceptions = new Map(selfie.exceptions);
    notifyListChanged();
    return true;
};

/******************************************************************************/

// Pick the object to which the public API is attached: the object received
// by the wrapper function if any, otherwise whichever global object exists
// in the current environment. `typeof` is used because merely referencing an
// undeclared global throws.
const resolveRoot = function(candidate) {
    if ( candidate ) { return candidate; }
    if ( typeof window !== 'undefined' ) { return window; }
    if ( typeof self !== 'undefined' ) { return self; }
    if ( typeof globalThis !== 'undefined' ) { return globalThis; }
    if ( typeof global !== 'undefined' ) { return global; }
    return {};
};

/******************************************************************************/

// Public API

root = resolveRoot(root);

root.publicSuffixList = {
    'version': '1.0',
    'parse': parse,
    'getDomain': getDomain,
    'getPublicSuffix': getPublicSuffix,
    'toSelfie': toSelfie,
    'fromSelfie': fromSelfie
};

/******************************************************************************/

})(this);