| Index: updateRules.py |
| =================================================================== |
| --- a/updateRules.py |
| +++ b/updateRules.py |
| @@ -233,31 +233,16 @@ additionalTLDs = [ |
| # From http://en.wikipedia.org/wiki/.zm |
| 'ac.zm', 'co.zm', 'com.zm', 'edu.zm', 'gov.zm', 'net.zm', 'org.zm', 'sch.zm', |
| # From http://en.wikipedia.org/wiki/.zw |
| 'co.zw', 'ac.zw', 'org.zw', |
| ] |
| -def getSuffixes(target, items): |
| - suffixes = {} |
| - for item, priority in items.iteritems(): |
| - suffix = item[-1] if len(item) else '' |
| - if not suffix in suffixes: |
| - suffixes[suffix] = {} |
| - suffixes[suffix][item[:-1]] = priority |
| - for suffix, items in suffixes.iteritems(): |
| - if len(items.keys()) == 1: |
| - item, priority = items.items()[0] |
| - target[suffix] = ''.join(reversed(item)) + ' ' + str(priority) |
| - else: |
| - target[suffix] = {} |
| - getSuffixes(target[suffix], items) |
| - |
| def urlopen(url, attempts=3): |
| """ |
| Tries to open a particular URL, retries on failure. |
| """ |
| for i in range(attempts): |
| try: |
| return urllib.urlopen(url) |
| except IOError, e: |
| @@ -286,20 +271,16 @@ def iterateTLDs(): |
| if tld: |
| yield tld |
| def getTLDs(domains, tldPriority): |
| for tld in iterateTLDs(): |
| if not tld in domains: |
| domains[tld] = tldPriority |
| -def updateSchemes(rules): |
| - rules['scheme'] = {} |
| - getSuffixes(rules['scheme'], schemes) |
| - |
| def updateDomains(rules): |
| domains = {} |
| reader = codecs.getreader('utf-8')(sys.stdin) |
| i = 0 |
| for domain in itertools.chain(reader.readlines(), additionalDomains): |
| domain = domain.rstrip() |
| if not domain or domain in domains: |
| continue |
| @@ -319,18 +300,17 @@ def updateDomains(rules): |
| if not domain: |
| break |
| if not domain in domains or domains[domain] < priority - maxPriority: |
| domains[domain] = priority - maxPriority |
| # Fill up with "official" TLDs |
| getTLDs(domains, -maxPriority) |
| - rules['domain'] = {} |
| - getSuffixes(rules['domain'], domains) |
| + rules['domain'] = domains |
| def writeRules(rules): |
| path = os.path.join('defaults', 'rules.json') |
|
Thomas Greiner
2012/09/25 12:31:37
Do you really want the file to be located at /defaults/rules.json […]
Wladimir Palant
2012/09/25 13:26:00
Actually, it's defaults/rules.json (relative to the […]
|
| file = codecs.open(path, 'rb', encoding='utf-8') |
| data = file.read() |
| file.close() |
| marker = '// Automatically generated dictionaries' |
| @@ -342,14 +322,14 @@ def writeRules(rules): |
| file = codecs.open(path, 'wb', encoding='utf-8') |
| file.write(data) |
| file.close() |
| def updateRules(): |
| rules = {} |
| rules['domainReferrals'] = domainReferrals |
| - updateSchemes(rules) |
| + rules['scheme'] = schemes |
| updateDomains(rules) |
| writeRules(rules) |
| if __name__ == "__main__": |
| updateRules() |