Index: updateRules.py |
=================================================================== |
--- a/updateRules.py |
+++ b/updateRules.py |
@@ -233,31 +233,16 @@ additionalTLDs = [ |
# From http://en.wikipedia.org/wiki/.zm |
'ac.zm', 'co.zm', 'com.zm', 'edu.zm', 'gov.zm', 'net.zm', 'org.zm', 'sch.zm', |
# From http://en.wikipedia.org/wiki/.zw |
'co.zw', 'ac.zw', 'org.zw', |
] |
-def getSuffixes(target, items): |
- suffixes = {} |
- for item, priority in items.iteritems(): |
- suffix = item[-1] if len(item) else '' |
- if not suffix in suffixes: |
- suffixes[suffix] = {} |
- suffixes[suffix][item[:-1]] = priority |
- for suffix, items in suffixes.iteritems(): |
- if len(items.keys()) == 1: |
- item, priority = items.items()[0] |
- target[suffix] = ''.join(reversed(item)) + ' ' + str(priority) |
- else: |
- target[suffix] = {} |
- getSuffixes(target[suffix], items) |
- |
def urlopen(url, attempts=3): |
""" |
Tries to open a particular URL, retries on failure. |
""" |
for i in range(attempts): |
try: |
return urllib.urlopen(url) |
except IOError, e: |
@@ -286,20 +271,16 @@ def iterateTLDs(): |
if tld: |
yield tld |
def getTLDs(domains, tldPriority): |
for tld in iterateTLDs(): |
if not tld in domains: |
domains[tld] = tldPriority |
-def updateSchemes(rules): |
- rules['scheme'] = {} |
- getSuffixes(rules['scheme'], schemes) |
- |
def updateDomains(rules): |
domains = {} |
reader = codecs.getreader('utf-8')(sys.stdin) |
i = 0 |
for domain in itertools.chain(reader.readlines(), additionalDomains): |
domain = domain.rstrip() |
if not domain or domain in domains: |
continue |
@@ -319,18 +300,17 @@ def updateDomains(rules): |
if not domain: |
break |
if not domain in domains or domains[domain] < priority - maxPriority: |
domains[domain] = priority - maxPriority |
# Fill up with "official" TLDs |
getTLDs(domains, -maxPriority) |
- rules['domain'] = {} |
- getSuffixes(rules['domain'], domains) |
+ rules['domain'] = domains |
def writeRules(rules): |
path = os.path.join('defaults', 'rules.json') |
Thomas Greiner
2012/09/25 12:31:37
Do you really want the file to be located at /defa
Wladimir Palant
2012/09/25 13:26:00
Actually, it's defaults/rules.json (relative to th
|
file = codecs.open(path, 'rb', encoding='utf-8') |
data = file.read() |
file.close() |
marker = '// Automatically generated dictionaries' |
@@ -342,14 +322,14 @@ def writeRules(rules): |
file = codecs.open(path, 'wb', encoding='utf-8') |
file.write(data) |
file.close() |
def updateRules(): |
rules = {} |
rules['domainReferrals'] = domainReferrals |
- updateSchemes(rules) |
+ rules['scheme'] = schemes |
updateDomains(rules) |
writeRules(rules) |
if __name__ == "__main__": |
updateRules() |