Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: localeTools.py

Issue 29562599: Issue 5751 - Removing legacy gecko support (Closed)
Patch Set: Rebasing against 5763, purging .properties code, addressing comments Created Oct. 4, 2017, 11:37 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 # This Source Code Form is subject to the terms of the Mozilla Public 1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this 2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/. 3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 4
5 import re 5 import re
6 import os 6 import os
7 import sys 7 import sys
8 import codecs 8 import codecs
9 import json 9 import json
10 import urlparse 10 import urlparse
11 import urllib 11 import urllib
12 import urllib2 12 import urllib2
13 import mimetypes 13 import mimetypes
14 from StringIO import StringIO 14 from StringIO import StringIO
15 from ConfigParser import SafeConfigParser 15 from ConfigParser import SafeConfigParser
16 from zipfile import ZipFile 16 from zipfile import ZipFile
17 from xml.parsers.expat import ParserCreate, XML_PARAM_ENTITY_PARSING_ALWAYS 17 from xml.parsers.expat import ParserCreate, XML_PARAM_ENTITY_PARSING_ALWAYS
18 18
19 CROWDIN_AP_URL = 'https://api.crowdin.com/api/project' 19 CROWDIN_AP_URL = 'https://api.crowdin.com/api/project'
20 20
21 CROWDIN_LANG_MAPPING = { 21 CROWDIN_LANG_MAPPING = {
tlucas 2017/10/04 11:48:39 Note: result of rebasing
22 'br': 'br-FR', 22 'br': 'br-FR',
23 'dsb': 'dsb-DE', 23 'dsb': 'dsb-DE',
24 'es': 'es-ES', 24 'es': 'es-ES',
25 'fur': 'fur-IT', 25 'fur': 'fur-IT',
26 'fy': 'fy-NL', 26 'fy': 'fy-NL',
27 'ga': 'ga-IE', 27 'ga': 'ga-IE',
28 'gu': 'gu-IN', 28 'gu': 'gu-IN',
29 'hsb': 'hsb-DE', 29 'hsb': 'hsb-DE',
30 'hy': 'hy-AM', 30 'hy': 'hy-AM',
31 'ml': 'ml-IN', 31 'ml': 'ml-IN',
(...skipping 23 matching lines...) Expand all
55 except urllib2.HTTPError as e: 55 except urllib2.HTTPError as e:
56 raise Exception('Server returned HTTP Error {}:\n{}'.format(e.code, 56 raise Exception('Server returned HTTP Error {}:\n{}'.format(e.code,
57 e.read())) 57 e.read()))
58 58
59 if not raw: 59 if not raw:
60 return json.loads(result) 60 return json.loads(result)
61 61
62 return result 62 return result
63 63
64 64
class OrderedDict(dict):
    """A dict that remembers the order in which keys were first inserted.

    iteritems() yields entries in first-insertion order; reassigning an
    existing key updates its value but keeps its original position.
    """

    def __init__(self):
        self.__order = []

    def __setitem__(self, key, value):
        # Record a key only the first time it is set: the original appended
        # unconditionally, so repeated assignments grew __order without
        # bound and relied on iteritems() to skip the duplicates.
        if key not in self:
            self.__order.append(key)
        dict.__setitem__(self, key, value)

    def iteritems(self):
        """Yield (key, value) pairs in first-insertion order."""
        done = set()
        for key in self.__order:
            # Skip duplicates defensively (a key deleted via dict methods
            # and re-added can still appear twice in __order), and skip
            # keys that were deleted and never re-added.
            if key not in done and key in self:
                yield (key, self[key])
                done.add(key)
79
80
def escapeEntity(value):
    """Escape &, <, > and " so *value* can be embedded in a DTD entity."""
    replacements = (
        ('&', '&amp;'),  # must come first so later entities aren't re-escaped
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
    )
    for raw, escaped in replacements:
        value = value.replace(raw, escaped)
    return value
83
84
def unescapeEntity(value):
    """Reverse escapeEntity(), mapping DTD entities back to characters.

    Replacements are applied sequentially in the same order as the
    original chained .replace() calls, preserving its exact behavior.
    """
    replacements = (
        ('&amp;', '&'),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&quot;', '"'),
    )
    for escaped, raw in replacements:
        value = value.replace(escaped, raw)
    return value
87
88
def parseDTDString(data, path):
    """Parse DTD source text and yield (name, comment, value) tuples.

    *data* is the DTD file's contents as unicode; *path* is unused here
    but kept for signature parity with parsePropertiesString. Each yielded
    tuple holds the entity name, the comment immediately preceding it
    (or None), and the entity value, all entity-unescaped.
    """
    result = []
    # One-element list so the nested handlers can rebind the value
    # (Python 2 has no `nonlocal`).
    currentComment = [None]

    parser = ParserCreate()
    parser.UseForeignDTD(True)
    parser.SetParamEntityParsing(XML_PARAM_ENTITY_PARSING_ALWAYS)

    def ExternalEntityRefHandler(context, base, systemId, publicId):
        # Triggered by the dummy DOCTYPE below; feed the real DTD text
        # through a sub-parser so its declarations fire our handlers.
        subparser = parser.ExternalEntityParserCreate(context, 'utf-8')
        subparser.Parse(data.encode('utf-8'), True)
        return 1

    def CommentHandler(data):
        # Remember the most recent comment so it can be attached to the
        # next entity declaration.
        currentComment[0] = data.strip()

    def EntityDeclHandler(entityName, is_parameter_entity, value, base, systemId, publicId, notationName):
        result.append((unescapeEntity(entityName), currentComment[0], unescapeEntity(value.strip())))
        # A comment applies to one entity only; reset after use.
        currentComment[0] = None

    parser.ExternalEntityRefHandler = ExternalEntityRefHandler
    parser.CommentHandler = CommentHandler
    parser.EntityDeclHandler = EntityDeclHandler
    # Minimal document whose external DTD reference ("foo") routes the
    # actual data through ExternalEntityRefHandler above.
    parser.Parse('<!DOCTYPE root SYSTEM "foo"><root/>', True)

    for entry in result:
        yield entry
116
117
def escapeProperty(value):
    """Encode literal newlines as the two-character sequence backslash-n."""
    return '\\n'.join(value.split('\n'))
120
121
def unescapeProperty(value):
    """Decode the two-character sequence backslash-n back to a newline."""
    return '\n'.join(value.split('\\n'))
124
125
def parsePropertiesString(data, path):
    """Parse .properties file text, yielding (key, comment, value) tuples.

    *data* is the file's contents as unicode; *path* is used only in the
    warning printed for unparseable lines. The comment element is the
    text of the [#!]-comment immediately preceding the entry, or None.
    """
    currentComment = None
    for line in data.splitlines():
        match = re.search(r'^\s*[#!]\s*(.*)', line)
        if match:
            currentComment = match.group(1)
        elif '=' in line:
            key, value = line.split('=', 1)
            yield (unescapeProperty(key), currentComment, unescapeProperty(value))
            # A comment belongs to one entry only.
            currentComment = None
        elif re.search(r'\S', line):
            # Was a Python-2-only `print >>sys.stderr` statement; write()
            # produces the same output and is valid on both 2 and 3.
            sys.stderr.write('Unrecognized data in file %s: %s\n' % (path, line))
138
139
def parseString(data, path):
    """Parse locale file text into a name->value dict.

    Dispatches on the file extension of *path* (.dtd or .properties);
    returns None for any other extension. The raw input text is kept
    under the special '_origData' key.
    """
    if path.endswith('.dtd'):
        entries = parseDTDString(data, path)
    elif path.endswith('.properties'):
        entries = parsePropertiesString(data, path)
    else:
        return None

    parsed = {'_origData': data}
    for name, _, value in entries:
        parsed[name] = value
    return parsed
152
153
def readFile(path):
    """Read the locale file at *path* (UTF-8) and return parseString()'s
    result for it.

    The original opened and closed the handle manually, leaking it if
    read() raised; the with-statement guarantees closure.
    """
    with codecs.open(path, 'rb', encoding='utf-8') as fileHandle:
        data = fileHandle.read()
    return parseString(data, path)
159
160
def generateStringEntry(key, value, path):
    """Serialize one key/value pair as a line of the file type of *path*.

    Produces a DTD entity declaration for .dtd paths and a
    key=value line otherwise; the result ends with a newline.
    """
    if path.endswith('.dtd'):
        template = '<!ENTITY %s "%s">\n'
        return template % (escapeEntity(key), escapeEntity(value))
    template = '%s=%s\n'
    return template % (escapeProperty(key), escapeProperty(value))
166
167
def toJSON(path):
    """Convert the .dtd/.properties file at *path* to Crowdin-style JSON.

    Each entry becomes {'message': value, 'description': ...} where the
    description is the key name, optionally suffixed with the source
    comment. Returns None for unsupported file extensions.

    Fixes: the file handle leaked if read() raised (now a with-statement);
    `comment == None` replaced with the identity check `is None`.
    """
    with codecs.open(path, 'rb', encoding='utf-8') as fileHandle:
        data = fileHandle.read()

    if path.endswith('.dtd'):
        it = parseDTDString(data, path)
    elif path.endswith('.properties'):
        it = parsePropertiesString(data, path)
    else:
        return None

    result = OrderedDict()
    for name, comment, value in it:
        obj = {'message': value}
        if comment is None:
            obj['description'] = name
        else:
            # Keep the key name in the description so translators see it.
            obj['description'] = '%s: %s' % (name, comment)
        result[name] = obj
    return json.dumps(result, ensure_ascii=False, indent=2)
189
190
def fromJSON(path, data):
    """Write translations from a Crowdin JSON string back to *path*.

    *data* is the JSON text; an empty/falsy payload deletes any stale
    file at *path* instead of writing one. Parent directories are
    created as needed.

    Fixes: the output handle leaked if a write raised (now a
    with-statement); locals no longer shadow the builtins `dir`/`file`.
    """
    data = json.loads(data)
    if not data:
        if os.path.exists(path):
            os.remove(path)
        return

    targetDir = os.path.dirname(path)
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)
    with codecs.open(path, 'wb', encoding='utf-8') as fileHandle:
        for key, value in data.iteritems():
            fileHandle.write(generateStringEntry(key, value['message'], path))
205
206
207 def preprocessChromeLocale(path, metadata, isMaster): 65 def preprocessChromeLocale(path, metadata, isMaster):
208 fileHandle = codecs.open(path, 'rb', encoding='utf-8') 66 fileHandle = codecs.open(path, 'rb', encoding='utf-8')
209 data = json.load(fileHandle) 67 data = json.load(fileHandle)
210 fileHandle.close() 68 fileHandle.close()
211 69
212 for key, value in data.iteritems(): 70 for key, value in data.iteritems():
213 if isMaster: 71 if isMaster:
214 # Make sure the key name is listed in the description 72 # Make sure the key name is listed in the description
215 if 'description' in value: 73 if 'description' in value:
216 value['description'] = '%s: %s' % (key, value['description']) 74 value['description'] = '%s: %s' % (key, value['description'])
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
261 locales.add(match.group(1)) 119 locales.add(match.group(1))
262 120
263 # We don't translate indvidual dialects of languages 121 # We don't translate indvidual dialects of languages
264 # other than English, Spanish, Portuguese and Chinese. 122 # other than English, Spanish, Portuguese and Chinese.
265 for locale in list(locales): 123 for locale in list(locales):
266 prefix = locale.split('-')[0] 124 prefix = locale.split('-')[0]
267 if prefix not in {'en', 'es', 'pt', 'zh'}: 125 if prefix not in {'en', 'es', 'pt', 'zh'}:
268 locales.remove(locale) 126 locales.remove(locale)
269 locales.add(prefix) 127 locales.add(prefix)
270 128
271 # Add languages with existing translations. 129 # Add languages with existing translations.
272 locales.update(localeConfig['locales']) 130 locales.update(localeConfig['locales'])
273 131
274 # Don't add the language we translate from as target translation. 132 # Don't add the language we translate from as target translation.
275 locales.remove(localeConfig['default_locale'].replace('_', '-')) 133 locales.remove(localeConfig['default_locale'].replace('_', '-'))
276 134
277 # Convert to locales understood by Crowdin. 135 # Convert to locales understood by Crowdin.
278 locales = {CROWDIN_LANG_MAPPING.get(locale, locale) for locale in locales} 136 locales = {CROWDIN_LANG_MAPPING.get(locale, locale) for locale in locales}
279 allowed = {locale['crowdin_code'] for locale in 137 allowed = {locale['crowdin_code'] for locale in
280 crowdin_request(projectName, 'supported-languages', key)} 138 crowdin_request(projectName, 'supported-languages', key)}
tlucas 2017/10/04 11:48:39 Note: result of rebasing
281 if not allowed.issuperset(locales): 139 if not allowed.issuperset(locales):
282 print "Warning, following locales aren't allowed by server: " + ', '.joi n(locales - allowed) 140 print "Warning, following locales aren't allowed by server: " + ', '.joi n(locales - allowed)
283 141
284 locales = sorted(locales & allowed) 142 locales = sorted(locales & allowed)
tlucas 2017/10/04 11:48:40 Note: result of rebasing
285 params = urllib.urlencode([('languages[]', locale) for locale in locales]) 143 params = urllib.urlencode([('languages[]', locale) for locale in locales])
286 crowdin_request(projectName, 'edit-project', key, post_data=params) 144 crowdin_request(projectName, 'edit-project', key, post_data=params)
287 145
288 146
289 def crowdin_prepare_upload(files): 147 def crowdin_prepare_upload(files):
290 """Create a post body and matching headers, which Crowdin can handle.""" 148 """Create a post body and matching headers, which Crowdin can handle."""
291 boundary = '----------ThIs_Is_tHe_bouNdaRY_$' 149 boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
292 body = '' 150 body = ''
293 for name, data in files: 151 for name, data in files:
294 body += ( 152 body += (
(...skipping 20 matching lines...) Expand all
315 173
316 def updateTranslationMaster(localeConfig, metadata, dir, projectName, key): 174 def updateTranslationMaster(localeConfig, metadata, dir, projectName, key):
317 result = crowdin_request(projectName, 'info', key) 175 result = crowdin_request(projectName, 'info', key)
318 176
319 existing = set(map(lambda f: f['name'], result['files'])) 177 existing = set(map(lambda f: f['name'], result['files']))
320 add = [] 178 add = []
321 update = [] 179 update = []
322 for file in os.listdir(dir): 180 for file in os.listdir(dir):
323 path = os.path.join(dir, file) 181 path = os.path.join(dir, file)
324 if os.path.isfile(path): 182 if os.path.isfile(path):
325 if localeConfig['file_format'] == 'chrome-json' and file.endswith('. json'): 183 if file.endswith('.json'):
326 data = preprocessChromeLocale(path, metadata, True) 184 data = preprocessChromeLocale(path, metadata, True)
327 newName = file 185 newName = file
328 elif localeConfig['file_format'] == 'chrome-json': 186 else:
329 fileHandle = codecs.open(path, 'rb', encoding='utf-8') 187 fileHandle = codecs.open(path, 'rb', encoding='utf-8')
330 data = json.dumps({file: {'message': fileHandle.read()}}) 188 data = json.dumps({file: {'message': fileHandle.read()}})
331 fileHandle.close() 189 fileHandle.close()
332 newName = file + '.json' 190 newName = file + '.json'
333 else:
334 data = toJSON(path)
335 newName = file + '.json'
336 191
337 if data: 192 if data:
338 if newName in existing: 193 if newName in existing:
339 update.append((newName, data)) 194 update.append((newName, data))
340 existing.remove(newName) 195 existing.remove(newName)
341 else: 196 else:
342 add.append((newName, data)) 197 add.append((newName, data))
343 198
344 if len(add): 199 if len(add):
345 query = {'titles[{}]'.format(name): os.path.splitext(name)[0] 200 query = {'titles[{}]'.format(name): os.path.splitext(name)[0]
346 for name, _ in add} 201 for name, _ in add}
347 query['type'] = 'chrome' 202 query['type'] = 'chrome'
348 data, headers = crowdin_prepare_upload(add) 203 data, headers = crowdin_prepare_upload(add)
349 crowdin_request(projectName, 'add-file', key, query, post_data=data, 204 crowdin_request(projectName, 'add-file', key, query, post_data=data,
350 headers=headers) 205 headers=headers)
351 if len(update): 206 if len(update):
352 data, headers = crowdin_prepare_upload(update) 207 data, headers = crowdin_prepare_upload(update)
353 crowdin_request(projectName, 'update-file', key, post_data=data, 208 crowdin_request(projectName, 'update-file', key, post_data=data,
354 headers=headers) 209 headers=headers)
355 for file in existing: 210 for file in existing:
356 crowdin_request(projectName, 'delete-file', key, {'file': file}) 211 crowdin_request(projectName, 'delete-file', key, {'file': file})
357 212
358 213
359 def uploadTranslations(localeConfig, metadata, dir, locale, projectName, key): 214 def uploadTranslations(localeConfig, metadata, dir, locale, projectName, key):
360 files = [] 215 files = []
361 for file in os.listdir(dir): 216 for file in os.listdir(dir):
362 path = os.path.join(dir, file) 217 path = os.path.join(dir, file)
363 if os.path.isfile(path): 218 if os.path.isfile(path):
364 if localeConfig['file_format'] == 'chrome-json' and file.endswith('. json'): 219 if file.endswith('.json'):
365 data = preprocessChromeLocale(path, metadata, False) 220 data = preprocessChromeLocale(path, metadata, False)
366 newName = file 221 newName = file
367 elif localeConfig['file_format'] == 'chrome-json': 222 else:
368 fileHandle = codecs.open(path, 'rb', encoding='utf-8') 223 fileHandle = codecs.open(path, 'rb', encoding='utf-8')
369 data = json.dumps({file: {'message': fileHandle.read()}}) 224 data = json.dumps({file: {'message': fileHandle.read()}})
370 fileHandle.close() 225 fileHandle.close()
371 newName = file + '.json' 226 newName = file + '.json'
372 else:
373 data = toJSON(path)
374 newName = file + '.json'
375 227
376 if data: 228 if data:
377 files.append((newName, data)) 229 files.append((newName, data))
378 if len(files): 230 if len(files):
379 language = CROWDIN_LANG_MAPPING.get(locale, locale) 231 language = CROWDIN_LANG_MAPPING.get(locale, locale)
tlucas 2017/10/04 11:48:39 Note: result of rebasing
380 data, headers = crowdin_prepare_upload(files) 232 data, headers = crowdin_prepare_upload(files)
381 crowdin_request(projectName, 'upload-translation', key, 233 crowdin_request(projectName, 'upload-translation', key,
382 {'language': language}, post_data=data, 234 {'language': language}, post_data=data,
383 headers=headers) 235 headers=headers)
384 236
385 237
386 def getTranslations(localeConfig, projectName, key): 238 def getTranslations(localeConfig, projectName, key):
387 """Download all available translations from crowdin. 239 """Download all available translations from crowdin.
388 240
389 Trigger crowdin to build the available export, wait for crowdin to 241 Trigger crowdin to build the available export, wait for crowdin to
390 finish the job and download the generated zip afterwards. 242 finish the job and download the generated zip afterwards.
391 """ 243 """
392 crowdin_request(projectName, 'export', key) 244 crowdin_request(projectName, 'export', key)
393 245
394 result = crowdin_request(projectName, 'download/all.zip', key, raw=True) 246 result = crowdin_request(projectName, 'download/all.zip', key, raw=True)
395 zip = ZipFile(StringIO(result)) 247 zip = ZipFile(StringIO(result))
396 dirs = {} 248 dirs = {}
397 249
398 normalizedDefaultLocale = localeConfig['default_locale'] 250 normalizedDefaultLocale = localeConfig['default_locale'].replace('_', '-')
399 if localeConfig['name_format'] == 'ISO-15897':
400 normalizedDefaultLocale = normalizedDefaultLocale.replace('_', '-')
401 normalizedDefaultLocale = CROWDIN_LANG_MAPPING.get(normalizedDefaultLocale, 251 normalizedDefaultLocale = CROWDIN_LANG_MAPPING.get(normalizedDefaultLocale,
402 normalizedDefaultLocale) 252 normalizedDefaultLocale)
403 253
404 for info in zip.infolist(): 254 for info in zip.infolist():
405 if not info.filename.endswith('.json'): 255 if not info.filename.endswith('.json'):
406 continue 256 continue
407 257
408 dir, file = os.path.split(info.filename) 258 dir, file = os.path.split(info.filename)
409 if not re.match(r'^[\w\-]+$', dir) or dir == normalizedDefaultLocale: 259 if not re.match(r'^[\w\-]+$', dir) or dir == normalizedDefaultLocale:
410 continue 260 continue
411 if localeConfig['file_format'] == 'chrome-json' and file.count('.') == 1 : 261 if file.count('.') == 1:
412 origFile = file 262 origFile = file
413 else: 263 else:
414 origFile = re.sub(r'\.json$', '', file) 264 origFile = os.path.splitext(file)[0]
415 if (localeConfig['file_format'] == 'gecko-dtd' and
416 not origFile.endswith('.dtd') and
417 not origFile.endswith('.properties')):
418 continue
419 265
420 for key, value in CROWDIN_LANG_MAPPING.iteritems(): 266 for key, value in CROWDIN_LANG_MAPPING.iteritems():
tlucas 2017/10/04 11:48:39 Note: result of rebasing
421 if value == dir: 267 if value == dir:
422 dir = key 268 dir = key
423 if localeConfig['name_format'] == 'ISO-15897':
424 dir = dir.replace('-', '_')
425 269
426 data = zip.open(info.filename).read() 270 data = zip.open(info.filename).read()
427 if data == '[]': 271 if data == '[]':
428 continue 272 continue
429 273
430 if not dir in dirs: 274 if not dir in dirs:
431 dirs[dir] = set() 275 dirs[dir] = set()
432 dirs[dir].add(origFile) 276 dirs[dir].add(origFile)
433 277
434 path = os.path.join(localeConfig['base_path'], dir, origFile) 278 path = os.path.join(localeConfig['base_path'], dir, origFile)
435 if not os.path.exists(os.path.dirname(path)): 279 if not os.path.exists(os.path.dirname(path)):
436 os.makedirs(os.path.dirname(path)) 280 os.makedirs(os.path.dirname(path))
437 if localeConfig['file_format'] == 'chrome-json' and file.endswith('.json '): 281 if file.endswith('.json'):
438 postprocessChromeLocale(path, data) 282 postprocessChromeLocale(path, data)
439 elif localeConfig['file_format'] == 'chrome-json': 283 else:
440 data = json.loads(data) 284 data = json.loads(data)
441 if origFile in data: 285 if origFile in data:
442 fileHandle = codecs.open(path, 'wb', encoding='utf-8') 286 fileHandle = codecs.open(path, 'wb', encoding='utf-8')
443 fileHandle.write(data[origFile]['message']) 287 fileHandle.write(data[origFile]['message'])
444 fileHandle.close() 288 fileHandle.close()
445 else:
446 fromJSON(path, data)
447 289
448 # Remove any extra files 290 # Remove any extra files
449 for dir, files in dirs.iteritems(): 291 for dir, files in dirs.iteritems():
450 baseDir = os.path.join(localeConfig['base_path'], dir) 292 baseDir = os.path.join(localeConfig['base_path'], dir)
451 if not os.path.exists(baseDir): 293 if not os.path.exists(baseDir):
452 continue 294 continue
453 for file in os.listdir(baseDir): 295 for file in os.listdir(baseDir):
454 path = os.path.join(baseDir, file) 296 path = os.path.join(baseDir, file)
455 if os.path.isfile(path) and (file.endswith('.json') or file.endswith ('.properties') or file.endswith('.dtd')) and not file in files: 297 valid_extension = file.endswith('.json')
298 if os.path.isfile(path) and valid_extension and not file in files:
456 os.remove(path) 299 os.remove(path)
OLDNEW

Powered by Google App Engine
This is Rietveld