localeTools.py - Issue 29561557: Issue 5763 - Target languages supported by Firefox

Side by Side Diff: localeTools.py

Issue 29561557: Issue 5763 - Target languages supported by Firefox (Closed)

Patch Set: Addressed Vasily's comments Created Oct. 2, 2017, 10:57 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # This Source Code Form is subject to the terms of the Mozilla Public	1 # This Source Code Form is subject to the terms of the Mozilla Public

2 # License, v. 2.0. If a copy of the MPL was not distributed with this	2 # License, v. 2.0. If a copy of the MPL was not distributed with this

3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.	3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

4	4

5 import re	5 import re

6 import os	6 import os

7 import sys	7 import sys

8 import codecs	8 import codecs

9 import json	9 import json

10 import urlparse	10 import urlparse

11 import urllib	11 import urllib

12 import urllib2	12 import urllib2

13 import mimetypes	13 import mimetypes

14 from StringIO import StringIO	14 from StringIO import StringIO

15 from ConfigParser import SafeConfigParser	15 from ConfigParser import SafeConfigParser

16 from zipfile import ZipFile	16 from zipfile import ZipFile

17 from xml.parsers.expat import ParserCreate, XML_PARAM_ENTITY_PARSING_ALWAYS	17 from xml.parsers.expat import ParserCreate, XML_PARAM_ENTITY_PARSING_ALWAYS

18	18

19 langMappingGecko = {	19 CROWDIN_AP_URL = 'https://api.crowdin.com/api/project'

20 'bn-BD': 'bn',	20

	21 CROWDIN_LANG_MAPPING = {

21 'br': 'br-FR',	22 'br': 'br-FR',

22 'dsb': 'dsb-DE',	23 'dsb': 'dsb-DE',

23 'fj-FJ': 'fj',	24 'es': 'es-ES',

	25 'fur': 'fur-IT',

	26 'fy': 'fy-NL',

	27 'ga': 'ga-IE',

	28 'gu': 'gu-IN',

24 'hsb': 'hsb-DE',	29 'hsb': 'hsb-DE',

25 'hi-IN': 'hi',	30 'hy': 'hy-AM',

26 'ml': 'ml-IN',	31 'ml': 'ml-IN',

27 'nb-NO': 'nb',	32 'nn': 'nn-NO',

	33 'pa': 'pa-IN',

28 'rm': 'rm-CH',	34 'rm': 'rm-CH',

29 'ta-LK': 'ta',	35 'si': 'si-LK',

30 'wo-SN': 'wo',	36 'sv': 'sv-SE',

	37 'ur': 'ur-PK',

31 }	38 }

32	39

33 langMappingChrome = {

34 'es-419': 'es-MX',

35 'es': 'es-ES',

36 'sv': 'sv-SE',

37 'ml': 'ml-IN',

38 'gu': 'gu-IN',

39 }

40

41 chromeLocales = [

42 'am',

43 'ar',

44 'bg',

45 'bn',

46 'ca',

47 'cs',

48 'da',

49 'de',

50 'el',

51 'en-GB',

52 'en-US',

53 'es-419',

54 'es',

55 'et',

56 'fa',

57 'fi',

58 'fil',

59 'fr',

60 'gu',

61 'he',

62 'hi',

63 'hr',

64 'hu',

65 'id',

66 'it',

67 'ja',

68 'kn',

69 'ko',

70 'lt',

71 'lv',

72 'ml',

73 'mr',

74 'ms',

75 'nb',

76 'nl',

77 'pl',

78 'pt-BR',

79 'pt-PT',

80 'ro',

81 'ru',

82 'sk',

83 'sl',

84 'sr',

85 'sv',

86 'sw',

87 'ta',

88 'te',

89 'th',

90 'tr',

91 'uk',

92 'vi',

93 'zh-CN',

94 'zh-TW',

95 ]

96

97 CROWDIN_AP_URL = 'https://api.crowdin.com/api/project'

98

99	40

100 def crowdin_request(project_name, action, key, get={}, post_data=None,	41 def crowdin_request(project_name, action, key, get={}, post_data=None,

101 headers={}, raw=False):	42 headers={}, raw=False):

102 """Perform a call to crowdin and raise an Exception on failure."""	43 """Perform a call to crowdin and raise an Exception on failure."""

103 request = urllib2.Request(	44 request = urllib2.Request(

104 '{}/{}/{}?{}'.format(CROWDIN_AP_URL,	45 '{}/{}/{}?{}'.format(CROWDIN_AP_URL,

105 urllib.quote(project_name),	46 urllib.quote(project_name),

106 urllib.quote(action),	47 urllib.quote(action),

107 urllib.urlencode(dict(get, key=key, json=1))),	48 urllib.urlencode(dict(get, key=key, json=1))),

108 post_data,	49 post_data,

(...skipping 29 matching lines...) Expand all Loading...
138	79

139	80

140 def escapeEntity(value):	81 def escapeEntity(value):

140 return value.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')	82 return value.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')

142	83

143	84

144 def unescapeEntity(value):	85 def unescapeEntity(value):

145 return value.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')	86 return value.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"')

146	87

147	88

148 def mapLocale(type, locale):

149 mapping = langMappingChrome if type == 'ISO-15897' else langMappingGecko

150 return mapping.get(locale, locale)

151

152

153 def parseDTDString(data, path):	89 def parseDTDString(data, path):

154 result = []	90 result = []

155 currentComment = [None]	91 currentComment = [None]

156	92

157 parser = ParserCreate()	93 parser = ParserCreate()

158 parser.UseForeignDTD(True)	94 parser.UseForeignDTD(True)

159 parser.SetParamEntityParsing(XML_PARAM_ENTITY_PARSING_ALWAYS)	95 parser.SetParamEntityParsing(XML_PARAM_ENTITY_PARSING_ALWAYS)

160	96

161 def ExternalEntityRefHandler(context, base, systemId, publicId):	97 def ExternalEntityRefHandler(context, base, systemId, publicId):

162 subparser = parser.ExternalEntityParserCreate(context, 'utf-8')	98 subparser = parser.ExternalEntityParserCreate(context, 'utf-8')

(...skipping 134 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
297 for key, value in parsed.iteritems():	233 for key, value in parsed.iteritems():

298 if 'description' in value:	234 if 'description' in value:

299 del value['description']	235 del value['description']

300	236

301 file = codecs.open(path, 'wb', encoding='utf-8')	237 file = codecs.open(path, 'wb', encoding='utf-8')

302 json.dump(parsed, file, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': '))	238 json.dump(parsed, file, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': '))

303 file.close()	239 file.close()

304	240

305	241

306 def setupTranslations(localeConfig, projectName, key):	242 def setupTranslations(localeConfig, projectName, key):

307 # Make a new set from the locales list, mapping to Crowdin friendly format	243 locales = set()

308 locales = {mapLocale(localeConfig['name_format'], locale)

309 for locale in localeConfig['locales']}

310	244

311 # Fill up with locales that we don't have but the browser supports	245 # Languages supported by Firefox

312 if 'chrome' in localeConfig['target_platforms']:	246 data = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()

313 for locale in chromeLocales:	247 for match in re.finditer(r'&lang=([\w\-]+)"', data):

314 locales.add(mapLocale('ISO-15897', locale))	248 locales.add(match.group(1))

315	249

316 if 'gecko' in localeConfig['target_platforms']:	250 # Languages supported by Firefox Language Packs

317 firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()	251 data = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()

318 for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):	252 for match in re.finditer(r'<tr>.*?</tr>', data, re.S):

319 locales.add(mapLocale('BCP-47', match.group(1)))	253 if match.group(0).find('Install Language Pack') >= 0:

320 langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()	254 match2 = re.search(r'lang="([\w\-]+)"', match.group(0))

321 for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):	255 if match2:

322 if match.group(0).find('Install Language Pack') >= 0:	256 locales.add(match2.group(1))

323 match2 = re.search(r'lang="([\w\-]+)"', match.group(0))

324 if match2:

325 locales.add(mapLocale('BCP-47', match2.group(1)))

326	257

327 allowed = set()	258 # Languages supported by Chrome (excluding es-419)

328 allowedLocales = crowdin_request(projectName, 'supported-languages', key)	259 data = urllib2.urlopen('https://packages.debian.org/sid/all/chromium-l10n/filelist').read()

	260 for match in re.finditer(r'locales/(?!es-419)([\w\-]+)\.pak', data):

	261 locales.add(match.group(1))

329	262

330 for locale in allowedLocales:	263 # We don't translate individual dialects of languages

331 allowed.add(locale['crowdin_code'])	264 # other than English, Spanish, Portuguese and Chinese.

	265 for locale in list(locales):

	266 prefix = locale.split('-')[0]

	267 if prefix not in {'en', 'es', 'pt', 'zh'}:

	268 locales.remove(locale)

	269 locales.add(prefix)

	270

	271 # Add languages with existing translations.

	272 locales.update(localeConfig['locales'])

	273

	274 # Don't add the language we translate from as target translation.

	275 locales.remove(localeConfig['default_locale'].replace('_', '-'))

	276

	277 # Convert to locales understood by Crowdin.

	278 locales = {CROWDIN_LANG_MAPPING.get(locale, locale) for locale in locales}

	279 allowed = {locale['crowdin_code'] for locale in

	280 crowdin_request(projectName, 'supported-languages', key)}

332 if not allowed.issuperset(locales):	281 if not allowed.issuperset(locales):

333 print "Warning, following locales aren't allowed by server: " + ', '.join(locales - allowed)	282 print "Warning, following locales aren't allowed by server: " + ', '.join(locales - allowed)

334	283

335 locales = list(locales & allowed)	284 locales = sorted(locales & allowed)

336 locales.sort()

337 params = urllib.urlencode([('languages[]', locale) for locale in locales])	285 params = urllib.urlencode([('languages[]', locale) for locale in locales])

338

339 crowdin_request(projectName, 'edit-project', key, post_data=params)	286 crowdin_request(projectName, 'edit-project', key, post_data=params)

340	287

341	288

342 def crowdin_prepare_upload(files):	289 def crowdin_prepare_upload(files):

343 """Create a post body and matching headers, which Crowdin can handle."""	290 """Create a post body and matching headers, which Crowdin can handle."""

344 boundary = '----------ThIs_Is_tHe_bouNdaRY_$'	291 boundary = '----------ThIs_Is_tHe_bouNdaRY_$'

345 body = ''	292 body = ''

346 for name, data in files:	293 for name, data in files:

347 body += (	294 body += (

348 '--{boundary}\r\n'	295 '--{boundary}\r\n'

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
422 data = json.dumps({file: {'message': fileHandle.read()}})	369 data = json.dumps({file: {'message': fileHandle.read()}})

423 fileHandle.close()	370 fileHandle.close()

424 newName = file + '.json'	371 newName = file + '.json'

425 else:	372 else:

426 data = toJSON(path)	373 data = toJSON(path)

427 newName = file + '.json'	374 newName = file + '.json'

428	375

429 if data:	376 if data:

430 files.append((newName, data))	377 files.append((newName, data))

431 if len(files):	378 if len(files):

432 language = mapLocale(localeConfig['name_format'], locale)	379 language = CROWDIN_LANG_MAPPING.get(locale, locale)

433 data, headers = crowdin_prepare_upload(files)	380 data, headers = crowdin_prepare_upload(files)

434 crowdin_request(projectName, 'upload-translation', key,	381 crowdin_request(projectName, 'upload-translation', key,

435 {'language': language}, post_data=data,	382 {'language': language}, post_data=data,

436 headers=headers)	383 headers=headers)

437	384

438	385

439 def getTranslations(localeConfig, projectName, key):	386 def getTranslations(localeConfig, projectName, key):

440 """Download all available translations from crowdin.	387 """Download all available translations from crowdin.

441	388

442 Trigger crowdin to build the available export, wait for crowdin to	389 Trigger crowdin to build the available export, wait for crowdin to

443 finish the job and download the generated zip afterwards.	390 finish the job and download the generated zip afterwards.

444 """	391 """

445 crowdin_request(projectName, 'export', key)	392 crowdin_request(projectName, 'export', key)

446	393

447 result = crowdin_request(projectName, 'download/all.zip', key, raw=True)	394 result = crowdin_request(projectName, 'download/all.zip', key, raw=True)

448 zip = ZipFile(StringIO(result))	395 zip = ZipFile(StringIO(result))

449 dirs = {}	396 dirs = {}

450	397

451 normalizedDefaultLocale = localeConfig['default_locale']	398 normalizedDefaultLocale = localeConfig['default_locale']

452 if localeConfig['name_format'] == 'ISO-15897':	399 if localeConfig['name_format'] == 'ISO-15897':

453 normalizedDefaultLocale = normalizedDefaultLocale.replace('_', '-')	400 normalizedDefaultLocale = normalizedDefaultLocale.replace('_', '-')

454 normalizedDefaultLocale = mapLocale(localeConfig['name_format'],	401 normalizedDefaultLocale = CROWDIN_LANG_MAPPING.get(normalizedDefaultLocale,

455 normalizedDefaultLocale)	402 normalizedDefaultLocale)

456	403

457 for info in zip.infolist():	404 for info in zip.infolist():

458 if not info.filename.endswith('.json'):	405 if not info.filename.endswith('.json'):

459 continue	406 continue

460	407

461 dir, file = os.path.split(info.filename)	408 dir, file = os.path.split(info.filename)

462 if not re.match(r'^[\w\-]+$', dir) or dir == normalizedDefaultLocale:	409 if not re.match(r'^[\w\-]+$', dir) or dir == normalizedDefaultLocale:

463 continue	410 continue

464 if localeConfig['file_format'] == 'chrome-json' and file.count('.') == 1:	411 if localeConfig['file_format'] == 'chrome-json' and file.count('.') == 1:

465 origFile = file	412 origFile = file

466 else:	413 else:

467 origFile = re.sub(r'\.json$', '', file)	414 origFile = re.sub(r'\.json$', '', file)

468 if (localeConfig['file_format'] == 'gecko-dtd' and	415 if (localeConfig['file_format'] == 'gecko-dtd' and

469 not origFile.endswith('.dtd') and	416 not origFile.endswith('.dtd') and

470 not origFile.endswith('.properties')):	417 not origFile.endswith('.properties')):

471 continue	418 continue

472	419

473 if localeConfig['name_format'] == 'ISO-15897':	420 for key, value in CROWDIN_LANG_MAPPING.iteritems():

474 mapping = langMappingChrome

475 else:

476 mapping = langMappingGecko

477

478 for key, value in mapping.iteritems():

479 if value == dir:	421 if value == dir:

480 dir = key	422 dir = key

481 if localeConfig['name_format'] == 'ISO-15897':	423 if localeConfig['name_format'] == 'ISO-15897':

482 dir = dir.replace('-', '_')	424 dir = dir.replace('-', '_')

483	425

484 data = zip.open(info.filename).read()	426 data = zip.open(info.filename).read()

485 if data == '[]':	427 if data == '[]':

486 continue	428 continue

487	429

488 if not dir in dirs:	430 if not dir in dirs:

(...skipping 16 matching lines...) Expand all Loading...
505	447

506 # Remove any extra files	448 # Remove any extra files

507 for dir, files in dirs.iteritems():	449 for dir, files in dirs.iteritems():

508 baseDir = os.path.join(localeConfig['base_path'], dir)	450 baseDir = os.path.join(localeConfig['base_path'], dir)

509 if not os.path.exists(baseDir):	451 if not os.path.exists(baseDir):

510 continue	452 continue

511 for file in os.listdir(baseDir):	453 for file in os.listdir(baseDir):

512 path = os.path.join(baseDir, file)	454 path = os.path.join(baseDir, file)

513 if os.path.isfile(path) and (file.endswith('.json') or file.endswith('.properties') or file.endswith('.dtd')) and not file in files:	455 if os.path.isfile(path) and (file.endswith('.json') or file.endswith('.properties') or file.endswith('.dtd')) and not file in files:

514 os.remove(path)	456 os.remove(path)

OLD	NEW

« no previous file with comments | « no previous file | packagerChrome.py » ('j') | no next file with comments »