 Issue 29633713:
  Issue 6077 - Create script to modify the list of search engines dynamically  (Closed)
    
  
    Issue 29633713:
  Issue 6077 - Create script to modify the list of search engines dynamically  (Closed) 
  | Left: | ||
| Right: | 
| OLD | NEW | 
|---|---|
| 1 # This file is part of Adblock Plus | 1 # This file is part of Adblock Plus | 
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH | 
| 3 # | 3 # | 
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify | 
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as | 
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. | 
| 7 # | 7 # | 
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, | 
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. | 
| 12 # | 12 # | 
| 13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License | 
| 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| 15 | 15 | 
| 16 import json | |
| 16 import os | 17 import os | 
| 17 import re | 18 import re | 
| 18 import sys | 19 import sys | 
| 19 | 20 | 
| 20 _LOCALE_RE = re.compile("^([a-z]{2,3}(?:-[A-Z]{2})?)$") | 21 _LOCALE_RE = re.compile("^([a-z]{2,3}(?:-[A-Z]{2})?)$") | 
| 21 _VALUES_LOCALE_RE = re.compile("^values-([a-z]{2,3}(?:-r[A-Z]{2})?)$") | 22 _VALUES_LOCALE_RE = re.compile("^values-([a-z]{2,3}(?:-r[A-Z]{2})?)$") | 
| 22 | 23 | 
| 24 _SEARCH_PROPS_RE = re.compile("^browser\.search\." | |
| 25 "(defaultenginename|order\.).*$") | |
| 26 _SHORTNAME_RE = re.compile("^<ShortName>(.*)</ShortName>$") | |
| 27 | |
| 23 _PROPERTY_FORMAT_RE = re.compile("^(([^=]*)=)(.*)$") | 28 _PROPERTY_FORMAT_RE = re.compile("^(([^=]*)=)(.*)$") | 
| 24 _ENTITY_FORMAT_RE = re.compile("^(\s*<!ENTITY\s*([^\"\s]*)\s*\")(.*)(\">)$") | 29 _ENTITY_FORMAT_RE = re.compile("^(\s*<!ENTITY\s*([^\"\s]*)\s*\")(.*)(\">)$") | 
| 25 _STRING_FORMAT_RE = re.compile( | 30 _STRING_FORMAT_RE = re.compile( | 
| 26 "^(\s*<string name=\"([^\"]*)\">)(.*)(</string>)$") | 31 "^(\s*<string name=\"([^\"]*)\">)(.*)(</string>)$") | 
| 27 | 32 | 
| 28 _MOZBUILD_PATH = os.path.join("python", "mozbuild") | 33 _MOZBUILD_PATH = os.path.join("python", "mozbuild") | 
| 29 | 34 | 
| 30 _CHROME_PATH = os.path.join("dist", "bin", "chrome") | 35 _CHROME_PATH = os.path.join("dist", "bin", "chrome") | 
| 31 _RES_PATH = os.path.join("mobile", "android", "base", "res") | 36 _RES_PATH = os.path.join("mobile", "android", "base", "res") | 
| 37 _I10N_PATH = os.path.join("abb-build", "l10n") | |
| 
diegocarloslima
2017/12/13 10:44:42
This variable should be named `_L10N_PATH` (lowercase "l", as in "l10n"), not `_I10N_PATH` with a capital "I" :)
 | |
| 38 _LISTJSON_PATH = os.path.join("mobile", "locales", "search") | |
| 39 _SEARCHPLUGINS_PATH = os.path.join("mobile", "locales", "searchplugins") | |
| 
diegocarloslima
2017/12/13 10:44:41
Maybe the ("mobile","locales") part could be extra
 | |
| 32 | 40 | 
| 33 _BROWSER_DIR = "browser" | 41 _BROWSER_DIR = "browser" | 
| 42 _REGION_PROPS_PATH = os.path.join(_BROWSER_DIR, "region.properties") | |
| 34 | 43 | 
| 35 _APPSTRINGS_PROPS_PATH = os.path.join(_BROWSER_DIR, "appstrings.properties") | 44 _APPSTRINGS_PROPS_PATH = os.path.join(_BROWSER_DIR, "appstrings.properties") | 
| 36 _STRINGS_XML_PATH = "strings.xml" | 45 _STRINGS_XML_PATH = "strings.xml" | 
| 46 _LIST_JSON = "list.json" | |
| 37 | 47 | 
| 38 _DEFAULT_LOCALE = "en-US" | 48 _DEFAULT_LOCALE = "en-US" | 
| 49 _KEY_VDF = "visibleDefaultEngines" | |
| 50 _KEY_LOC = "locales" | |
| 51 _KEY_DEF = "default" | |
| 
diegocarloslima
2017/12/13 10:44:42
I know that having short variable names helps to b
 | |
| 52 | |
| 53 # Add Ecosia as secondary search engine. | |
| 54 # See https://issues.adblockplus.org/ticket/5518 | |
| 55 _ECOSIA_ID = "ecosia" | |
| 56 | |
| 57 _SEARCH_ENGINE_ORDER_DEFAULT = [ | |
| 58 "duckduckgo", | |
| 59 "yahoo", | |
| 60 "google", | |
| 61 "wikipedia", | |
| 62 "amazondotcom"] | |
| 63 | |
| 64 _SEARCH_ENGINE_ORDER_ECOSIA = [ | |
| 65 "duckduckgo", | |
| 66 "yahoo", | |
| 67 "google", | |
| 68 "ecosia", | |
| 69 "wikipedia", | |
| 70 "amazon"] | |
| 71 | |
| 72 _SEARCH_ENGINE_ORDER = { | |
| 73 "de": _SEARCH_ENGINE_ORDER_ECOSIA, | |
| 74 "en-GB": _SEARCH_ENGINE_ORDER_ECOSIA, | |
| 75 "en-US": _SEARCH_ENGINE_ORDER_ECOSIA, | |
| 76 "fr": _SEARCH_ENGINE_ORDER_ECOSIA, | |
| 77 "nl": _SEARCH_ENGINE_ORDER_ECOSIA, | |
| 78 "zh-CN": ["baidu", | |
| 79 "duckduckgo", | |
| 80 "yahoo", | |
| 81 "google", | |
| 82 "wikipedia", | |
| 83 "amazon" | |
| 84 ] | |
| 85 } | |
| 39 | 86 | 
| 40 _FIREFOX_REPLACE_STR = "Firefox" | 87 _FIREFOX_REPLACE_STR = "Firefox" | 
| 41 _ABB_REPLACEMENT_STR = "Adblock Browser" | 88 _ABB_REPLACEMENT_STR = "Adblock Browser" | 
| 42 | 89 | 
| 43 # Some string values that contain Firefox such as 'Firefox Sync' shouldn't be | 90 # Some string values that contain Firefox such as 'Firefox Sync' shouldn't be | 
| 44 # replaced, so we keep a list of ids that are exceptions | 91 # replaced, so we keep a list of ids that are exceptions | 
| 45 _ENTITY_EXCEPTIONS = [ | 92 _ENTITY_EXCEPTIONS = [ | 
| 46 "overlay_no_synced_devices", | 93 "overlay_no_synced_devices", | 
| 47 "home_remote_tabs_need_to_sign_in", | 94 "home_remote_tabs_need_to_sign_in", | 
| 48 "home_remote_tabs_need_to_finish_migrating", | 95 "home_remote_tabs_need_to_finish_migrating", | 
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 102 return None | 149 return None | 
| 103 | 150 | 
| 104 | 151 | 
| 105 def _write_lines(filename, lines): | 152 def _write_lines(filename, lines): | 
| 106 """Writes lines into file appending \\n""" | 153 """Writes lines into file appending \\n""" | 
| 107 with open(filename, "w") as fd: | 154 with open(filename, "w") as fd: | 
| 108 for l in lines: | 155 for l in lines: | 
| 109 fd.write("%s\n" % l) | 156 fd.write("%s\n" % l) | 
| 110 | 157 | 
| 111 | 158 | 
| 112 def _transform_locale(locale, path, logger): | 159 def _transform_locale(locale, build_dir, locale_path, logger): | 
| 113 logger.info("Processing locale '%s'..." % locale) | 160 logger.info("Processing locale '%s'..." % locale) | 
| 161 search_list_path = os.path.join(build_dir, _LISTJSON_PATH) | |
| 162 _check_path_exists(search_list_path, logger) | |
| 
diegocarloslima
2017/12/13 10:44:41
I think it makes more sense to check for the exist
 | |
| 163 | |
| 164 # Check for region.properties existence | |
| 165 region_file_path = os.path.join(locale_path, _REGION_PROPS_PATH) | |
| 166 _check_path_exists(region_file_path, logger) | |
| 114 | 167 | 
| 115 # Check for appstrings.properties existence | 168 # Check for appstrings.properties existence | 
| 116 appstrings_file_path = os.path.join(path, _APPSTRINGS_PROPS_PATH) | 169 appstrings_file_path = os.path.join(locale_path, _APPSTRINGS_PROPS_PATH) | 
| 117 _check_path_exists(appstrings_file_path, logger) | 170 _check_path_exists(appstrings_file_path, logger) | 
| 118 | 171 | 
| 172 # Get whitelist and build regex | |
| 173 whitelist = _SEARCH_ENGINE_ORDER.get(locale, | |
| 174 _SEARCH_ENGINE_ORDER_DEFAULT) | |
| 175 white_re = re.compile("^(%s).*$" % "|".join(whitelist)) | |
| 176 | |
| 177 all_engine_ids = [] | |
| 178 engine_ids = [] | |
| 179 removed_engine_ids = [] | |
| 180 | |
| 181 jsonFile = open(os.path.join(search_list_path, _LIST_JSON), 'r') | |
| 182 data = json.load(jsonFile) | |
| 183 jsonFile.close() | |
| 
diegocarloslima
2017/12/13 10:44:42
We could use the `with open` syntax here, to avoid [comment truncated in this export; presumably: the explicit `jsonFile.close()` call]
 | |
| 184 | |
| 185 for item in data[_KEY_LOC][locale][_KEY_DEF][_KEY_VDF]: | |
| 186 all_engine_ids.append(item) | |
| 187 if white_re.match(item): | |
| 188 engine_ids.append(item) | |
| 189 else: | |
| 190 removed_engine_ids.append(item) | |
| 191 | |
| 192 # Make sure we still have search engines left | |
| 193 if len(engine_ids) == 0: | |
| 194 logger.fatal("No search engines left over for '%s'" % locale) | |
| 195 | |
| 196 # 'Parse' XML to get matching 'ShortName' for all engine IDs | |
| 197 engine_names = {} | |
| 198 search_plugins_path = os.path.join(build_dir, _SEARCHPLUGINS_PATH) | |
| 199 for eid in engine_ids[:]: | |
| 200 xml_file_path = os.path.join(search_plugins_path, "%s.xml" % eid) | |
| 201 if not os.path.exists(xml_file_path): | |
| 202 logger.info("Missing xml file for plugin %s. Searched in path %s" % | |
| 203 (eid, xml_file_path)) | |
| 204 engine_ids.remove(eid) | |
| 205 continue | |
| 206 short_name = None | |
| 207 with open(xml_file_path, "r") as fd: | |
| 208 for line in fd: | |
| 209 line = line.strip() | |
| 210 match = _SHORTNAME_RE.match(line) | |
| 211 if match: | |
| 212 short_name = match.group(1).strip() | |
| 213 | |
| 214 if not short_name: | |
| 215 logger.fatal("No ShortName defined for '%s' in '%s" % | |
| 216 (eid, locale)) | |
| 217 engine_names[eid] = short_name | |
| 218 | |
| 219 logger.info("Removed search engine IDs: %s" % | |
| 220 ", ".join(removed_engine_ids)) | |
| 221 logger.info("Remaining search engine IDs: %s" % ", ".join(engine_ids)) | |
| 222 | |
| 223 # Create search engine order with real engine names | |
| 224 engine_order = [] | |
| 225 for eid in whitelist: | |
| 226 sn = _get_shortname_from_id(eid, engine_ids, engine_names) | |
| 227 if sn: | |
| 228 engine_order.append(sn) | |
| 229 | |
| 230 logger.info("Resulting search engine ordered list: %s" % | |
| 231 (", ".join(engine_order))) | |
| 232 | |
| 233 # Read region.properties and remove browser.search.* lines | |
| 234 props = [] | |
| 235 with open(region_file_path, "r") as fd: | |
| 236 for line in fd: | |
| 237 line = line.rstrip("\r\n") | |
| 238 if not _SEARCH_PROPS_RE.match(line.strip()): | |
| 239 props.append(line) | |
| 240 | |
| 241 # Append default search engine name | |
| 242 props.append("browser.search.defaultenginename=%s" % engine_order[0]) | |
| 243 | |
| 244 # Append search engine order | |
| 245 for i in range(0, min(5, len(engine_order))): | |
| 
diegocarloslima
2017/12/13 10:44:42
The condition here should be `for i in range(0, le
 | |
| 246 props.append("browser.search.order.%d=%s" % (i + 1, engine_order[i])) | |
| 247 | |
| 248 # Write back region.properties | |
| 249 _write_lines(region_file_path, props) | |
| 250 | |
| 119 # Replaces ocurrences of 'Firefox' by 'Adblock Browser' in | 251 # Replaces ocurrences of 'Firefox' by 'Adblock Browser' in | 
| 120 # 'appstrings.properties' | 252 # 'appstrings.properties' | 
| 121 lines = [] | 253 lines = [] | 
| 122 replacement_count = 0 | 254 replacement_count = 0 | 
| 123 | 255 | 
| 124 with open(appstrings_file_path, "r") as fd: | 256 with open(appstrings_file_path, "r") as fd: | 
| 125 for line in fd: | 257 for line in fd: | 
| 126 line = line.rstrip("\r\n") | 258 line = line.rstrip("\r\n") | 
| 127 replacement = _replace_in_value(_PROPERTY_FORMAT_RE, line, | 259 replacement = _replace_in_value(_PROPERTY_FORMAT_RE, line, | 
| 128 _FIREFOX_REPLACE_STR, | 260 _FIREFOX_REPLACE_STR, | 
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 213 locales = _get_locales_from_path(chrome_path, _LOCALE_RE) | 345 locales = _get_locales_from_path(chrome_path, _LOCALE_RE) | 
| 214 values_locales = _get_locales_from_path(res_path, _VALUES_LOCALE_RE) | 346 values_locales = _get_locales_from_path(res_path, _VALUES_LOCALE_RE) | 
| 215 | 347 | 
| 216 locales_found_msg = "Found %d locales in %s" | 348 locales_found_msg = "Found %d locales in %s" | 
| 217 logger.info(locales_found_msg % (len(locales), chrome_path)) | 349 logger.info(locales_found_msg % (len(locales), chrome_path)) | 
| 218 logger.info(locales_found_msg % (len(values_locales), res_path)) | 350 logger.info(locales_found_msg % (len(values_locales), res_path)) | 
| 219 | 351 | 
| 220 for locale in locales: | 352 for locale in locales: | 
| 221 locale_path = os.path.join(chrome_path, locale, "locale", locale) | 353 locale_path = os.path.join(chrome_path, locale, "locale", locale) | 
| 222 if os.path.exists(locale_path): | 354 if os.path.exists(locale_path): | 
| 223 _transform_locale(locale, locale_path, logger) | 355 _transform_locale(locale, build_dir, locale_path, logger) | 
| 224 _generate_browser_search(locale, locale_path, res_path, build_dir) | 356 _generate_browser_search(locale, locale_path, res_path, build_dir) | 
| 225 else: | 357 else: | 
| 226 logger.error("Missing folder for locale '%s' in path: %s" % | 358 logger.error("Missing folder for locale '%s' in path: %s" % | 
| 227 (locale, locale_path)) | 359 (locale, locale_path)) | 
| 228 | 360 | 
| 229 for locale in values_locales: | 361 for locale in values_locales: | 
| 230 locale_path = os.path.join(res_path, "values-" + locale) | 362 locale_path = os.path.join(res_path, "values-" + locale) | 
| 231 _transform_values_locale(locale, locale_path, logger) | 363 _transform_values_locale(locale, locale_path, logger) | 
| 364 | |
| 365 | |
| 366 def transform_search_engines_list(abs_mozilla_dir, obj_dir, | |
| 
diegocarloslima
2017/12/13 10:44:42
I see that you're using here the `abs_mozilla_dir`
 | |
| 367 logger=MinimalLogger()): | |
| 368 # open the Mozilla list of search engines, put it into a buffer and | |
| 369 # close the JSON file after reading | |
| 370 search_list_path = os.path.join(abs_mozilla_dir, _LISTJSON_PATH) | |
| 371 jsonFile = open(os.path.join(search_list_path, _LIST_JSON), 'r') | |
| 372 data = json.load(jsonFile) | |
| 373 jsonFile.close() | |
| 
diegocarloslima
2017/12/13 10:44:41
We could use the `with open` syntax here, to avoid [comment truncated in this export; presumably: the explicit `jsonFile.close()` call]
 | |
| 374 | |
| 375 chrome_path = os.path.join(abs_mozilla_dir, _I10N_PATH) | |
| 
diegocarloslima
2017/12/13 10:44:42
Didn't really get why you are using a different ch[…] [comment truncated in this export]
 | |
| 376 _check_path_exists(chrome_path, logger) | |
| 377 # get all locales we want to transform | |
| 378 locales = _get_locales_from_path(chrome_path, _LOCALE_RE) | |
| 379 | |
| 380 # set default search engine order | |
| 381 data[_KEY_DEF][_KEY_VDF] = _SEARCH_ENGINE_ORDER_DEFAULT | |
| 382 for loc in locales: | |
| 
diegocarloslima
2017/12/13 10:44:42
If we moved all the logic that is currently being
 
diegocarloslima
2017/12/13 10:55:29
Also, I think that this should be an internal func
 | |
| 383 whitelist = _SEARCH_ENGINE_ORDER.get(loc, | |
| 384 _SEARCH_ENGINE_ORDER_DEFAULT) | |
| 385 white_re = re.compile("^(%s).*$" % "|".join(whitelist)) | |
| 386 | |
| 387 # Read engine IDs from list.json, discard engines not on whitelist | |
| 388 all_engine_ids = [] | |
| 389 engine_ids = [] | |
| 390 removed_engine_ids = [] | |
| 391 | |
| 392 # Mozilla default list does not contain locale bn-BD, so we create it | |
| 393 # and use the values from locale bn-IN | |
| 394 if loc == "bn-BD": | |
| 395 data[_KEY_LOC].update({loc: {_KEY_DEF: {_KEY_VDF: data[_KEY_LOC] | |
| 396 ["bn-IN"][_KEY_DEF][_KEY_VDF]}}}) | |
| 397 # Mozilla default list does not contain locale wo, so we use the | |
| 398 # default order. In case they will not support any other locales in | |
| 399 # the future, we want the build to fail, to decide which order to use | |
| 400 elif loc == "wo": | |
| 401 data[_KEY_LOC].update({loc: {_KEY_DEF: | |
| 402 {_KEY_VDF: _SEARCH_ENGINE_ORDER_DEFAULT}}}) | |
| 403 | |
| 404 for item in data[_KEY_LOC][loc][_KEY_DEF][_KEY_VDF]: | |
| 405 all_engine_ids.append(item) | |
| 406 if len(item) > 0: | |
| 407 if white_re.match(item): | |
| 408 engine_ids.append(item) | |
| 409 else: | |
| 410 removed_engine_ids.append(item) | |
| 411 | |
| 412 if _ECOSIA_ID in whitelist and _ECOSIA_ID not in all_engine_ids: | |
| 413 all_engine_ids.append(_ECOSIA_ID) | |
| 414 engine_ids.append(_ECOSIA_ID) | |
| 415 | |
| 416 # Make sure we still have search engines left | |
| 417 if len(engine_ids) == 0: | |
| 418 logger.fatal("No search engines left over for '%s'" % loc) | |
| 419 | |
| 420 data[_KEY_LOC][loc][_KEY_DEF][_KEY_VDF] = all_engine_ids | |
| 421 | |
| 422 # Save our changes to list.json | |
| 423 with open(os.path.join(search_list_path, _LIST_JSON), 'w') as outfile: | |
| 424 json.dump(data, outfile, indent=4, sort_keys=True) | |
| OLD | NEW |