| OLD | NEW |
| 1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
| 2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
| 3 # | 3 # |
| 4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
| 5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
| 6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
| 7 # | 7 # |
| 8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
| 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| (...skipping 33 matching lines...) | (...skipping 33 matching lines...) |
| 44 | 44 |
| 45 def __init__(self, error, text): | 45 def __init__(self, error, text): |
| 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) |
| 47 self.text = text | 47 self.text = text |
| 48 self.error = error | 48 self.error = error |
| 49 | 49 |
| 50 | 50 |
| 51 # Constants related to filters (see https://adblockplus.org/filters). | 51 # Constants related to filters (see https://adblockplus.org/filters). |
| 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). | 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
| 53 """Selector type constants.""" | 53 """Selector type constants.""" |
| | 54 |
| 54 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 55 URL_PATTERN = 'url-pattern' # Normal URL patterns. |
| 55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 56 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
| 56 CSS = 'css' # CSS selectors for hiding filters. | 57 CSS = 'css' # CSS selectors for hiding filters. |
| 57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 58 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
| 58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 59 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
| 59 | 60 |
| 60 | 61 |
| 61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). | 62 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
| 62 """Filter action constants.""" | 63 """Filter action constants.""" |
| | 64 |
| 63 BLOCK = 'block' # Block the request. | 65 BLOCK = 'block' # Block the request. |
| 64 ALLOW = 'allow' # Allow the request (whitelist). | 66 ALLOW = 'allow' # Allow the request (whitelist). |
| 65 HIDE = 'hide' # Hide selected element(s). | 67 HIDE = 'hide' # Hide selected element(s). |
| 66 SHOW = 'show' # Show selected element(s) (whitelist). | 68 SHOW = 'show' # Show selected element(s) (whitelist). |
| 67 | 69 |
| 68 | 70 |
| 69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). | 71 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
| 70 """Filter option constants.""" | 72 """Filter option constants.""" |
| | 73 |
| 71 # Resource types. | 74 # Resource types. |
| 72 OTHER = 'other' | 75 OTHER = 'other' |
| 73 SCRIPT = 'script' | 76 SCRIPT = 'script' |
| 74 IMAGE = 'image' | 77 IMAGE = 'image' |
| 75 STYLESHEET = 'stylesheet' | 78 STYLESHEET = 'stylesheet' |
| 76 OBJECT = 'object' | 79 OBJECT = 'object' |
| 77 SUBDOCUMENT = 'subdocument' | 80 SUBDOCUMENT = 'subdocument' |
| 78 DOCUMENT = 'document' | 81 DOCUMENT = 'document' |
| 79 WEBSOCKET = 'websocket' | 82 WEBSOCKET = 'websocket' |
| 80 WEBRTC = 'webrtc' | 83 WEBRTC = 'webrtc' |
| (...skipping 57 matching lines...) | (...skipping 57 matching lines...) |
| 138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 141 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
| 139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 142 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
| 140 Include = _line_type('Include', 'target', '%include {0.target}%') | 143 Include = _line_type('Include', 'target', '%include {0.target}%') |
| 141 | 144 |
| 142 | 145 |
| 143 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') | 146 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') |
| 144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 147 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
| 145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 148 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
| 146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 149 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
| 147 FILTER_OPTIONS_REGEXP = re.compile( | 150 FILTER_OPTIONS_REGEXP = re.compile( |
| 148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 151 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$', |
| 149 ) | 152 ) |
| 150 | 153 |
| 151 | 154 |
| 152 def _parse_instruction(text): | 155 def _parse_instruction(text): |
| 153 match = INCLUDE_REGEXP.match(text) | 156 match = INCLUDE_REGEXP.match(text) |
| 154 if not match: | 157 if not match: |
| 155 raise ParseError('Unrecognized instruction', text) | 158 raise ParseError('Unrecognized instruction', text) |
| 156 return Include(match.group(1)) | 159 return Include(match.group(1)) |
| 157 | 160 |
| 158 | 161 |
| (...skipping 31 matching lines...) | (...skipping 31 matching lines...) |
| 190 if selector.startswith('@@'): | 193 if selector.startswith('@@'): |
| 191 action = FILTER_ACTION.ALLOW | 194 action = FILTER_ACTION.ALLOW |
| 192 selector = selector[2:] | 195 selector = selector[2:] |
| 193 | 196 |
| 194 if '$' in selector: | 197 if '$' in selector: |
| 195 opt_match = FILTER_OPTIONS_REGEXP.search(selector) | 198 opt_match = FILTER_OPTIONS_REGEXP.search(selector) |
| 196 if opt_match: | 199 if opt_match: |
| 197 selector = selector[:opt_match.start(0)] | 200 selector = selector[:opt_match.start(0)] |
| 198 options = _parse_filter_options(opt_match.group(1)) | 201 options = _parse_filter_options(opt_match.group(1)) |
| 199 | 202 |
| 200 if (len(selector) > 1 and | 203 if (len(selector) > 1 |
| 201 selector.startswith('/') and selector.endswith('/')): | 204 and selector.startswith('/') and selector.endswith('/')): |
| 202 selector = {'type': SELECTOR_TYPE.URL_REGEXP, 'value': selector[1:-1]} | 205 selector = {'type': SELECTOR_TYPE.URL_REGEXP, 'value': selector[1:-1]} |
| 203 else: | 206 else: |
| 204 selector = {'type': SELECTOR_TYPE.URL_PATTERN, 'value': selector} | 207 selector = {'type': SELECTOR_TYPE.URL_PATTERN, 'value': selector} |
| 205 | 208 |
| 206 return Filter(text, selector, action, options) | 209 return Filter(text, selector, action, options) |
| 207 | 210 |
| 208 | 211 |
| 209 def _parse_hiding_filter(text, domain, type_flag, selector_value): | 212 def _parse_hiding_filter(text, domain, type_flag, selector_value): |
| 210 selector = {'type': SELECTOR_TYPE.CSS, 'value': selector_value} | 213 selector = {'type': SELECTOR_TYPE.CSS, 'value': selector_value} |
| 211 action = FILTER_ACTION.HIDE | 214 action = FILTER_ACTION.HIDE |
| (...skipping 59 matching lines...) | (...skipping 59 matching lines...) |
| 271 ------- | 274 ------- |
| 272 namedtuple | 275 namedtuple |
| 273 Parsed line (see `_line_type`). | 276 Parsed line (see `_line_type`). |
| 274 | 277 |
| 275 Raises | 278 Raises |
| 276 ------ | 279 ------ |
| 277 ParseError | 280 ParseError |
| 278 ParseError: If the line can't be parsed. | 281 ParseError: If the line can't be parsed. |
| 279 | 282 |
| 280 """ | 283 """ |
| 281 POSITIONS = {'body', 'start', 'metadata'} | 284 positions = {'body', 'start', 'metadata'} |
| 282 if position not in POSITIONS: | 285 if position not in positions: |
| 283 raise ValueError('position should be one of {}'.format(POSITIONS)) | 286 raise ValueError('position should be one of {}'.format(positions)) |
| 284 | 287 |
| 285 if isinstance(line, type(b'')): | 288 if isinstance(line, type(b'')): |
| 286 line = line.decode('utf-8') | 289 line = line.decode('utf-8') |
| 287 | 290 |
| 288 stripped = line.strip() | 291 stripped = line.strip() |
| 289 | 292 |
| 290 if stripped == '': | 293 if stripped == '': |
| 291 return EmptyLine() | 294 return EmptyLine() |
| 292 | 295 |
| 293 if position == 'start': | 296 if position == 'start': |
| (...skipping 41 matching lines...) | (...skipping 41 matching lines...) |
| 335 for line in lines: | 338 for line in lines: |
| 336 parsed_line = parse_line(line, position) | 339 parsed_line = parse_line(line, position) |
| 337 yield parsed_line | 340 yield parsed_line |
| 338 | 341 |
| 339 if position != 'body' and parsed_line.type in {'header', 'metadata'}: | 342 if position != 'body' and parsed_line.type in {'header', 'metadata'}: |
| 340 # Continue parsing metadata until it's over... | 343 # Continue parsing metadata until it's over... |
| 341 position = 'metadata' | 344 position = 'metadata' |
| 342 else: | 345 else: |
| 343 # ...then switch to parsing the body. | 346 # ...then switch to parsing the body. |
| 344 position = 'body' | 347 position = 'body' |
| OLD | NEW |