OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-present eyeo GmbH | 2 # Copyright (C) 2006-present eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
44 | 44 |
45 def __init__(self, error, text): | 45 def __init__(self, error, text): |
46 Exception.__init__(self, '{} in "{}"'.format(error, text)) | 46 Exception.__init__(self, '{} in "{}"'.format(error, text)) |
47 self.text = text | 47 self.text = text |
48 self.error = error | 48 self.error = error |
49 | 49 |
50 | 50 |
51 # Constants related to filters (see https://adblockplus.org/filters). | 51 # Constants related to filters (see https://adblockplus.org/filters). |
52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). | 52 class SELECTOR_TYPE: # flake8: noqa (this is a namespace of constants). |
53 """Selector type constants.""" | 53 """Selector type constants.""" |
| 54 |
54 URL_PATTERN = 'url-pattern' # Normal URL patterns. | 55 URL_PATTERN = 'url-pattern' # Normal URL patterns. |
55 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. | 56 URL_REGEXP = 'url-regexp' # Regular expressions for URLs. |
56 CSS = 'css' # CSS selectors for hiding filters. | 57 CSS = 'css' # CSS selectors for hiding filters. |
57 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). | 58 XCSS = 'extended-css' # Extended CSS selectors (to emulate CSS4). |
58 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. | 59 ABP_SIMPLE = 'abp-simple' # Simplified element hiding syntax. |
59 | 60 |
60 | 61 |
61 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). | 62 class FILTER_ACTION: # flake8: noqa (this is a namespace of constants). |
62 """Filter action constants.""" | 63 """Filter action constants.""" |
| 64 |
63 BLOCK = 'block' # Block the request. | 65 BLOCK = 'block' # Block the request. |
64 ALLOW = 'allow' # Allow the request (whitelist). | 66 ALLOW = 'allow' # Allow the request (whitelist). |
65 HIDE = 'hide' # Hide selected element(s). | 67 HIDE = 'hide' # Hide selected element(s). |
66 SHOW = 'show' # Show selected element(s) (whitelist). | 68 SHOW = 'show' # Show selected element(s) (whitelist). |
67 | 69 |
68 | 70 |
69 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). | 71 class FILTER_OPTION: # flake8: noqa (this is a namespace of constants). |
70 """Filter option constants.""" | 72 """Filter option constants.""" |
| 73 |
71 # Resource types. | 74 # Resource types. |
72 OTHER = 'other' | 75 OTHER = 'other' |
73 SCRIPT = 'script' | 76 SCRIPT = 'script' |
74 IMAGE = 'image' | 77 IMAGE = 'image' |
75 STYLESHEET = 'stylesheet' | 78 STYLESHEET = 'stylesheet' |
76 OBJECT = 'object' | 79 OBJECT = 'object' |
77 SUBDOCUMENT = 'subdocument' | 80 SUBDOCUMENT = 'subdocument' |
78 DOCUMENT = 'document' | 81 DOCUMENT = 'document' |
79 WEBSOCKET = 'websocket' | 82 WEBSOCKET = 'websocket' |
80 WEBRTC = 'webrtc' | 83 WEBRTC = 'webrtc' |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
138 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') | 141 Metadata = _line_type('Metadata', 'key value', '! {0.key}: {0.value}') |
139 Filter = _line_type('Filter', 'text selector action options', '{.text}') | 142 Filter = _line_type('Filter', 'text selector action options', '{.text}') |
140 Include = _line_type('Include', 'target', '%include {0.target}%') | 143 Include = _line_type('Include', 'target', '%include {0.target}%') |
141 | 144 |
142 | 145 |
143 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') | 146 METADATA_REGEXP = re.compile(r'\s*!\s*(.*?)\s*:\s*(.*)') |
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') | 147 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') |
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) | 148 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) |
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') | 149 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') |
147 FILTER_OPTIONS_REGEXP = re.compile( | 150 FILTER_OPTIONS_REGEXP = re.compile( |
148 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$' | 151 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$', |
149 ) | 152 ) |
150 | 153 |
151 | 154 |
152 def _parse_instruction(text): | 155 def _parse_instruction(text): |
153 match = INCLUDE_REGEXP.match(text) | 156 match = INCLUDE_REGEXP.match(text) |
154 if not match: | 157 if not match: |
155 raise ParseError('Unrecognized instruction', text) | 158 raise ParseError('Unrecognized instruction', text) |
156 return Include(match.group(1)) | 159 return Include(match.group(1)) |
157 | 160 |
158 | 161 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
190 if selector.startswith('@@'): | 193 if selector.startswith('@@'): |
191 action = FILTER_ACTION.ALLOW | 194 action = FILTER_ACTION.ALLOW |
192 selector = selector[2:] | 195 selector = selector[2:] |
193 | 196 |
194 if '$' in selector: | 197 if '$' in selector: |
195 opt_match = FILTER_OPTIONS_REGEXP.search(selector) | 198 opt_match = FILTER_OPTIONS_REGEXP.search(selector) |
196 if opt_match: | 199 if opt_match: |
197 selector = selector[:opt_match.start(0)] | 200 selector = selector[:opt_match.start(0)] |
198 options = _parse_filter_options(opt_match.group(1)) | 201 options = _parse_filter_options(opt_match.group(1)) |
199 | 202 |
200 if (len(selector) > 1 and | 203 if (len(selector) > 1 |
201 selector.startswith('/') and selector.endswith('/')): | 204 and selector.startswith('/') and selector.endswith('/')): |
202 selector = {'type': SELECTOR_TYPE.URL_REGEXP, 'value': selector[1:-1]} | 205 selector = {'type': SELECTOR_TYPE.URL_REGEXP, 'value': selector[1:-1]} |
203 else: | 206 else: |
204 selector = {'type': SELECTOR_TYPE.URL_PATTERN, 'value': selector} | 207 selector = {'type': SELECTOR_TYPE.URL_PATTERN, 'value': selector} |
205 | 208 |
206 return Filter(text, selector, action, options) | 209 return Filter(text, selector, action, options) |
207 | 210 |
208 | 211 |
209 def _parse_hiding_filter(text, domain, type_flag, selector_value): | 212 def _parse_hiding_filter(text, domain, type_flag, selector_value): |
210 selector = {'type': SELECTOR_TYPE.CSS, 'value': selector_value} | 213 selector = {'type': SELECTOR_TYPE.CSS, 'value': selector_value} |
211 action = FILTER_ACTION.HIDE | 214 action = FILTER_ACTION.HIDE |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
271 ------- | 274 ------- |
272 namedtuple | 275 namedtuple |
273 Parsed line (see `_line_type`). | 276 Parsed line (see `_line_type`). |
274 | 277 |
275 Raises | 278 Raises |
276 ------ | 279 ------ |
277 ParseError | 280 ParseError |
278 ParseError: If the line can't be parsed. | 281 ParseError: If the line can't be parsed. |
279 | 282 |
280 """ | 283 """ |
281 POSITIONS = {'body', 'start', 'metadata'} | 284 positions = {'body', 'start', 'metadata'} |
282 if position not in POSITIONS: | 285 if position not in positions: |
283 raise ValueError('position should be one of {}'.format(POSITIONS)) | 286 raise ValueError('position should be one of {}'.format(positions)) |
284 | 287 |
285 if isinstance(line, type(b'')): | 288 if isinstance(line, type(b'')): |
286 line = line.decode('utf-8') | 289 line = line.decode('utf-8') |
287 | 290 |
288 stripped = line.strip() | 291 stripped = line.strip() |
289 | 292 |
290 if stripped == '': | 293 if stripped == '': |
291 return EmptyLine() | 294 return EmptyLine() |
292 | 295 |
293 if position == 'start': | 296 if position == 'start': |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
335 for line in lines: | 338 for line in lines: |
336 parsed_line = parse_line(line, position) | 339 parsed_line = parse_line(line, position) |
337 yield parsed_line | 340 yield parsed_line |
338 | 341 |
339 if position != 'body' and parsed_line.type in {'header', 'metadata'}: | 342 if position != 'body' and parsed_line.type in {'header', 'metadata'}: |
340 # Continue parsing metadata until it's over... | 343 # Continue parsing metadata until it's over... |
341 position = 'metadata' | 344 position = 'metadata' |
342 else: | 345 else: |
343 # ...then switch to parsing the body. | 346 # ...then switch to parsing the body. |
344 position = 'body' | 347 position = 'body' |
OLD | NEW |