Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: abp/filters/parser.py

Issue 29793573: Issue 6701 - Implement CSP support in python-abp (Closed) Base URL: https://hg.adblockplus.org/python-abp/
Patch Set: Created May 29, 2018, 4:54 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # This file is part of Adblock Plus <https://adblockplus.org/>, 1 # This file is part of Adblock Plus <https://adblockplus.org/>,
2 # Copyright (C) 2006-present eyeo GmbH 2 # Copyright (C) 2006-present eyeo GmbH
3 # 3 #
4 # Adblock Plus is free software: you can redistribute it and/or modify 4 # Adblock Plus is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License version 3 as 5 # it under the terms of the GNU General Public License version 3 as
6 # published by the Free Software Foundation. 6 # published by the Free Software Foundation.
7 # 7 #
8 # Adblock Plus is distributed in the hope that it will be useful, 8 # Adblock Plus is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 XBL = 'xbl' 93 XBL = 'xbl'
94 DTD = 'dtd' 94 DTD = 'dtd'
95 95
96 # Other options. 96 # Other options.
97 MATCH_CASE = 'match-case' 97 MATCH_CASE = 'match-case'
98 DOMAIN = 'domain' 98 DOMAIN = 'domain'
99 THIRD_PARTY = 'third-party' 99 THIRD_PARTY = 'third-party'
100 COLLAPSE = 'collapse' 100 COLLAPSE = 'collapse'
101 SITEKEY = 'sitekey' 101 SITEKEY = 'sitekey'
102 DONOTTRACK = 'donottrack' 102 DONOTTRACK = 'donottrack'
103 CSP = 'csp'
103 104
104 105
105 def _line_type(name, field_names, format_string): 106 def _line_type(name, field_names, format_string):
106 """Define a line type. 107 """Define a line type.
107 108
108 Parameters 109 Parameters
109 ---------- 110 ----------
110 name: str 111 name: str
111 The name of the line type to define. 112 The name of the line type to define.
112 field_names: str or list 113 field_names: str or list
(...skipping 25 matching lines...) Expand all
138 Include = _line_type('Include', 'target', '%include {0.target}%') 139 Include = _line_type('Include', 'target', '%include {0.target}%')
139 140
140 141
141 METADATA_REGEXP = re.compile(r'!\s*(\w+)\s*:\s*(.*)') 142 METADATA_REGEXP = re.compile(r'!\s*(\w+)\s*:\s*(.*)')
142 METADATA_KEYS = {'Homepage', 'Title', 'Expires', 'Checksum', 'Redirect', 143 METADATA_KEYS = {'Homepage', 'Title', 'Expires', 'Checksum', 'Redirect',
143 'Version'} 144 'Version'}
144 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%') 145 INCLUDE_REGEXP = re.compile(r'%include\s+(.+)%')
145 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I) 146 HEADER_REGEXP = re.compile(r'\[(Adblock(?:\s*Plus\s*[\d\.]+?)?)\]', flags=re.I)
146 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$') 147 HIDING_FILTER_REGEXP = re.compile(r'^([^/*|@"!]*?)#([@?])?#(.+)$')
147 FILTER_OPTIONS_REGEXP = re.compile( 148 FILTER_OPTIONS_REGEXP = re.compile(
148 r'\$(~?[\w-]+(?:=[^,\s]+)?(?:,~?[\w-]+(?:=[^,\s]+)?)*)$' 149 r'\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$'
150 )
151
152 # Regular expression that matches an invalid Content Security Policy
rhowell 2018/06/07 18:16:55 Removing this, since we aren't currently checking
153 INVALID_CSP_REGEXP = re.compile(
154 r'(;|^) ?(base-uri|referrer|report-to|report-uri|upgrade-insecure-requests)\b'
149 ) 155 )
150 156
151 157
152 def _parse_comment(text): 158 def _parse_comment(text):
153 match = METADATA_REGEXP.match(text) 159 match = METADATA_REGEXP.match(text)
154 if match and match.group(1) in METADATA_KEYS: 160 if match and match.group(1) in METADATA_KEYS:
155 return Metadata(match.group(1), match.group(2)) 161 return Metadata(match.group(1), match.group(2))
156 return Comment(text[1:].strip()) 162 return Comment(text[1:].strip())
157 163
158 164
159 def _parse_header(text): 165 def _parse_header(text):
160 match = HEADER_REGEXP.match(text) 166 match = HEADER_REGEXP.match(text)
161 if not match: 167 if not match:
162 raise ParseError('Malformed header', text) 168 raise ParseError('Malformed header', text)
163 return Header(match.group(1)) 169 return Header(match.group(1))
164 170
165 171
166 def _parse_instruction(text): 172 def _parse_instruction(text):
167 match = INCLUDE_REGEXP.match(text) 173 match = INCLUDE_REGEXP.match(text)
168 if not match: 174 if not match:
169 raise ParseError('Unrecognized instruction', text) 175 raise ParseError('Unrecognized instruction', text)
170 return Include(match.group(1)) 176 return Include(match.group(1))
171 177
172 178
173 def _parse_option(option): 179 def _parse_option(option):
174 if '=' in option: 180 if '=' in option:
175 return option.split('=', 1) 181 return option.split('=', 1)
176 if option.startswith('~'): 182 if option.startswith('~'):
Vasily Kuznetsov 2018/05/30 19:49:26 If we want to be able to handle the form "~csp=xxx
rhowell 2018/06/07 18:16:55 I talked to Dave, and he said that ~csp=xxxx and ~
Vasily Kuznetsov 2018/06/07 18:28:31 Let's switch the ifs -- this way both ~csp=xxx and
rhowell 2018/06/07 23:13:06 Done.
177 return option[1:], False 183 return option[1:], False
178 return option, True 184 return option, True
179 185
180 186
181 def _parse_filter_option(option): 187 def _parse_filter_option(option):
182 name, value = _parse_option(option) 188 name, value = _parse_option(option)
183 189
184 # Handle special cases of multivalued options. 190 # Handle special cases of multivalued options.
185 if name == FILTER_OPTION.DOMAIN: 191 if name == FILTER_OPTION.DOMAIN:
186 value = [_parse_option(o) for o in value.split('|')] 192 value = [_parse_option(o) for o in value.split('|')]
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 Raises 318 Raises
313 ------ 319 ------
314 ParseError 320 ParseError
315 Thrown during iteration for invalid filter list lines. 321 Thrown during iteration for invalid filter list lines.
316 TypeError 322 TypeError
317 If `lines` is not iterable. 323 If `lines` is not iterable.
318 324
319 """ 325 """
320 for line in lines: 326 for line in lines:
321 yield parse_line(line) 327 yield parse_line(line)
OLD | NEW
« no previous file with comments | « .hgignore ('k') | tests/test_parser.py » ('j') | tests/test_parser.py » ('J')

Powered by Google App Engine
This is Rietveld