sitescripts/extensions/pad/validation.py - Issue 5723465818570752: Issue 520 - Generate PAD files for download portals when updating download links

Delta Between Two Patch Sets: sitescripts/extensions/pad/validation.py

Issue 5723465818570752: Issue 520 - Generate PAD files for download portals when updating download links (Closed)

Left Patch Set: Upgraded to new PAD 4.0 spec and addressed comments Created May 27, 2014, noon

Right Patch Set: Addressed comments Created June 4, 2014, 4:35 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
	1 # This file is part of the Adblock Plus web scripts,

	2 # Copyright (C) 2006-2014 Eyeo GmbH

	3 #

	4 # Adblock Plus is free software: you can redistribute it and/or modify

	5 # it under the terms of the GNU General Public License version 3 as

	6 # published by the Free Software Foundation.

	7 #

	8 # Adblock Plus is distributed in the hope that it will be useful,

	9 # but WITHOUT ANY WARRANTY; without even the implied warranty of

	10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

	11 # GNU General Public License for more details.

	12 #

	13 # You should have received a copy of the GNU General Public License

	14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.

	15

1 import itertools	16 import itertools

2 import warnings	17 import warnings

3 import re	18 import re

4 import urllib2	19 import urllib2

5 from xml.dom import minidom	20 from xml.dom import minidom

6	21

7 FIELDS = [	22 FIELDS = [

8 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_VERSION'], r'^4\.0\Z'),	23 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_VERSION'], r'^4\.0\Z'),

9 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_EDITOR'], r'^[^<\x09]{0,100}\Z'),	24 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_EDITOR'], r'^[^<\x09]{0,100}\Z'),

10 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_INFO'], r'^[^<\x09]{0,1000}\Z'),	25 (['XML_DIZ_INFO', 'MASTER_PAD_VERSION_INFO', 'MASTER_PAD_INFO'], r'^[^<\x09]{0,1000}\Z'),

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
115 (['XML_DIZ_INFO', 'ASP', 'ASP_Member_Number'], None),	130 (['XML_DIZ_INFO', 'ASP', 'ASP_Member_Number'], None),

116 ]	131 ]

117	132

118 def validate_fields(fields, nodes, filename):	133 def validate_fields(fields, nodes, filename):

119 expected_nodes = set()	134 expected_nodes = set()

120	135

121 for node_name, fields in itertools.groupby(fields, lambda (path, regex): path[0]):	136 for node_name, fields in itertools.groupby(fields, lambda (path, regex): path[0]):

122 expected_nodes.add(node_name)	137 expected_nodes.add(node_name)

123	138

124 regex = None	139 regex = None

125 leaf = False

126 nested_fields = []	140 nested_fields = []

127 for path, regex_ in fields:	141 for path, regex_ in fields:

128 if path == [node_name]:	142 if path == [node_name]:

129 regex = regex_	143 regex = regex_

130 leaf = True

131 else:	144 else:

132 nested_fields.append((path[1:], regex_))	145 nested_fields.append((path[1:], regex_))

133	146

134 found = False	147 found = False

135 for node in nodes:	148 for node in nodes:

136 if node.nodeName == node_name:	149 if node.nodeName == node_name:

137 if found:	150 if found:

138 warnings.warn('invalid PAD file (duplicate node)\n'	151 warnings.warn('invalid PAD file (duplicate node)\n'

139 'filename: %s\n'	152 'filename: %s\n'

140 'node: %s' % (filename, node_name))	153 'node: %s' % (filename, node_name))

141	154

142 if regex:	155 if regex:

143 value = ''.join(child.toxml() for child in node.childNodes)	156 value = ''.join(child.toxml() for child in node.childNodes)

144 if not re.match(regex, value):	157 if not re.match(regex, value):

145 warnings.warn('invalid PAD file (invalid value)\n'	158 warnings.warn('invalid PAD file (invalid value)\n'

146 'filename: %s\n'	159 'filename: %s\n'

147 'node: %s\n'	160 'node: %s\n'

148 'value: %s\n'	161 'value: %s\n'

149 'regex: %s' % (filename, node_name, value, regex))	162 'regex: %s' % (filename, node_name, value, regex))

150	163

151 if nested_fields:	164 if nested_fields:

152 » validate_fields(nested_fields, node.childNodes, filename)	165 validate_fields(nested_fields, node.childNodes, filename)

153	166

154 found = True	167 found = True

155	168

156 if not found:	169 if not found:

157 if regex and not re.match(regex, ''):	170 if regex and not re.match(regex, ''):

158 warnings.warn('invalid PAD file (missing node)\n'	171 warnings.warn('invalid PAD file (missing node)\n'

159 'filename: %s\n'	172 'filename: %s\n'

160 'node: %s' % (filename, node_name))	173 'node: %s' % (filename, node_name))

161	174

162 validate_fields(nested_fields, [], filename)	175 validate_fields(nested_fields, [], filename)

(...skipping 19 matching lines...) Expand all Loading...
182 for field in doc.getElementsByTagName('Field'):	195 for field in doc.getElementsByTagName('Field'):

183 path, regex = [	196 path, regex = [

184 ''.join(node.nodeValue for node in field.getElementsByTagName(name)[0].childNodes)	197 ''.join(node.nodeValue for node in field.getElementsByTagName(name)[0].childNodes)

185 for name in ('Path', 'RegEx')	198 for name in ('Path', 'RegEx')

186 ]	199 ]

187 print ' (%r, %s),' % (str(path).split('/'), "r'%s'" % regex if regex else 'None')	200 print ' (%r, %s),' % (str(path).split('/'), "r'%s'" % regex if regex else 'None')

188 print ']'	201 print ']'

189	202

190 if __name__ == '__main__':	203 if __name__ == '__main__':

191 print_fields()	204 print_fields()

LEFT	RIGHT