Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: lib/filterClasses.js

Issue 29680689: [$csp2 adblockpluscore] Issue 6329 - Add the CSP filter type (Closed)
Left Patch Set: Fully normalise whitespace, avoid strict equality Created March 12, 2018, 1:35 p.m.
Right Patch Set: Addressed Sebastian's feedback Created March 21, 2018, 4:51 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | test/filterClasses.js » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 /** 91 /**
92 * Regular expression that RegExp filters specified as RegExps should match 92 * Regular expression that RegExp filters specified as RegExps should match
93 * @type {RegExp} 93 * @type {RegExp}
94 */ 94 */
95 Filter.regexpRegExp = /^(@@)?\/.*\/(?:\$~?[\w-]+(?:=[^,\s]+)?(?:,~?[\w-]+(?:=[^,\s]+)?)*)?$/; 95 Filter.regexpRegExp = /^(@@)?\/.*\/(?:\$~?[\w-]+(?:=[^,\s]+)?(?:,~?[\w-]+(?:=[^,\s]+)?)*)?$/;
96 /** 96 /**
97 * Regular expression that options on a RegExp filter should match 97 * Regular expression that options on a RegExp filter should match
98 * @type {RegExp} 98 * @type {RegExp}
99 */ 99 */
100 Filter.optionsRegExp = /\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$/; 100 Filter.optionsRegExp = /\$(~?[\w-]+(?:=[^,]+)?(?:,~?[\w-]+(?:=[^,]+)?)*)$/;
101 /**
102 * Regular expression that matches an invalid Content Security Policy
103 * @type {RegExp}
104 */
105 Filter.invalidCSPRegExp = /(;|^) ?(base-uri|referrer|report-to|report-uri|upgrade-insecure-requests)\b/i;
101 106
102 /** 107 /**
103 * Creates a filter of correct type from its text representation - does the 108 * Creates a filter of correct type from its text representation - does the
104 * basic parsing and calls the right constructor then. 109 * basic parsing and calls the right constructor then.
105 * 110 *
106 * @param {string} text as in Filter() 111 * @param {string} text as in Filter()
107 * @return {Filter} 112 * @return {Filter}
108 */ 113 */
109 Filter.fromText = function(text) 114 Filter.fromText = function(text)
110 { 115 {
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 * Removes unnecessary whitespaces from filter text, will only return null if 170 * Removes unnecessary whitespaces from filter text, will only return null if
166 * the input parameter is null. 171 * the input parameter is null.
167 * @param {string} text 172 * @param {string} text
168 * @return {string} 173 * @return {string}
169 */ 174 */
170 Filter.normalize = function(text) 175 Filter.normalize = function(text)
171 { 176 {
172 if (!text) 177 if (!text)
173 return text; 178 return text;
174 179
175 // Remove line breaks and such 180 // Remove line breaks, tabs etc
176 text = text.replace(/[^\S ]/g, ""); 181 text = text.replace(/[^\S ]+/g, "");
177 182
178 // Don't remove spaces inside comments 183 // Don't remove spaces inside comments
179 if (/^\s*!/.test(text)) 184 if (/^ *!/.test(text))
180 return text.trim(); 185 return text.trim();
181 186
182 // Special treatment for element hiding filters, right side is allowed to 187 // Special treatment for element hiding filters, right side is allowed to
183 // contain spaces 188 // contain spaces
184 if (Filter.elemhideRegExp.test(text)) 189 if (Filter.elemhideRegExp.test(text))
185 { 190 {
186 let [, domain, separator, selector] = /^(.*?)(#@?#?)(.*)$/.exec(text); 191 let [, domain, separator, selector] = /^(.*?)(#@?#?)(.*)$/.exec(text);
187 return domain.replace(/\s/g, "") + separator + selector.trim(); 192 return domain.replace(/ +/g, "") + separator + selector.trim();
188 } 193 }
189 194
190 // For most regexp filters we strip all whitespace, but the values of $csp 195 // For most regexp filters we strip all spaces, but $csp filter options
Manish Jethani 2018/03/12 18:53:59 So this doesn't work for the following cases: 1.
Sebastian Noack 2018/03/12 23:19:01 Splitting the options list into an array (for ever
Manish Jethani 2018/03/13 06:59:30 You mean in terms of memory consumption? I ran a t
Manish Jethani 2018/03/13 07:35:00 OK, so I inlined that bit, now this seems to perfo
kzar 2018/03/14 13:54:37 Well spotted, I've added some unit tests for those
Sebastian Noack 2018/03/14 20:59:20 Any reason, why this suggestion was ignored, witho
kzar 2018/03/15 10:26:24 I ignored that since it assumed the first '$' was
Manish Jethani 2018/03/15 10:42:41 I think Sebastian is referring to the rest of the
kzar 2018/03/15 11:38:50 So instead of splitting the options string by ","
Manish Jethani 2018/03/15 12:00:11 Yes, it just turned out to be faster.
Sebastian Noack 2018/03/15 17:25:41 I guess, if it's only in the code path hit for $cs
191 // filter options are allowed to contain single (non trailing) spaces. 196 // are allowed to contain single (non trailing) spaces.
192 let strippedText = text.replace(/\s/g, ""); 197 let strippedText = text.replace(/ +/g, "");
Manish Jethani 2018/03/12 16:34:53 Since we have already stripped out all non-space w
kzar 2018/03/14 13:54:38 I think you're right, Done.
193 if (!/csp=/i.test(strippedText)) 198 if (!strippedText.includes("$") || !/\bcsp=/i.test(strippedText))
194 return strippedText; 199 return strippedText;
195 200
196 let optionsMatch = Filter.optionsRegExp.exec(strippedText); 201 let optionsMatch = Filter.optionsRegExp.exec(strippedText);
Manish Jethani 2018/03/12 16:34:53 We can just look for "$" here and if none is prese
kzar 2018/03/14 13:54:38 No, that's not good enough unfortunately. There ca
Manish Jethani 2018/03/14 18:10:49 OK, but I wonder if we shouldn't just look for a "
197 if (!optionsMatch) 202 if (!optionsMatch)
198 return strippedText; 203 return strippedText;
199 204
200 // We know where the options part starts in the filter text that's stripped 205 // For $csp filters we must first separate out the options part of the
201 // of whitespace, next we must find the corresponding position in the original 206 // text, being careful to preserve its spaces.
202 // filter text. 207 let beforeOptions = strippedText.substring(0, optionsMatch.index);
203 let optionsPosition = 0; 208 let strippedDollarIndex = -1;
Manish Jethani 2018/03/12 16:34:53 This part is not being used at all.
kzar 2018/03/14 13:54:38 Whoops, you're right. I've fixed that now.
204 let offset = 0; 209 let dollarIndex = -1;
205 while (offset > -1) 210 do
206 { 211 {
207 optionsPosition = text.substring(optionsPosition).indexOf("$"); 212 strippedDollarIndex = beforeOptions.indexOf("$", strippedDollarIndex + 1);
208 offset = strippedText.substring(offset, optionsPosition).indexOf("$"); 213 dollarIndex = text.indexOf("$", dollarIndex + 1);
209 } 214 }
210 215 while (strippedDollarIndex != -1);
211 // Finally with that we can generally strip whitespace, being careful to not 216 let optionsText = text.substr(dollarIndex + 1);
212 // to for $csp filter values. 217
213 let parts = []; 218 // Then we can normalize spaces in the options part safely
214 let position = 0; 219 let options = optionsText.split(",");
215 let cspRegexp = /(c\s*s\s*p\s*=)([^,]+)/ig; 220 for (let i = 0; i < options.length; i++)
216 let cspMatch; 221 {
217 while (cspMatch = cspRegexp.exec(text)) 222 let option = options[i];
218 { 223 let cspMatch = /^ *c *s *p *=/i.exec(option);
219 parts.push( 224 if (cspMatch)
Manish Jethani 2018/03/12 16:34:53 We've already stripped whitespace once, now we're
kzar 2018/03/14 13:54:37 No because we're expecting thousands of non-csp fi
220 text.substring(position, cspMatch.index + cspMatch[1].length) 225 {
221 .replace(/\s/g, "") 226 options[i] = cspMatch[0].replace(/ +/g, "") +
222 ); 227 option.substr(cspMatch[0].length).trim().replace(/ +/g, " ");
223 parts.push( 228 }
224 text.substr(cspMatch.index + cspMatch[1].length, cspMatch[2].length) 229 else
225 .replace(/\s+/g, " ") 230 options[i] = option.replace(/ +/g, "");
226 .trim() 231 }
227 ); 232
228 position = cspMatch.index + cspMatch[0].length; 233 return beforeOptions + "$" + options.join();
229 }
230 parts.push(text.substr(position).replace(/\s/g, ""));
231 return parts.join("");
232 }; 234 };
233 235
234 /** 236 /**
235 * @see filterToRegExp 237 * @see filterToRegExp
236 */ 238 */
237 Filter.toRegExp = filterToRegExp; 239 Filter.toRegExp = filterToRegExp;
238 240
239 /** 241 /**
240 * Class for invalid filters 242 * Class for invalid filters
241 * @param {string} text see Filter() 243 * @param {string} text see Filter()
(...skipping 533 matching lines...) Expand 10 before | Expand all | Expand 10 after
775 { 777 {
776 options = match[1].split(","); 778 options = match[1].split(",");
777 text = match.input.substr(0, match.index); 779 text = match.input.substr(0, match.index);
778 for (let option of options) 780 for (let option of options)
779 { 781 {
780 let value = null; 782 let value = null;
781 let separatorIndex = option.indexOf("="); 783 let separatorIndex = option.indexOf("=");
782 if (separatorIndex >= 0) 784 if (separatorIndex >= 0)
783 { 785 {
784 value = option.substr(separatorIndex + 1); 786 value = option.substr(separatorIndex + 1);
785 option = option.substr(0, separatorIndex).toUpperCase(); 787 option = option.substr(0, separatorIndex);
786
787 if (option == "CSP")
788 value = value.trim();
789 else
790 value = value.replace(/\s/g, "");
791 } 788 }
792 else 789 option = option.replace(/-/, "_").toUpperCase();
793 option = option.toUpperCase();
794
795 option = option.replace(/-/, "_");
796
797 if (option in RegExpFilter.typeMap) 790 if (option in RegExpFilter.typeMap)
798 { 791 {
799 if (contentType == null) 792 if (contentType == null)
800 contentType = 0; 793 contentType = 0;
801 contentType |= RegExpFilter.typeMap[option]; 794 contentType |= RegExpFilter.typeMap[option];
802 795
803 if (option == "CSP" && typeof value != "undefined") 796 if (option == "CSP" && typeof value != "undefined")
804 { 797 csp = value;
805 if (csp)
806 csp.push(value);
807 else
808 csp = [value];
809 }
810 } 798 }
811 else if (option[0] == "~" && option.substr(1) in RegExpFilter.typeMap) 799 else if (option[0] == "~" && option.substr(1) in RegExpFilter.typeMap)
812 { 800 {
813 if (contentType == null) 801 if (contentType == null)
814 ({contentType} = RegExpFilter.prototype); 802 ({contentType} = RegExpFilter.prototype);
815 contentType &= ~RegExpFilter.typeMap[option.substr(1)]; 803 contentType &= ~RegExpFilter.typeMap[option.substr(1)];
816 } 804 }
817 else if (option == "MATCH_CASE") 805 else if (option == "MATCH_CASE")
818 matchCase = true; 806 matchCase = true;
819 else if (option == "~MATCH_CASE") 807 else if (option == "~MATCH_CASE")
820 matchCase = false; 808 matchCase = false;
821 else if (option == "DOMAIN" && typeof value != "undefined") 809 else if (option == "DOMAIN" && typeof value != "undefined")
822 domains = value.toUpperCase(); 810 domains = value.toUpperCase();
823 else if (option == "THIRD_PARTY") 811 else if (option == "THIRD_PARTY")
824 thirdParty = true; 812 thirdParty = true;
825 else if (option == "~THIRD_PARTY") 813 else if (option == "~THIRD_PARTY")
826 thirdParty = false; 814 thirdParty = false;
827 else if (option == "COLLAPSE") 815 else if (option == "COLLAPSE")
828 collapse = true; 816 collapse = true;
829 else if (option == "~COLLAPSE") 817 else if (option == "~COLLAPSE")
830 collapse = false; 818 collapse = false;
831 else if (option == "SITEKEY" && typeof value != "undefined") 819 else if (option == "SITEKEY" && typeof value != "undefined")
832 sitekeys = value.toUpperCase(); 820 sitekeys = value.toUpperCase();
833 else 821 else
834 return new InvalidFilter(origText, "filter_unknown_option"); 822 return new InvalidFilter(origText, "filter_unknown_option");
835 } 823 }
836 } 824 }
837 text = text.replace(/\s/g, "");
838 825
839 try 826 try
840 { 827 {
841 if (blocking) 828 if (blocking)
842 { 829 {
843 if (csp) 830 if (csp && Filter.invalidCSPRegExp.test(csp))
844 { 831 return new InvalidFilter(origText, "filter_invalid_csp");
845 csp = csp.join("; ").toLowerCase();
846
847 // Prevent filters from injecting report-uri or report-to directives
848 // since they are a privacy concern. Regexp based upon reBadCSP[1].
849 // [1] - https://github.com/gorhill/uBlock/blob/67e06f53b4d73df6179f6d320553a55da4ead40e/src/js/static-net-filtering.js#L1362
850 if (/(;|^)\s*report-(to|uri)\b/.test(csp))
851 return new InvalidFilter(origText, "filter_invalid_csp");
852 }
853 832
854 return new BlockingFilter(origText, text, contentType, matchCase, domains, 833 return new BlockingFilter(origText, text, contentType, matchCase, domains,
855 thirdParty, sitekeys, collapse, csp); 834 thirdParty, sitekeys, collapse, csp);
856 } 835 }
857 return new WhitelistFilter(origText, text, contentType, matchCase, domains, 836 return new WhitelistFilter(origText, text, contentType, matchCase, domains,
858 thirdParty, sitekeys); 837 thirdParty, sitekeys);
859 } 838 }
860 catch (e) 839 catch (e)
861 { 840 {
862 return new InvalidFilter(origText, "filter_invalid_regexp"); 841 return new InvalidFilter(origText, "filter_invalid_regexp");
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
1101 */ 1080 */
1102 function ElemHideEmulationFilter(text, domains, selector) 1081 function ElemHideEmulationFilter(text, domains, selector)
1103 { 1082 {
1104 ElemHideBase.call(this, text, domains, selector); 1083 ElemHideBase.call(this, text, domains, selector);
1105 } 1084 }
1106 exports.ElemHideEmulationFilter = ElemHideEmulationFilter; 1085 exports.ElemHideEmulationFilter = ElemHideEmulationFilter;
1107 1086
1108 ElemHideEmulationFilter.prototype = extend(ElemHideBase, { 1087 ElemHideEmulationFilter.prototype = extend(ElemHideBase, {
1109 type: "elemhideemulation" 1088 type: "elemhideemulation"
1110 }); 1089 });
LEFTRIGHT

Powered by Google App Engine
This is Rietveld