| LEFT | RIGHT |
| 1 #include <climits> | 1 #include <climits> |
| 2 | 2 |
| 3 #include <emscripten.h> | 3 #include <emscripten.h> |
| 4 | 4 |
| 5 #include "RegExpFilter.h" | 5 #include "RegExpFilter.h" |
| 6 #include "WhitelistFilter.h" | |
| 7 #include "InvalidFilter.h" | |
| 8 #include "StringScanner.h" | 6 #include "StringScanner.h" |
| 9 #include "StringMap.h" | 7 #include "StringMap.h" |
| 10 | 8 |
| 11 namespace | 9 namespace |
| 12 { | 10 { |
| 13 enum | 11 enum |
| 14 { | 12 { |
| 15 TYPE_OTHER = 0x1, | 13 TYPE_OTHER = 0x1, |
| 16 TYPE_SCRIPT = 0x2, | 14 TYPE_SCRIPT = 0x2, |
| 17 TYPE_IMAGE = 0x4, | 15 TYPE_IMAGE = 0x4, |
| (...skipping 28 matching lines...) Expand all Loading... |
| 46 {u"media"_str, TYPE_MEDIA}, | 44 {u"media"_str, TYPE_MEDIA}, |
| 47 {u"font"_str, TYPE_FONT}, | 45 {u"font"_str, TYPE_FONT}, |
| 48 {u"background"_str, TYPE_IMAGE}, // Backwards compat | 46 {u"background"_str, TYPE_IMAGE}, // Backwards compat |
| 49 | 47 |
| 50 {u"popup"_str, TYPE_POPUP}, | 48 {u"popup"_str, TYPE_POPUP}, |
| 51 {u"genericblock"_str, TYPE_GENERICBLOCK}, | 49 {u"genericblock"_str, TYPE_GENERICBLOCK}, |
| 52 {u"generichide"_str, TYPE_GENERICHIDE}, | 50 {u"generichide"_str, TYPE_GENERICHIDE}, |
| 53 {u"elemhide"_str, TYPE_ELEMHIDE}, | 51 {u"elemhide"_str, TYPE_ELEMHIDE}, |
| 54 }; | 52 }; |
| 55 | 53 |
| 56 int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP | | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | |
| 57 TYPE_GENERICBLOCK | TYPE_GENERICHIDE); | 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE); |
| 58 | 56 |
| 59 int GenerateRegExp(const String& regexp, bool matchCase) | 57 int GenerateRegExp(const String& regexp, bool matchCase) |
| 60 { | 58 { |
| 61 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); | 59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); |
| 62 } | 60 } |
| 63 } | 61 |
| 64 | 62 void NormalizeWhitespace(DependentString& text) |
| 65 RegExpFilter::RegExpFilter(const String& text, | 63 { |
| 66 String::size_type patternStart, String::size_type patternEnd) | 64 // We want to remove all spaces but bail out early in the common scenario |
| 67 : ActiveFilter(text, true), mRegexpId(0), | 65 // that the string contains no spaces. |
| 68 mRegexpSource(String(mText, patternStart, patternEnd - patternStart)), | 66 |
| 69 mContentType(-1), mMatchCase(false), mThirdParty(TrippleState::ANY) | 67 // Look for the first space |
| 70 { | 68 String::size_type len = text.length(); |
| 71 String options(mText, patternEnd + 1); | 69 String::size_type pos; |
| 72 StringScanner scanner(options, u','); | 70 for (pos = 0; pos < len; pos++) |
| 73 int optionStart = 0; | 71 if (text[pos] == ' ') |
| 74 int optionEnd = -1; | 72 break; |
| 75 int valueStart = -1; | 73 |
| 74 if (pos >= len) |
| 75 return; |
| 76 |
| 77 // Found spaces, move characters to remove them |
| 78 String::size_type delta = 1; |
| 79 for (pos = pos + 1; pos < len; pos++) |
| 80 { |
| 81 if (text[pos] == ' ') |
| 82 delta++; |
| 83 else |
| 84 text[pos - delta] = text[pos]; |
| 85 } |
| 86 text.reset(text, 0, len - delta); |
| 87 } |
| 88 |
| 89 void ParseOption(String& text, DependentString& error, RegExpFilterData& data, |
| 90 int optionStart, int optionEnd, int valueStart, int valueEnd) |
| 91 { |
| 92 if (optionEnd <= optionStart) |
| 93 return; |
| 94 |
| 95 bool reverse = false; |
| 96 if (text[optionStart] == u'~') |
| 97 { |
| 98 reverse = true; |
| 99 optionStart++; |
| 100 } |
| 101 |
| 102 DependentString name(text, optionStart, optionEnd - optionStart); |
| 103 for (size_t i = 0; i < name.length(); ++i) |
| 104 { |
| 105 char16_t currChar = name[i]; |
| 106 if (currChar >= u'A' && currChar <= u'Z') |
| 107 name[i] = currChar + u'a' - u'A'; |
| 108 else if (currChar == u'_') |
| 109 name[i] = u'-'; |
| 110 } |
| 111 |
| 112 auto it = typeMap.find(name); |
| 113 if (it) |
| 114 { |
| 115 if (data.mContentType < 0) |
| 116 data.mContentType = reverse ? defaultTypeMask : 0; |
| 117 if (reverse) |
| 118 data.mContentType &= ~it->second; |
| 119 else |
| 120 data.mContentType |= it->second; |
| 121 } |
| 122 else if (name.equals(u"domain"_str)) |
| 123 { |
| 124 if (valueStart >= 0 && valueEnd > valueStart) |
| 125 { |
| 126 data.mDomainsStart = valueStart; |
| 127 data.mDomainsEnd = valueEnd; |
| 128 DependentString(text, valueStart, valueEnd - valueStart).toLower(); |
| 129 } |
| 130 } |
| 131 else if (name.equals(u"sitekey"_str)) |
| 132 { |
| 133 if (valueStart >= 0 && valueEnd > valueStart) |
| 134 { |
| 135 data.mSitekeysStart = valueStart; |
| 136 data.mSitekeysEnd = valueEnd; |
| 137 } |
| 138 } |
| 139 else if (name.equals(u"match-case"_str)) |
| 140 data.mMatchCase = !reverse; |
| 141 else if (name.equals(u"third-party"_str)) |
| 142 data.mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; |
| 143 else if (name.equals(u"collapse"_str)) |
| 144 data.mCollapse = reverse ? TrippleState::NO : TrippleState::YES; |
| 145 else |
| 146 error.reset(u"filter_unknown_option"_str); |
| 147 } |
| 148 |
| 149 void ParseOptions(String& text, DependentString& error, RegExpFilterData& data, |
| 150 String::size_type optionsStart) |
| 151 { |
| 152 data.mMatchCase = false; |
| 153 data.mThirdParty = TrippleState::ANY; |
| 154 data.mCollapse = TrippleState::ANY; |
| 155 data.mDomainsStart = String::npos; |
| 156 data.mSitekeysStart = String::npos; |
| 157 if (optionsStart >= text.length()) |
| 158 { |
| 159 data.mContentType = defaultTypeMask; |
| 160 return; |
| 161 } |
| 162 |
| 163 data.mContentType = -1; |
| 164 |
| 165 int optionStart = data.mPatternEnd + 1; |
| 166 int optionEnd = -1; |
| 167 int valueStart = -1; |
| 168 |
| 169 StringScanner scanner(text, optionStart, u','); |
| 170 bool done = false; |
| 171 while (!done) |
| 172 { |
| 173 done = scanner.done(); |
| 174 switch (scanner.next()) |
| 175 { |
| 176 case u'=': |
| 177 if (optionEnd < 0) |
| 178 { |
| 179 optionEnd = scanner.position(); |
| 180 valueStart = optionEnd + 1; |
| 181 } |
| 182 break; |
| 183 case u',': |
| 184 if (optionEnd < 0) |
| 185 optionEnd = scanner.position(); |
| 186 ParseOption(text, error, data, optionStart, optionEnd, valueStart, |
| 187 scanner.position()); |
| 188 if (!error.empty()) |
| 189 return; |
| 190 |
| 191 optionStart = scanner.position() + 1; |
| 192 optionEnd = -1; |
| 193 valueStart = -1; |
| 194 break; |
| 195 } |
| 196 } |
| 197 |
| 198 if (data.mContentType < 0) |
| 199 data.mContentType = defaultTypeMask; |
| 200 } |
| 201 } |
| 202 |
| 203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData& data) |
| 204 : ActiveFilter(type, text, true), mData(data) |
| 205 { |
| 206 } |
| 207 |
| 208 RegExpFilter::~RegExpFilter() |
| 209 { |
| 210 if (mData.HasRegExp()) |
| 211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId); |
| 212 } |
| 213 |
| 214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error, |
| 215 RegExpFilterData& data) |
| 216 { |
| 217 NormalizeWhitespace(text); |
| 218 |
| 219 Filter::Type type = Type::BLOCKING; |
| 220 |
| 221 data.mPatternStart = 0; |
| 222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') |
| 223 { |
| 224 type = Type::WHITELIST; |
| 225 data.mPatternStart = 2; |
| 226 } |
| 227 |
| 228 data.mPatternEnd = text.find(u'$', data.mPatternStart); |
| 229 if (data.mPatternEnd == text.npos) |
| 230 data.mPatternEnd = text.length(); |
| 231 |
| 232 ParseOptions(text, error, data, data.mPatternEnd + 1); |
| 233 if (!error.empty()) |
| 234 return Type::INVALID; |
| 235 |
| 236 if (data.mPatternEnd - data.mPatternStart >= 2 && |
| 237 text[data.mPatternStart] == u'/' && |
| 238 text[data.mPatternEnd - 1] == u'/') |
| 239 { |
| 240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, |
| 241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); |
| 242 if (data.mRegexpId == -1) |
| 243 { |
| 244 error.reset(u"filter_invalid_regexp"_str); |
| 245 return Type::INVALID; |
| 246 } |
| 247 } |
| 248 |
| 249 return type; |
| 250 } |
| 251 |
| 252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const |
| 253 { |
| 254 StringScanner scanner(sitekeys, 0, u'|'); |
| 255 size_t start = 0; |
| 76 bool done = false; | 256 bool done = false; |
| 77 while (!done) | 257 while (!done) |
| 78 { | 258 { |
| 79 done = scanner.done(); | 259 done = scanner.done(); |
| 80 switch (scanner.next()) | 260 if (scanner.next() == u'|') |
| 81 { | 261 { |
| 82 case u'=': | 262 if (scanner.position() > start) |
| 83 if (optionEnd < 0) | 263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start)); |
| 84 { | 264 start = scanner.position() + 1; |
| 85 optionEnd = scanner.position(); | 265 } |
| 86 valueStart = optionEnd + 1; | |
| 87 } | |
| 88 break; | |
| 89 case u',': | |
| 90 if (optionEnd < 0) | |
| 91 optionEnd = scanner.position(); | |
| 92 ProcessOption(options, optionStart, optionEnd, valueStart, scanner.position()); | |
| 93 optionStart = scanner.position() + 1; | |
| 94 optionEnd = -1; | |
| 95 valueStart = -1; | |
| 96 break; | |
| 97 } | |
| 98 } | |
| 99 if (mContentType < 0) | |
| 100 mContentType = defaultTypeMask; | |
| 101 | |
| 102 size_t len = mRegexpSource.length(); | |
| 103 if (len >= 2 && mRegexpSource[0] == u'/' && mRegexpSource[len - 1] == u'/') | |
| 104 { | |
| 105 mRegexpSource.reset(mRegexpSource, 1 , len - 2); | |
| 106 mRegexpId = GenerateRegExp(mRegexpSource, mMatchCase); | |
| 107 | |
| 108 int errorLength = EM_ASM_INT(return regexps.getErrorLength($0), mRegexpId); | |
| 109 if (errorLength >= 0) | |
| 110 { | |
| 111 String error(errorLength); | |
| 112 EM_ASM_ARGS(regexps.getError($0, $1), mRegexpId, error.data()); | |
| 113 throw error; | |
| 114 } | |
| 115 } | |
| 116 } | |
| 117 | |
| 118 RegExpFilter::~RegExpFilter() | |
| 119 { | |
| 120 if (mRegexpId) | |
| 121 EM_ASM_ARGS(regexps.delete($0), mRegexpId); | |
| 122 } | |
| 123 | |
| 124 void RegExpFilter::ProcessOption(String& options, int optionStart, | |
| 125 int optionEnd, int valueStart, int valueEnd) | |
| 126 { | |
| 127 if (optionEnd <= optionStart) | |
| 128 return; | |
| 129 | |
| 130 bool reverse = false; | |
| 131 if (options[optionStart] == u'~') | |
| 132 { | |
| 133 reverse = true; | |
| 134 optionStart++; | |
| 135 } | |
| 136 | |
| 137 String name(options, optionStart, optionEnd - optionStart); | |
| 138 for (size_t i = 0; i < name.length(); ++i) | |
| 139 { | |
| 140 char16_t currChar = name[i]; | |
| 141 if (currChar >= u'A' && currChar <= u'Z') | |
| 142 name[i] = currChar + u'a' - u'A'; | |
| 143 else if (currChar == u'_') | |
| 144 name[i] = u'-'; | |
| 145 } | |
| 146 | |
| 147 auto it = typeMap.find(name); | |
| 148 if (it != typeMap.end()) | |
| 149 { | |
| 150 if (mContentType < 0) | |
| 151 mContentType = reverse ? defaultTypeMask : 0; | |
| 152 if (reverse) | |
| 153 mContentType &= ~it->second; | |
| 154 else | |
| 155 mContentType |= it->second; | |
| 156 } | |
| 157 else if (name.equals(u"domain"_str)) | |
| 158 { | |
| 159 if (valueStart >= 0 && valueEnd > valueStart) | |
| 160 ParseDomains(String(options, valueStart, valueEnd - valueStart), u'|'); | |
| 161 } | |
| 162 else if (name.equals(u"sitekey"_str)) | |
| 163 { | |
| 164 if (valueStart >= 0 && valueEnd > valueStart) | |
| 165 { | |
| 166 StringScanner scanner(String(options, valueStart, valueEnd - valueStart), u'|'); | |
| 167 size_t start = 0; | |
| 168 bool done = false; | |
| 169 while (!done) | |
| 170 { | |
| 171 done = scanner.done(); | |
| 172 if (scanner.next() == u'|') | |
| 173 { | |
| 174 if (scanner.position() > start) | |
| 175 AddSitekey(String(options, valueStart + start, scanner.position() - start)); | |
| 176 start = scanner.position() + 1; | |
| 177 } | |
| 178 } | |
| 179 } | |
| 180 } | |
| 181 else if (name.equals(u"match-case"_str)) | |
| 182 mMatchCase = !reverse; | |
| 183 else if (name.equals(u"third-party"_str)) | |
| 184 mThirdParty = reverse ? TrippleState::NO : TrippleState::YES; | |
| 185 else if (name.equals(u"collapse"_str)) | |
| 186 mCollapse = reverse ? TrippleState::NO : TrippleState::YES; | |
| 187 else | |
| 188 { | |
| 189 String error(u"Unknown option "_str); | |
| 190 error.append(name); | |
| 191 throw std::move(error); | |
| 192 } | |
| 193 } | |
| 194 | |
| 195 Filter* RegExpFilter::Create(const String& text) | |
| 196 { | |
| 197 bool blocking = true; | |
| 198 String::size_type patternStart = 0; | |
| 199 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') | |
| 200 { | |
| 201 blocking = false; | |
| 202 patternStart = 2; | |
| 203 } | |
| 204 | |
| 205 String::size_type patternEnd = text.find(u'$', patternStart); | |
| 206 if (patternEnd == text.npos) | |
| 207 patternEnd = text.length(); | |
| 208 | |
| 209 try | |
| 210 { | |
| 211 if (blocking) | |
| 212 return new RegExpFilter(text, patternStart, patternEnd); | |
| 213 else | |
| 214 return new WhitelistFilter(text, patternStart, patternEnd); | |
| 215 } | |
| 216 catch (const String& reason) | |
| 217 { | |
| 218 return new InvalidFilter(text, reason); | |
| 219 } | 266 } |
| 220 } | 267 } |
| 221 | 268 |
| 222 void RegExpFilter::InitJSTypes() | 269 void RegExpFilter::InitJSTypes() |
| 223 { | 270 { |
| 224 EM_ASM(exports.RegExpFilter.typeMap = {};); | 271 EM_ASM(exports.RegExpFilter.typeMap = {};); |
| 225 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) | 272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) |
| 226 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); | 273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); |
| 227 } | 274 } |
| 228 | 275 |
| 229 String RegExpFilter::RegExpFromSource(const String& source) | 276 OwnedString RegExpFilter::RegExpFromSource(const String& source) |
| 230 { | 277 { |
| 231 /* TODO: this is very inefficient */ | 278 /* TODO: this is very inefficient */ |
| 232 | 279 |
| 233 // Note: This doesn't remove trailing wildcards, otherwise the result should | 280 // Note: This doesn't remove trailing wildcards, otherwise the result should |
| 234 // be identical to Filter.toRegExp(). | 281 // be identical to Filter.toRegExp(). |
| 235 String result; | 282 OwnedString result; |
| 236 String::value_type prevChar = u'*'; | 283 String::value_type prevChar = u'*'; |
| 237 for (String::size_type i = 0; i < source.length(); ++i) | 284 for (String::size_type i = 0; i < source.length(); ++i) |
| 238 { | 285 { |
| 239 String::value_type currChar = source[i]; | 286 String::value_type currChar = source[i]; |
| 240 switch (currChar) | 287 switch (currChar) |
| 241 { | 288 { |
| 242 case u'*': | 289 case u'*': |
| 243 if (prevChar != u'*') | 290 if (prevChar != u'*') |
| 244 result.append(u".*"_str); | 291 result.append(u".*"_str); |
| 245 break; | 292 break; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 275 !(currChar >= u'A' && currChar <= u'Z') && | 322 !(currChar >= u'A' && currChar <= u'Z') && |
| 276 !(currChar >= u'0' && currChar <= u'9') && | 323 !(currChar >= u'0' && currChar <= u'9') && |
| 277 currChar < 128) | 324 currChar < 128) |
| 278 { | 325 { |
| 279 result.append(u'\\'); | 326 result.append(u'\\'); |
| 280 } | 327 } |
| 281 result.append(currChar); | 328 result.append(currChar); |
| 282 } | 329 } |
| 283 prevChar = currChar; | 330 prevChar = currChar; |
| 284 } | 331 } |
| 285 return std::move(result); | 332 return result; |
| 286 } | 333 } |
| 287 | 334 |
| 288 Filter::Type RegExpFilter::GetType() const | 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const |
| 289 { | 336 { |
| 290 return Type::BLOCKING; | 337 if (!mData.DomainsParsingDone()) |
| 338 { |
| 339 ParseDomains(mData.GetDomainsSource(mText), u'|'); |
| 340 mData.SetDomainsParsingDone(); |
| 341 } |
| 342 return ActiveFilter::GetDomains(); |
| 343 } |
| 344 |
| 345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const |
| 346 { |
| 347 if (!mData.SitekeyParsingDone()) |
| 348 { |
| 349 ParseSitekeys(mData.GetSitekeysSource(mText)); |
| 350 mData.SetSitekeysParsingDone(); |
| 351 } |
| 352 return ActiveFilter::GetSitekeys(); |
| 291 } | 353 } |
| 292 | 354 |
| 293 bool RegExpFilter::Matches(const String& location, int typeMask, | 355 bool RegExpFilter::Matches(const String& location, int typeMask, |
| 294 String& docDomain, bool thirdParty, const String& sitekey) const | 356 DependentString& docDomain, bool thirdParty, const String& sitekey) const |
| 295 { | 357 { |
| 296 if (!(mContentType & typeMask) || | 358 if (!(mData.mContentType & typeMask) || |
| 297 (mThirdParty == TrippleState::YES && !thirdParty) || | 359 (mData.mThirdParty == TrippleState::YES && !thirdParty) || |
| 298 (mThirdParty == TrippleState::NO && thirdParty) || | 360 (mData.mThirdParty == TrippleState::NO && thirdParty) || |
| 299 !IsActiveOnDomain(docDomain, sitekey)) | 361 !IsActiveOnDomain(docDomain, sitekey)) |
| 300 { | 362 { |
| 301 return false; | 363 return false; |
| 302 } | 364 } |
| 303 | 365 |
| 304 if (!mRegexpId) | 366 if (!mData.RegExpParsingDone()) |
| 305 mRegexpId = GenerateRegExp(RegExpFromSource(mRegexpSource), mMatchCase); | 367 { |
| 306 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); | 368 const OwnedString pattern(mData.GetRegExpSource(mText)); |
| 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase)); |
| 370 } |
| 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location); |
| 372 } |
| LEFT | RIGHT |