LEFT | RIGHT |
1 #include <emscripten.h> | |
2 #include <unordered_map> | |
3 | |
4 #include "Filter.h" | 1 #include "Filter.h" |
5 #include "CommentFilter.h" | 2 #include "CommentFilter.h" |
| 3 #include "InvalidFilter.h" |
6 #include "RegExpFilter.h" | 4 #include "RegExpFilter.h" |
| 5 #include "BlockingFilter.h" |
| 6 #include "WhitelistFilter.h" |
| 7 #include "ElemHideBase.h" |
7 #include "ElemHideFilter.h" | 8 #include "ElemHideFilter.h" |
8 #include "ElemHideException.h" | 9 #include "ElemHideException.h" |
9 #include "tools.h" | 10 #include "CSSPropertyFilter.h" |
10 | 11 #include "StringMap.h" |
11 Filter::Filter(const std::u16string& text) | |
12 { | |
13 this->text = text; | |
14 } | |
15 | |
16 const std::u16string Filter::Serialize() | |
17 { | |
18 return ( | |
19 u"[Filter]\n" | |
20 u"text=" + text + u"\n" | |
21 ); | |
22 } | |
23 | 12 |
24 namespace | 13 namespace |
25 { | 14 { |
26 std::unordered_map<std::u16string,Filter*> knownFilters; | 15 StringMap<Filter*> knownFilters(8192); |
27 | 16 |
28 void trim_spaces(std::u16string& str) | 17 void NormalizeWhitespace(DependentString& text) |
29 { | 18 { |
30 size_t pos; | 19 String::size_type start = 0; |
| 20 String::size_type end = text.length(); |
31 | 21 |
32 // Remove leading whitespace | 22 // Remove leading spaces and special characters like line breaks |
33 pos = str.find_first_not_of(u' '); | 23 for (; start < end; start++) |
34 if (pos > 0) | 24 if (text[start] > ' ') |
35 str.erase(0, pos); | 25 break; |
36 | 26 |
37 // Remove trailing whitespace | 27 // Now look for invalid characters inside the string |
38 pos = str.find_last_not_of(u' '); | 28 String::size_type pos; |
39 if (pos < str.length() - 1) | 29 for (pos = start; pos < end; pos++) |
40 str.erase(pos + 1); | 30 if (text[pos] < ' ') |
41 } | 31 break; |
42 | 32 |
43 void remove_spaces(std::u16string& str) | 33 if (pos < end) |
44 { | |
45 for (size_t i = 0, l = str.length(); i < l; ++i) | |
46 { | 34 { |
47 if (str[i] == u' ') | 35 // Found invalid characters, copy all the valid characters while skipping |
| 36 // the invalid ones. |
| 37 String::size_type delta = 1; |
| 38 for (pos = pos + 1; pos < end; pos++) |
48 { | 39 { |
49 str.erase(i, 1); | 40 if (text[pos] < ' ') |
50 --i; | 41 delta++; |
51 --l; | 42 else |
| 43 text[pos - delta] = text[pos]; |
52 } | 44 } |
| 45 end -= delta; |
53 } | 46 } |
| 47 |
| 48 // Remove trailing spaces |
| 49 for (; end > 0; end--) |
| 50 if (text[end - 1] != ' ') |
| 51 break; |
| 52 |
| 53 // Set new string boundaries |
| 54 text.reset(text, start, end - start); |
54 } | 55 } |
55 } | 56 } |
56 | 57 |
57 // API | 58 Filter::Filter(Type type, const String& text) |
58 extern "C" | 59 : mType(type), mText(text) |
59 { | 60 { |
60 Filter* EMSCRIPTEN_KEEPALIVE Filter_FromText(char16_t* textPtr, int textLen) | 61 annotate_address(this, "Filter"); |
| 62 } |
| 63 |
| 64 Filter::~Filter() |
| 65 { |
| 66 knownFilters.erase(mText); |
| 67 } |
| 68 |
| 69 OwnedString Filter::Serialize() const |
| 70 { |
| 71 OwnedString result(u"[Filter]\ntext="_str); |
| 72 result.append(mText); |
| 73 result.append(u'\n'); |
| 74 return result; |
| 75 } |
| 76 |
| 77 Filter* Filter::FromText(DependentString& text) |
| 78 { |
| 79 NormalizeWhitespace(text); |
| 80 if (text.empty()) |
| 81 return nullptr; |
| 82 |
| 83 // Parsing also normalizes the filter text, so it has to be done before the |
| 84 // lookup in knownFilters. |
| 85 union |
61 { | 86 { |
62 std::u16string text(textPtr, textLen); | 87 RegExpFilterData regexp; |
63 auto it = knownFilters.find(text); | 88 ElemHideData elemhide; |
64 if (it != knownFilters.end()) | 89 } data; |
65 return it->second; | 90 DependentString error; |
66 | 91 |
67 Filter* filter = CommentFilter::Create(text); | 92 Filter::Type type = CommentFilter::Parse(text); |
68 if (!filter) | 93 if (type == Filter::Type::UNKNOWN) |
69 filter = ElemHideBase::Create(text); | 94 type = ElemHideBase::Parse(text, data.elemhide); |
70 if (!filter) | 95 if (type == Filter::Type::UNKNOWN) |
71 filter = RegExpFilter::Create(text); | 96 type = RegExpFilter::Parse(text, error, data.regexp); |
72 return knownFilters[text] = filter; | 97 |
| 98 auto knownFilter = knownFilters.find(text); |
| 99 if (knownFilter) |
| 100 { |
| 101 knownFilter->second->AddRef(); |
| 102 return knownFilter->second; |
73 } | 103 } |
74 | 104 |
75 char16_t* EMSCRIPTEN_KEEPALIVE Filter_Normalize(char16_t* textPtr, size_t textLen, size_t* resultLen) | 105 FilterPtr filter; |
| 106 switch (type) |
76 { | 107 { |
77 std::u16string text(textPtr, textLen); | 108 case Filter::Type::COMMENT: |
78 | 109 filter = new CommentFilter(text); |
79 // Remove special characters like line breaks | 110 break; |
80 for (size_t i = 0, l = text.length(); i < l; ++i) | 111 case Filter::Type::INVALID: |
81 { | 112 filter = new InvalidFilter(text, error); |
82 if (text[i] < u' ') | 113 break; |
83 { | 114 case Filter::Type::BLOCKING: |
84 text.erase(i, 1); | 115 filter = new BlockingFilter(text, data.regexp); |
85 --i; | 116 break; |
86 --l; | 117 case Filter::Type::WHITELIST: |
87 } | 118 filter = new WhitelistFilter(text, data.regexp); |
88 } | 119 break; |
89 | 120 case Filter::Type::ELEMHIDE: |
90 trim_spaces(text); | 121 filter = new ElemHideFilter(text, data.elemhide); |
91 | 122 break; |
92 { | 123 case Filter::Type::ELEMHIDEEXCEPTION: |
93 size_t domainsEnd; | 124 filter = new ElemHideException(text, data.elemhide); |
94 size_t selectorStart; | 125 break; |
95 Filter::Type type = ElemHideBase::Parse(text, &domainsEnd, &selectorStart); | 126 case Filter::Type::CSSPROPERTY: |
96 if (type != Filter::Type::UNKNOWN) | 127 filter = new CSSPropertyFilter(text, data.elemhide); |
97 { | 128 if (static_cast<CSSPropertyFilter*>(filter.get())->IsGeneric()) |
98 std::u16string domains = text.substr(0, domainsEnd); | 129 filter = new InvalidFilter(text, u"filter_cssproperty_nodomain"_str); |
99 std::u16string selector = text.substr(selectorStart); | 130 break; |
100 remove_spaces(domains); | 131 default: |
101 trim_spaces(selector); | 132 // This should never happen but just in case |
102 return stringToBuffer(domains + (type == Filter::Type::ELEMHIDEEXCEPTION ? u"#@#" : u"##") + selector, resultLen); | 133 return nullptr; |
103 } | |
104 } | |
105 | |
106 if (CommentFilter::Parse(text) == Filter::Type::UNKNOWN) | |
107 remove_spaces(text); | |
108 return stringToBuffer(text, resultLen); | |
109 } | 134 } |
110 | 135 |
111 char16_t* EMSCRIPTEN_KEEPALIVE Filter_Serialize(Filter* filter, size_t* resultLen) | 136 // This is a hack: we looked up the entry using text but create it using |
112 { | 137 // filter->mText. This works because both are equal at this point. However, |
113 return stringToBuffer(filter->Serialize(), resultLen); | 138 // text refers to a temporary buffer which will go away. |
114 } | 139 enter_context("Adding to known filters"); |
| 140 knownFilter.assign(filter->mText, filter.get()); |
| 141 exit_context(); |
115 | 142 |
116 EXPOSE_READONLY_STRPROPERTY(Filter, text) | 143 return filter.release(); |
117 EXPOSE_READONLY_PROPERTY(Filter, Filter::Type, type) | |
118 } | 144 } |
LEFT | RIGHT |