LEFT | RIGHT |
1 #include <emscripten/bind.h> | |
2 #include <unordered_map> | |
3 | |
4 #include "Filter.h" | 1 #include "Filter.h" |
5 #include "CommentFilter.h" | 2 #include "CommentFilter.h" |
| 3 #include "InvalidFilter.h" |
6 #include "RegExpFilter.h" | 4 #include "RegExpFilter.h" |
| 5 #include "BlockingFilter.h" |
| 6 #include "WhitelistFilter.h" |
| 7 #include "ElemHideBase.h" |
7 #include "ElemHideFilter.h" | 8 #include "ElemHideFilter.h" |
8 #include "ElemHideException.h" | 9 #include "ElemHideException.h" |
| 10 #include "CSSPropertyFilter.h" |
| 11 #include "StringMap.h" |
9 | 12 |
10 namespace | 13 namespace |
11 { | 14 { |
12 std::unordered_map<std::wstring,FilterPtr> knownFilters; | 15 StringMap<Filter*> knownFilters(8192); |
13 | 16 |
14 void trim_spaces(std::wstring& str) | 17 void NormalizeWhitespace(DependentString& text) |
15 { | 18 { |
16 size_t pos; | 19 String::size_type start = 0; |
| 20 String::size_type end = text.length(); |
17 | 21 |
18 // Remove leading whitespace | 22 // Remove leading spaces and special characters like line breaks |
19 pos = str.find_first_not_of(L' '); | 23 for (; start < end; start++) |
20 if (pos > 0) | 24 if (text[start] > ' ') |
21 str.erase(0, pos); | 25 break; |
22 | 26 |
23 // Remove trailing whitespace | 27 // Now look for invalid characters inside the string |
24 pos = str.find_last_not_of(L' '); | 28 String::size_type pos; |
25 if (pos < str.length() - 1) | 29 for (pos = start; pos < end; pos++) |
26 str.erase(pos + 1); | 30 if (text[pos] < ' ') |
27 } | 31 break; |
28 | 32 |
29 void remove_spaces(std::wstring& str) | 33 if (pos < end) |
30 { | |
31 for (size_t i = 0, l = str.length(); i < l; ++i) | |
32 { | 34 { |
33 if (str[i] == L' ') | 35 // Found invalid characters, copy all the valid characters while skipping |
| 36 // the invalid ones. |
| 37 String::size_type delta = 1; |
| 38 for (pos = pos + 1; pos < end; pos++) |
34 { | 39 { |
35 str.erase(i, 1); | 40 if (text[pos] < ' ') |
36 --i; | 41 delta++; |
37 --l; | 42 else |
| 43 text[pos - delta] = text[pos]; |
38 } | 44 } |
| 45 end -= delta; |
39 } | 46 } |
| 47 |
| 48 // Remove trailing spaces |
| 49 for (; end > 0; end--) |
| 50 if (text[end - 1] != ' ') |
| 51 break; |
| 52 |
| 53 // Set new string boundaries |
| 54 text.reset(text, start, end - start); |
40 } | 55 } |
41 } | 56 } |
42 | 57 |
43 Filter::Filter(const std::wstring& text) | 58 Filter::Filter(Type type, const String& text) |
| 59 : mType(type), mText(text) |
44 { | 60 { |
45 this->text = text; | 61 annotate_address(this, "Filter"); |
46 } | 62 } |
47 | 63 |
48 const std::wstring Filter::Serialize() | 64 Filter::~Filter() |
49 { | 65 { |
50 return ( | 66 knownFilters.erase(mText); |
51 L"[Filter]\n" | |
52 L"text=" + text + L"\n" | |
53 ); | |
54 } | 67 } |
55 | 68 |
56 FilterPtr Filter::FromText(const std::wstring& text) | 69 OwnedString Filter::Serialize() const |
57 { | 70 { |
58 auto it = knownFilters.find(text); | 71 OwnedString result(u"[Filter]\ntext="_str); |
59 if (it != knownFilters.end()) | 72 result.append(mText); |
60 return it->second; | 73 result.append(u'\n'); |
61 | |
62 FilterPtr filter = FilterPtr(CommentFilter::Create(text)); | |
63 if (!filter) | |
64 filter.reset(ElemHideBase::Create(text)); | |
65 if (!filter) | |
66 filter.reset(RegExpFilter::Create(text)); | |
67 return knownFilters[text] = filter; | |
68 } | |
69 | |
70 const std::wstring Filter::Normalize(const std::wstring& text) | |
71 { | |
72 std::wstring result(text); | |
73 | |
74 // Remove special characters like line breaks | |
75 for (size_t i = 0, l = result.length(); i < l; ++i) | |
76 { | |
77 if (result[i] < L' ') | |
78 { | |
79 result.erase(i, 1); | |
80 --i; | |
81 --l; | |
82 } | |
83 } | |
84 | |
85 trim_spaces(result); | |
86 | |
87 { | |
88 size_t domainsEnd; | |
89 size_t selectorStart; | |
90 Filter::Type type = ElemHideBase::Parse(result, &domainsEnd, &selectorStart)
; | |
91 if (type != Filter::Type::UNKNOWN) | |
92 { | |
93 std::wstring domains = result.substr(0, domainsEnd); | |
94 std::wstring selector = result.substr(selectorStart); | |
95 remove_spaces(domains); | |
96 trim_spaces(selector); | |
97 return domains + ( | |
98 type == Filter::Type::ELEMHIDEEXCEPTION ? L"#@#" : L"##" | |
99 ) + selector; | |
100 } | |
101 } | |
102 | |
103 if (CommentFilter::Parse(result) == Filter::Type::UNKNOWN) | |
104 remove_spaces(result); | |
105 return result; | 74 return result; |
106 } | 75 } |
107 | 76 |
108 EMSCRIPTEN_BINDINGS(filter) | 77 Filter* Filter::FromText(DependentString& text) |
109 { | 78 { |
110 using namespace emscripten; | 79 NormalizeWhitespace(text); |
111 enum_<Filter::Type>("FilterType") | 80 if (text.empty()) |
112 .value("INVALID", Filter::Type::INVALID) | 81 return nullptr; |
113 .value("COMMENT", Filter::Type::COMMENT) | |
114 .value("BLOCKING", Filter::Type::BLOCKING) | |
115 .value("WHITELIST", Filter::Type::WHITELIST) | |
116 .value("ELEMHIDE", Filter::Type::ELEMHIDE) | |
117 .value("ELEMHIDEEXCEPTION", Filter::Type::ELEMHIDEEXCEPTION) | |
118 .value("CSSPROPERTY", Filter::Type::CSSPROPERTY); | |
119 | 82 |
120 class_<Filter>("Filter") | 83 // Parsing also normalizes the filter text, so it has to be done before the |
121 .property("text", &Filter::GetText) | 84 // lookup in knownFilters. |
122 .property("type", &Filter::GetType) | 85 union |
123 .function("serialize", &Filter::Serialize) | 86 { |
124 .class_function("fromText", &Filter::FromText) | 87 RegExpFilterData regexp; |
125 .class_function("normalize", &Filter::Normalize) | 88 ElemHideData elemhide; |
126 .smart_ptr<FilterPtr>("shared_ptr"); | 89 } data; |
| 90 DependentString error; |
| 91 |
| 92 Filter::Type type = CommentFilter::Parse(text); |
| 93 if (type == Filter::Type::UNKNOWN) |
| 94 type = ElemHideBase::Parse(text, data.elemhide); |
| 95 if (type == Filter::Type::UNKNOWN) |
| 96 type = RegExpFilter::Parse(text, error, data.regexp); |
| 97 |
| 98 auto knownFilter = knownFilters.find(text); |
| 99 if (knownFilter) |
| 100 { |
| 101 knownFilter->second->AddRef(); |
| 102 return knownFilter->second; |
| 103 } |
| 104 |
| 105 FilterPtr filter; |
| 106 switch (type) |
| 107 { |
| 108 case Filter::Type::COMMENT: |
| 109 filter = new CommentFilter(text); |
| 110 break; |
| 111 case Filter::Type::INVALID: |
| 112 filter = new InvalidFilter(text, error); |
| 113 break; |
| 114 case Filter::Type::BLOCKING: |
| 115 filter = new BlockingFilter(text, data.regexp); |
| 116 break; |
| 117 case Filter::Type::WHITELIST: |
| 118 filter = new WhitelistFilter(text, data.regexp); |
| 119 break; |
| 120 case Filter::Type::ELEMHIDE: |
| 121 filter = new ElemHideFilter(text, data.elemhide); |
| 122 break; |
| 123 case Filter::Type::ELEMHIDEEXCEPTION: |
| 124 filter = new ElemHideException(text, data.elemhide); |
| 125 break; |
| 126 case Filter::Type::CSSPROPERTY: |
| 127 filter = new CSSPropertyFilter(text, data.elemhide); |
| 128 if (static_cast<CSSPropertyFilter*>(filter.get())->IsGeneric()) |
| 129 filter = new InvalidFilter(text, u"filter_cssproperty_nodomain"_str); |
| 130 break; |
| 131 default: |
| 132 // This should never happen but just in case |
| 133 return nullptr; |
| 134 } |
| 135 |
| 136 // This is a hack: we looked up the entry using text but create it using |
| 137 // filter->mText. This works because both are equal at this point. However, |
| 138 // text refers to a temporary buffer which will go away. |
| 139 enter_context("Adding to known filters"); |
| 140 knownFilter.assign(filter->mText, filter.get()); |
| 141 exit_context(); |
| 142 |
| 143 return filter.release(); |
127 } | 144 } |
LEFT | RIGHT |