compiled/Filter.cpp - Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++

Delta Between Two Patch Sets: compiled/Filter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)

Left Patch Set: Fixed some bugs and exposed/tested more properties Created Jan. 15, 2016, 3:40 p.m.

Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 #include <emscripten.h>

2 #include <unordered_map>

3

4 #include "Filter.h"	1 #include "Filter.h"

5 #include "CommentFilter.h"	2 #include "CommentFilter.h"

	3 #include "InvalidFilter.h"

6 #include "RegExpFilter.h"	4 #include "RegExpFilter.h"

	5 #include "BlockingFilter.h"

	6 #include "WhitelistFilter.h"

	7 #include "ElemHideBase.h"

7 #include "ElemHideFilter.h"	8 #include "ElemHideFilter.h"

8 #include "ElemHideException.h"	9 #include "ElemHideException.h"

9 #include "tools.h"	10 #include "CSSPropertyFilter.h"

10	11 #include "StringMap.h"

11 Filter::Filter(const std::u16string& text)

12 {

13 this->text = text;

14 }

15

16 const std::u16string Filter::Serialize()

17 {

18 return (

19 u"[Filter]\n"

20 u"text=" + text + u"\n"

21 );

22 }

23	12

24 namespace	13 namespace

25 {	14 {

26 std::unordered_map<std::u16string,Filter*> knownFilters;	15 StringMap<Filter*> knownFilters(8192);

27	16

28 void trim_spaces(std::u16string& str)	17 void NormalizeWhitespace(DependentString& text)

29 {	18 {

30 size_t pos;	19 String::size_type start = 0;

	20 String::size_type end = text.length();

31	21

32 // Remove leading whitespace	22 // Remove leading spaces and special characters like line breaks

33 pos = str.find_first_not_of(u' ');	23 for (; start < end; start++)

34 if (pos > 0)	24 if (text[start] > ' ')

35 str.erase(0, pos);	25 break;

36	26

37 // Remove trailing whitespace	27 // Now look for invalid characters inside the string

38 pos = str.find_last_not_of(u' ');	28 String::size_type pos;

39 if (pos < str.length() - 1)	29 for (pos = start; pos < end; pos++)

40 str.erase(pos + 1);	30 if (text[pos] < ' ')

41 }	31 break;

42	32

43 void remove_spaces(std::u16string& str)	33 if (pos < end)

44 {

45 for (size_t i = 0, l = str.length(); i < l; ++i)

46 {	34 {

47 if (str[i] == u' ')	35 // Found invalid characters, copy all the valid characters while skipping

	36 // the invalid ones.

	37 String::size_type delta = 1;

	38 for (pos = pos + 1; pos < end; pos++)

48 {	39 {

49 str.erase(i, 1);	40 if (text[pos] < ' ')

50 --i;	41 delta++;

51 --l;	42 else

	43 text[pos - delta] = text[pos];

52 }	44 }

	45 end -= delta;

53 }	46 }

	47

	48 // Remove trailing spaces

	49 for (; end > 0; end--)

	50 if (text[end - 1] != ' ')

	51 break;

	52

	53 // Set new string boundaries

	54 text.reset(text, start, end - start);

54 }	55 }

55 }	56 }

56	57

57 // API	58 Filter::Filter(Type type, const String& text)

58 extern "C"	59 : mType(type), mText(text)

59 {	60 {

60 Filter* EMSCRIPTEN_KEEPALIVE Filter_FromText(char16_t* textPtr, int textLen)	61 annotate_address(this, "Filter");
Felix Dahlke 2016/01/15 17:00:36 If this stays (see my comments on the C wrappers above), I would much prefer to have a regular Filter::FromText member function and just call that one from here, likewise below.
	62 }

	63

	64 Filter::~Filter()

	65 {

	66 knownFilters.erase(mText);

	67 }

	68

	69 OwnedString Filter::Serialize() const

	70 {

	71 OwnedString result(u"[Filter]\ntext="_str);

	72 result.append(mText);

	73 result.append(u'\n');

	74 return result;

	75 }

	76

	77 Filter* Filter::FromText(DependentString& text)

	78 {

	79 NormalizeWhitespace(text);

	80 if (text.empty())

	81 return nullptr;

	82

	83 // Parsing also normalizes the filter text, so it has to be done before the

	84 // lookup in knownFilters.

	85 union

61 {	86 {

62 std::u16string text(textPtr, textLen);	87 RegExpFilterData regexp;

63 auto it = knownFilters.find(text);	88 ElemHideData elemhide;

64 if (it != knownFilters.end())	89 } data;

65 return it->second;	90 DependentString error;

66	91

67 Filter* filter = CommentFilter::Create(text);	92 Filter::Type type = CommentFilter::Parse(text);

68 if (!filter)	93 if (type == Filter::Type::UNKNOWN)

69 filter = ElemHideBase::Create(text);	94 type = ElemHideBase::Parse(text, data.elemhide);

70 if (!filter)	95 if (type == Filter::Type::UNKNOWN)

71 filter = RegExpFilter::Create(text);	96 type = RegExpFilter::Parse(text, error, data.regexp);

72 return knownFilters[text] = filter;	97

	98 auto knownFilter = knownFilters.find(text);

	99 if (knownFilter)

	100 {

	101 knownFilter->second->AddRef();

	102 return knownFilter->second;

73 }	103 }

74	104

75 char16_t* EMSCRIPTEN_KEEPALIVE Filter_Normalize(char16_t* textPtr, size_t textLen, size_t* resultLen)	105 FilterPtr filter;

	106 switch (type)

76 {	107 {

77 std::u16string text(textPtr, textLen);	108 case Filter::Type::COMMENT:

78	109 filter = new CommentFilter(text);

79 // Remove special characters like line breaks	110 break;

80 for (size_t i = 0, l = text.length(); i < l; ++i)	111 case Filter::Type::INVALID:

81 {	112 filter = new InvalidFilter(text, error);

82 if (text[i] < u' ')	113 break;

83 {	114 case Filter::Type::BLOCKING:

84 text.erase(i, 1);	115 filter = new BlockingFilter(text, data.regexp);

85 --i;	116 break;

86 --l;	117 case Filter::Type::WHITELIST:

87 }	118 filter = new WhitelistFilter(text, data.regexp);

88 }	119 break;

89	120 case Filter::Type::ELEMHIDE:

90 trim_spaces(text);	121 filter = new ElemHideFilter(text, data.elemhide);

91	122 break;

92 {	123 case Filter::Type::ELEMHIDEEXCEPTION:

93 size_t domainsEnd;	124 filter = new ElemHideException(text, data.elemhide);

94 size_t selectorStart;	125 break;

95 Filter::Type type = ElemHideBase::Parse(text, &domainsEnd, &selectorStart);	126 case Filter::Type::CSSPROPERTY:

96 if (type != Filter::Type::UNKNOWN)	127 filter = new CSSPropertyFilter(text, data.elemhide);

97 {	128 if (static_cast<CSSPropertyFilter*>(filter.get())->IsGeneric())

98 std::u16string domains = text.substr(0, domainsEnd);	129 filter = new InvalidFilter(text, u"filter_cssproperty_nodomain"_str);

99 std::u16string selector = text.substr(selectorStart);	130 break;

100 remove_spaces(domains);	131 default:

101 trim_spaces(selector);	132 // This should never happen but just in case

102 return stringToBuffer(domains + (type == Filter::Type::ELEMHIDEEXCEPTION ? u"#@#" : u"##") + selector, resultLen);	133 return nullptr;

103 }

104 }

105

106 if (CommentFilter::Parse(text) == Filter::Type::UNKNOWN)

107 remove_spaces(text);

108 return stringToBuffer(text, resultLen);

109 }	134 }

110	135

111 char16_t* EMSCRIPTEN_KEEPALIVE Filter_Serialize(Filter* filter, size_t* resultLen)	136 // This is a hack: we looked up the entry using text but create it using

112 {	137 // filter->mText. This works because both are equal at this point. However,

113 return stringToBuffer(filter->Serialize(), resultLen);	138 // text refers to a temporary buffer which will go away.

114 }	139 enter_context("Adding to known filters");

	140 knownFilter.assign(filter->mText, filter.get());

	141 exit_context();

115	142

116 EXPOSE_READONLY_STRPROPERTY(Filter, text)	143 return filter.release();

117 EXPOSE_READONLY_PROPERTY(Filter, Filter::Type, type)

118 }	144 }

LEFT	RIGHT