OLD | NEW |
1 #include "PluginStdAfx.h" | 1 #include "PluginStdAfx.h" |
2 | 2 |
3 #include "PluginFilter.h" | 3 #include "PluginFilter.h" |
4 | 4 |
5 #if (defined PRODUCT_ADBLOCKPLUS) | 5 #if (defined PRODUCT_ADBLOCKPLUS) |
6 #include "PluginSettings.h" | 6 #include "PluginSettings.h" |
7 #include "PluginClient.h" | 7 #include "PluginClient.h" |
8 #include "PluginClientFactory.h" | 8 #include "PluginClientFactory.h" |
9 #endif | 9 #endif |
10 | 10 |
(...skipping 806 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
817 s_criticalSectionFilterMap.Lock(); | 817 s_criticalSectionFilterMap.Lock(); |
818 { | 818 { |
819 for (std::vector<std::string>::iterator it = filters.begin(); it < filters.e
nd(); ++it) | 819 for (std::vector<std::string>::iterator it = filters.begin(); it < filters.e
nd(); ++it) |
820 { | 820 { |
821 CString filter((*it).c_str()); | 821 CString filter((*it).c_str()); |
822 // If the line is not commented out | 822 // If the line is not commented out |
823 if (!filter.Trim().IsEmpty() && filter.GetAt(0) != '!' && filter.GetAt(0)
!= '[') | 823 if (!filter.Trim().IsEmpty() && filter.GetAt(0) != '!' && filter.GetAt(0)
!= '[') |
824 { | 824 { |
825 int filterType = 0; | 825 int filterType = 0; |
826 | 826 |
827 // We need to categorize the filters | |
828 // We have three options, whitelist, block or element hiding | |
829 // See http://adblockplus.org/en/filters for further documentation | 827 // See http://adblockplus.org/en/filters for further documentation |
830 | 828 |
831 // @@ indicates white listing rule | |
832 if (filter.Find(L"@@") == 0) | |
833 { | |
834 filterType = CFilter::filterTypeWhiteList; | |
835 | |
836 filter.Delete(0, 2); | |
837 } | |
838 // If a filter contains ## then it is a element hiding rule | 829 // If a filter contains ## then it is a element hiding rule |
839 else if (filter.Find(L"#") >= 0) | 830 if (filter.Find(L"#") >= 0) |
840 { | 831 { |
841 filterType = CFilter::filterTypeElementHide; | 832 filterType = CFilter::filterTypeElementHide; |
842 } | 833 } |
843 //Anything we do not support here | |
844 else if (filter.Find(L"*") == 0) | |
845 { | |
846 filterType = CFilter::filterTypeUnknown; | |
847 } | |
848 // Else, it is a general rule | |
849 else | |
850 { | |
851 filterType = CFilter::filterTypeBlocking; | |
852 } | |
853 | 834 |
854 try | 835 try |
855 { | 836 { |
856 if (filterType == CFilter::filterTypeElementHide) | 837 if (filterType == CFilter::filterTypeElementHide) |
857 { | 838 { |
858 AddFilterElementHide(filter); | 839 AddFilterElementHide(filter); |
859 } | 840 } |
860 } | 841 } |
861 catch(...) | 842 catch(...) |
862 { | 843 { |
863 //just ignore all errors we might get when adding filters | 844 #ifdef ENABLE_DEBUG_RESULT |
| 845 CPluginDebug::DebugResult(L"Error loading hide filter: " + filter); |
| 846 #endif |
864 } | 847 } |
865 } | 848 } |
866 } | 849 } |
867 } | 850 } |
868 s_criticalSectionFilterMap.Unlock(); | 851 s_criticalSectionFilterMap.Unlock(); |
869 | 852 |
870 return isRead; | 853 return isRead; |
871 } | 854 } |
872 | 855 |
873 void CPluginFilter::ClearFilters() | 856 void CPluginFilter::ClearFilters() |
(...skipping 12 matching lines...) Expand all Loading... |
886 | 869 |
887 m_elementHideTags.clear(); | 870 m_elementHideTags.clear(); |
888 m_elementHideTagsId.clear(); | 871 m_elementHideTagsId.clear(); |
889 m_elementHideTagsClass.clear(); | 872 m_elementHideTagsClass.clear(); |
890 m_elementHideDomains.clear(); | 873 m_elementHideDomains.clear(); |
891 } | 874 } |
892 s_criticalSectionFilterMap.Unlock(); | 875 s_criticalSectionFilterMap.Unlock(); |
893 } | 876 } |
894 | 877 |
895 | 878 |
896 bool CPluginFilter::IsMatchFilter(const CFilter& filter, CString src, const CStr
ing& srcDomain, const CString& domain) const | |
897 { | |
898 // Initial checks | |
899 | |
900 // $match_case | |
901 if (!filter.m_isMatchCase) | |
902 { | |
903 src.MakeLower(); | |
904 } | |
905 | |
906 // $domain | |
907 if (!filter.m_domains.empty()) | |
908 { | |
909 bool bFound = false; | |
910 | |
911 for (std::set<CString>::const_iterator it = filter.m_domains.begin(); !bFoun
d && it != filter.m_domains.end(); ++it) | |
912 { | |
913 bFound = domain == *(it) || IsSubdomain(domain, *it); | |
914 } | |
915 | |
916 if (!bFound) | |
917 { | |
918 return false; | |
919 } | |
920 } | |
921 | |
922 // $domain ~ | |
923 if (!filter.m_domainsNot.empty()) | |
924 { | |
925 for (std::set<CString>::const_iterator it = filter.m_domainsNot.begin(); it
!= filter.m_domainsNot.end(); ++it) | |
926 { | |
927 if (domain == *(it) || IsSubdomain(domain, *it)) | |
928 { | |
929 return false; | |
930 } | |
931 } | |
932 } | |
933 | |
934 // $third_party | |
935 if (filter.m_isThirdParty) | |
936 { | |
937 if (srcDomain == domain || IsSubdomain(srcDomain, domain)) | |
938 { | |
939 return false; | |
940 } | |
941 } | |
942 | |
943 // $third_party ~ | |
944 if (filter.m_isFirstParty) | |
945 { | |
946 if (srcDomain != domain && !IsSubdomain(srcDomain, domain)) | |
947 { | |
948 return false; | |
949 } | |
950 } | |
951 | |
952 // "regex" checks | |
953 | |
954 int startPos = 0; | |
955 int srcLength = src.GetLength(); | |
956 UINT indexEnd = filter.m_stringElements.size() - 1; | |
957 | |
958 for (UINT index = 0; index <= indexEnd; index++) | |
959 { | |
960 if (index == 0 && filter.m_isFromStartDomain) | |
961 { | |
962 CString loweredDomain = srcDomain; | |
963 int domainPos = src.Find(loweredDomain.MakeLower()); | |
964 int lastPos = src.Find('/', domainPos); | |
965 | |
966 bool bFoundDomain = false; | |
967 bool bContinueDomainSearch = true; | |
968 | |
969 while (bContinueDomainSearch) | |
970 { | |
971 if (domainPos == FindMatch(src, filter.m_stringElements[index])) | |
972 { | |
973 bContinueDomainSearch = false; | |
974 bFoundDomain = true; | |
975 } | |
976 else | |
977 { | |
978 domainPos = src.Find('.', domainPos + 1) + 1; | |
979 if (domainPos == 0 || (domainPos >= lastPos && lastPos >= 0)) | |
980 { | |
981 bContinueDomainSearch = false; | |
982 } | |
983 } | |
984 } | |
985 | |
986 if (!bFoundDomain) | |
987 { | |
988 return false; | |
989 } | |
990 } | |
991 | |
992 startPos = FindMatch(src, filter.m_stringElements[index], startPos); | |
993 if (startPos < 0) | |
994 { | |
995 return false; | |
996 } | |
997 | |
998 int length = filter.m_stringElements[index].GetLength(); | |
999 | |
1000 // Check from start | |
1001 if (index == 0 && filter.m_isFromStart && startPos > 0) | |
1002 { | |
1003 return false; | |
1004 } | |
1005 | |
1006 // Check from end | |
1007 if (index == indexEnd && filter.m_isFromEnd && startPos + length != srcLengt
h) | |
1008 { | |
1009 return false; | |
1010 } | |
1011 | |
1012 startPos += length; | |
1013 } | |
1014 | |
1015 return true; | |
1016 } | |
1017 | |
1018 | |
1019 const CFilter* CPluginFilter::MatchFilter(int filterType, const CString& src, in
t contentType, const CString& domain) const | |
1020 { | |
1021 const CFilter* filter = NULL; | |
1022 | |
1023 int startCharacter = 0; | |
1024 int keyLength = 4; | |
1025 | |
1026 CString srcLower = src; | |
1027 srcLower.MakeLower(); | |
1028 int srcLowerLength = srcLower.GetLength(); | |
1029 | |
1030 // Extract src domain | |
1031 DWORD length = 2048; | |
1032 CString srcDomain; | |
1033 | |
1034 if (SUCCEEDED(::UrlGetPart(src, srcDomain.GetBufferSetLength(2048), &length, U
RL_PART_HOSTNAME, 0))) | |
1035 { | |
1036 srcDomain.ReleaseBuffer(); | |
1037 | |
1038 if (srcDomain.Left(4) == L"www.") | |
1039 { | |
1040 srcDomain = srcDomain.Right(srcDomain.GetLength() - 4); | |
1041 } | |
1042 else if (srcDomain.Left(5) == L"www2." || srcDomain.Left(5) == L"www3.") | |
1043 { | |
1044 srcDomain = srcDomain.Right(srcDomain.GetLength() - 5); | |
1045 } | |
1046 } | |
1047 else | |
1048 { | |
1049 srcDomain.ReleaseBuffer(); | |
1050 srcDomain.Empty(); | |
1051 } | |
1052 | |
1053 // Search in filter map | |
1054 s_criticalSectionFilterMap.Lock(); | |
1055 { | |
1056 const TFilterMap* filterMap = m_filterMap[filterType]; | |
1057 | |
1058 if (srcLowerLength >= 7) | |
1059 { | |
1060 if (srcLower.Find(L"http://") == 0) | |
1061 { | |
1062 startCharacter = 7; | |
1063 } | |
1064 else if (srcLower.Find(L"https://") == 0) | |
1065 { | |
1066 startCharacter = 8; | |
1067 } | |
1068 } | |
1069 | |
1070 DWORD dwKey = 0; | |
1071 | |
1072 while (filter == NULL && srcLowerLength >= startCharacter + keyLength) | |
1073 { | |
1074 if (dwKey == 0) | |
1075 { | |
1076 dwKey = (srcLower.GetAt(startCharacter) << 24) | (srcLower.GetAt(startCh
aracter+1) << 16) | (srcLower.GetAt(startCharacter+2) << 8) | srcLower.GetAt(sta
rtCharacter+3); | |
1077 } | |
1078 else | |
1079 { | |
1080 dwKey <<= 8; | |
1081 dwKey |= srcLower.GetAt(startCharacter+3); | |
1082 } | |
1083 | |
1084 TFilterMap::const_iterator foundEntry = filterMap[0].find(dwKey); | |
1085 if (foundEntry != filterMap[0].end()) | |
1086 { | |
1087 if (((foundEntry->second.m_contentType & contentType) || foundEntry->sec
ond.m_contentType == CFilter::contentTypeAny) && IsMatchFilter(foundEntry->secon
d, src, srcDomain, domain)) | |
1088 { | |
1089 filter = &(foundEntry->second); | |
1090 break; | |
1091 } | |
1092 } | |
1093 | |
1094 // No match - increment the start character | |
1095 startCharacter++; | |
1096 } | |
1097 | |
1098 // Second list | |
1099 if (filter == NULL) | |
1100 { | |
1101 dwKey = 0; | |
1102 startCharacter = 0; | |
1103 | |
1104 if (srcLowerLength >= 7) | |
1105 { | |
1106 if (srcLower.Find(L"http://") == 0) | |
1107 { | |
1108 startCharacter = 7; | |
1109 } | |
1110 else if (srcLower.Find(L"https://") == 0) | |
1111 { | |
1112 startCharacter = 8; | |
1113 } | |
1114 } | |
1115 | |
1116 while (filter == NULL && srcLowerLength >= startCharacter + keyLength) | |
1117 { | |
1118 if (dwKey == 0) | |
1119 { | |
1120 dwKey = (srcLower.GetAt(startCharacter) << 24) | (srcLower.GetAt(start
Character+1) << 16) | (srcLower.GetAt(startCharacter+2) << 8) | srcLower.GetAt(s
tartCharacter+3); | |
1121 } | |
1122 else | |
1123 { | |
1124 dwKey <<= 8; | |
1125 dwKey |= srcLower.GetAt(startCharacter+3); | |
1126 } | |
1127 | |
1128 TFilterMap::const_iterator foundEntry = filterMap[1].find(dwKey); | |
1129 if (foundEntry != filterMap[1].end()) | |
1130 { | |
1131 if (((foundEntry->second.m_contentType & contentType) || foundEntry->s
econd.m_contentType == CFilter::contentTypeAny) && IsMatchFilter(foundEntry->sec
ond, src, srcDomain, domain)) | |
1132 { | |
1133 filter = &(foundEntry->second); | |
1134 break; | |
1135 } | |
1136 } | |
1137 | |
1138 // No match - increment the start character | |
1139 startCharacter++; | |
1140 } | |
1141 } | |
1142 | |
1143 // Search in default filter map (try all filters) | |
1144 if (filter == NULL) | |
1145 { | |
1146 for (TFilterMapDefault::const_iterator it = m_filterMapDefault[filterType]
.begin(); it != m_filterMapDefault[filterType].end(); ++it) | |
1147 { | |
1148 if (((it->m_contentType & contentType) || it->m_contentType == CFilter::
contentTypeAny) && IsMatchFilter(*it, src, srcDomain, domain)) | |
1149 { | |
1150 filter = &(*it); | |
1151 break; | |
1152 } | |
1153 } | |
1154 } | |
1155 | |
1156 } | |
1157 s_criticalSectionFilterMap.Unlock(); | |
1158 | |
1159 return filter; | |
1160 } | |
1161 | |
1162 | |
1163 bool CPluginFilter::ShouldWhiteList(CString src) const | 879 bool CPluginFilter::ShouldWhiteList(CString src) const |
1164 { | 880 { |
1165 // We should not block the empty string, so all filtering does not make sense | 881 // We should not block the empty string, so all filtering does not make sense |
1166 // Therefore we just return | 882 // Therefore we just return |
1167 if (src.Trim().IsEmpty()) | 883 if (src.Trim().IsEmpty()) |
1168 { | 884 { |
1169 return false; | 885 return false; |
1170 } | 886 } |
1171 | 887 |
1172 const CFilter* filter = MatchFilter(CFilter::filterTypeWhiteList, src, CFilter
::contentTypeDocument, ""); | 888 //TODO: Implement whitelisting check from libadblockplus here |
1173 | 889 return false; |
1174 return filter ? true : false; | |
1175 } | 890 } |
1176 | 891 |
1177 | 892 |
1178 bool CPluginFilter::ShouldBlock(CString src, int contentType, const CString& dom
ain, bool addDebug) const | 893 bool CPluginFilter::ShouldBlock(CString src, int contentType, const CString& dom
ain, bool addDebug) const |
1179 { | 894 { |
1180 // We should not block the empty string, so all filtering does not make sense | 895 // We should not block the empty string, so all filtering does not make sense |
1181 // Therefore we just return | 896 // Therefore we just return |
1182 if (src.Trim().IsEmpty()) | 897 if (src.Trim().IsEmpty()) |
1183 { | 898 { |
1184 return false; | 899 return false; |
1185 } | 900 } |
1186 | 901 |
1187 CPluginSettings* settings = CPluginSettings::GetInstance(); | 902 CPluginSettings* settings = CPluginSettings::GetInstance(); |
1188 | 903 |
1189 CString type; | 904 CString type; |
1190 if (addDebug) | 905 if (addDebug) |
1191 { | 906 { |
1192 type = "???"; | 907 type = "???"; |
1193 | 908 |
1194 std::map<int,CString>::const_iterator it = m_contentMapText.find(contentType
); | 909 std::map<int,CString>::const_iterator it = m_contentMapText.find(contentType
); |
1195 if (it != m_contentMapText.end()) | 910 if (it != m_contentMapText.end()) |
1196 { | 911 { |
1197 type = it->second; | 912 type = it->second; |
1198 } | 913 } |
1199 } | 914 } |
1200 | 915 |
1201 CPluginClient* client = CPluginClient::GetInstance(); | 916 CPluginClient* client = CPluginClient::GetInstance(); |
1202 AdblockPlus::FilterEngine* filterEngine = client->GetFilterEngine(); | 917 AdblockPlus::FilterEngine* filterEngine = client->GetFilterEngine(); |
1203 | 918 |
1204 // src.OemToCharA(); | 919 //TODO: Make sure if the content type names are in sync with libadblockplus |
| 920 std::string contentTypeString = CT2A(type, CP_UTF8); |
1205 | 921 |
1206 std::string contentTypeString = ""; | 922 CT2CA srcAnsi(src, CP_UTF8); |
| 923 std::string url(srcAnsi); |
1207 | 924 |
1208 CT2CA srcAnsi(src); | 925 //TODO: figure out domain passing for whitelisting |
1209 std::string url(srcAnsi); | |
1210 if (filterEngine->Matches(url, contentTypeString)) | 926 if (filterEngine->Matches(url, contentTypeString)) |
1211 { | 927 { |
1212 if (addDebug) | 928 if (addDebug) |
1213 { | 929 { |
1214 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES") | 930 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES") |
1215 | 931 |
1216 #ifdef ENABLE_DEBUG_RESULT | 932 #ifdef ENABLE_DEBUG_RESULT |
1217 CPluginDebug::DebugResultBlocking(type, src); | 933 CPluginDebug::DebugResultBlocking(type, src); |
1218 #endif | 934 #endif |
1219 } | 935 } |
1220 return true; | 936 return true; |
1221 } | 937 } |
1222 return false; | 938 return false; |
1223 | |
1224 //The following is for reference only | |
1225 | |
1226 const CFilter* blockFilter = MatchFilter(CFilter::filterTypeBlocking, src, con
tentType, domain); | |
1227 if (blockFilter) | |
1228 { | |
1229 const CFilter* whiteFilter = MatchFilter(CFilter::filterTypeWhiteList, src,
contentType, domain); | |
1230 if (whiteFilter) | |
1231 { | |
1232 if (addDebug) | |
1233 { | |
1234 DEBUG_FILTER("Filter::ShouldBlock " + type + " NO src:" + src + " - whi
telist:\"" + whiteFilter->m_filterText + "\""); | |
1235 } | |
1236 blockFilter = NULL; | |
1237 } | |
1238 else if (addDebug) | |
1239 { | |
1240 DEBUG_FILTER("Filter::ShouldBlock " + type + " YES src:" + src + " - \"" +
blockFilter->m_filterText + "\"") | |
1241 | |
1242 #ifdef ENABLE_DEBUG_RESULT | |
1243 CPluginDebug::DebugResultBlocking(type, src); | |
1244 #endif | |
1245 } | |
1246 } | |
1247 else if (addDebug) | |
1248 { | |
1249 DEBUG_FILTER("Filter::ShouldBlock " + type + " NO src:" + src) | |
1250 } | |
1251 | |
1252 return blockFilter ? true : false; | |
1253 } | 939 } |
1254 | 940 |
1255 int CPluginFilter::FindMatch(const CString& src, CString filterPart, int srcStar
tPos) const | 941 int CPluginFilter::FindMatch(const CString& src, CString filterPart, int srcStar
tPos) const |
1256 { | 942 { |
1257 int filterCurrentPos = filterPart.Find('^'); | 943 int filterCurrentPos = filterPart.Find('^'); |
1258 if (filterCurrentPos >= 0) | 944 if (filterCurrentPos >= 0) |
1259 { | 945 { |
1260 int srcLength = src.GetLength(); | 946 int srcLength = src.GetLength(); |
1261 int srcFilterPos = -1; | 947 int srcFilterPos = -1; |
1262 int srcCurrentPos = srcStartPos; | 948 int srcCurrentPos = srcStartPos; |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1396 if (pos > 0 && domain.GetLength() + pos == subdomain.GetLength()) | 1082 if (pos > 0 && domain.GetLength() + pos == subdomain.GetLength()) |
1397 { | 1083 { |
1398 if (subdomain.GetAt(pos - 1) == '.') | 1084 if (subdomain.GetAt(pos - 1) == '.') |
1399 { | 1085 { |
1400 return true; | 1086 return true; |
1401 } | 1087 } |
1402 } | 1088 } |
1403 | 1089 |
1404 return false; | 1090 return false; |
1405 } | 1091 } |
OLD | NEW |