OLD | NEW |
(Empty) | |
| 1 library(RPostgreSQL) |
| 2 library(magrittr) |
| 3 library(data.table) |
| 4 library(dplyr) |
| 5 library(AnomalyDetection) |
| 6 library(ggplot2) |
| 7 library(grid) |
| 8 download_devbuilds_list <- bbbi::download_devbuilds_list |
| 9 |
| 10 # determines if recent devbuilds are working correctly |
| 11 |
| 12 |
| 13 # TODO: which are current devbuilds of: adblockplusasus, adblockplussbrowser, co
ntentblockers |
| 14 # TODO "recent" time period from development cycle |
| 15 # TODO include even older builds (time it takes to adopt new versions) for analy
ses on combined data set? |
| 16 # TODO include application devbuild versions |
| 17 |
| 18 |
# Settings ----
# Builds dated within the last `n_days_recent` days count as "recent";
# for each add-on the newest build older than that is kept as well.
n_days_recent <- 6
# Download data from this many days before the build cut-off is loaded so
# current numbers can be compared against history.
n_days_comparison <- 90

min_builddate <- Sys.Date() - n_days_recent
min_builddate_posix <- as.POSIXlt(paste(min_builddate, "00:00:00"), tz = "UTC")

# Recent devbuilds ----
# Some checks focus on the most recent devbuilds only.
data_recent <- data.table(download_devbuilds_list())
recent_devbuilds <- data_recent[date >= min_builddate]

# Everything that is not recent, reduced to the newest build per add-on.
older_devbuilds <- anti_join(data_recent, recent_devbuilds)
newest_older_devbuild <- older_devbuilds[order(-datetime)][, .SD[1], by = "addonName"]

recent_devbuilds_plus <- rbind(recent_devbuilds, newest_older_devbuild)[order(-datetime)]
| 34 |
| 35 |
# devbuilds have been prefiltered into a separate table for performance
# improvements
# `dbname` is spelled out in full; the previous `db =` only worked through
# partial argument matching.
con <- src_postgres(dbname = "kpi", host = "localhost", port = 5432, user = "sporz")

mindate <- min_builddate - n_days_comparison # inclusive
maxdate <- Sys.Date() # exclusive

# Reusable pipeline: restrict the "devbuilds" table to [mindate, maxdate) and
# sum N over the listed grouping columns.
query <- . %>%
  tbl("devbuilds") %>%
  filter(date >= local(mindate) &
         date < local(maxdate)) %>%
  group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>%
  summarize(N = sum(N))

data_devbuilds <- con %>% query %>% collect %>% data.table
# TODO remove after development
# Round-trips the result through a CSV file in the working directory; after
# re-reading, `date` is converted back to Date.
path <- "data_devbuilds.csv"
write.csv(data_devbuilds, path, row.names = FALSE)
data_devbuilds <- fread(path)[, date := as.Date(date)]
| 54 |
| 55 # CHECKS |
| 56 # chrome, probably ffox; ie without antiadblockfilters |
| 57 # dc 0, exceptionrules ~ notifications ~ antiadblockfilters ~ easylist combined |
| 58 # dc 5, exceptionrules > notifications * 0.9, antiadblockfilters ~ notifications
, easylist * 3 [mind multiple!] ~ notifications |
| 59 |
# Run AnomalyDetectionTs on the `date` and `N` columns of `data_check`.
#
# data_check: data.table with at least the columns `date` and `N`.
# ...:        forwarded to AnomalyDetectionTs (e.g. `plot = TRUE`).
#
# Returns the AnomalyDetectionTs result unchanged; callers in this file read
# its `anoms` and `plot` elements.
detect_anomalies.twitter <- function(data_check, ...) {
  series <- data.frame(data_check[, .(date, N)])
  AnomalyDetectionTs(
    series,
    max_anoms = 0.2,
    alpha = 0.05,
    threshold = "p95",
    direction = "both",
    longterm = TRUE,
    piecewise_median_period_weeks = 4,
    ...
  )
}
| 67 |
# Count the anomalies reported at or after `min_builddate_posix`.
#
# data_check:          data.table handed to detect_anomalies.twitter().
# min_builddate_posix: date-time cut-off; anomalies with an earlier
#                      `timestamp` are ignored.
# ...:                 forwarded to detect_anomalies.twitter().
#
# Returns a single integer. A missing (NULL) or empty anomaly table counts
# as 0 instead of failing in the `if` condition.
num_anomalies.twitter <- function(data_check, min_builddate_posix, ...) {
  anoms <- detect_anomalies.twitter(data_check, ...)$anoms
  if (is.null(anoms) || nrow(anoms) == 0) {
    return(0L)
  }
  # Rows whose comparison yields NA are not counted, matching subset().
  sum(anoms$timestamp >= min_builddate_posix, na.rm = TRUE)
}
| 78 |
# The anomaly checks work on date-times: turn each date into midnight UTC.
data_devbuilds[, date := as.POSIXct(paste(date, "00:00:00"), tz = "UTC")]

# Collects one row per (test, addonName, resource) from the checks below.
results <- data.table()

# Several time series are checked for anomalies; build the corresponding
# combinations of add-on names and resources.
combos_main <- setNames(
  expand.grid(
    c("adblockplus",
      "adblockpluschrome"),
    c("/exceptionrules.txt",
      "/notification.json",
      "/antiadblockfilters.txt",
      "easylist default")
  ),
  c("addonName", "resource")
)

# adblockplusie is listed without /antiadblockfilters.txt.
combos_ie <- data.frame(
  addonName = "adblockplusie",
  resource = c("/exceptionrules.txt",
               "/notification.json",
               "easylist default")
)

# Same fold as appending each piece in turn to an empty data.frame.
check_combinations <- Reduce(rbind, list(combos_main, combos_ie), data.frame())
| 105 |
# Reduce one slice of the download data to the series that get checked.
#
# data_slice:   rows of data_devbuilds for one download-count selection.
# combinations: data.frame of addonName/resource pairs to keep.
#
# Returns a data.table with N summed per date, addonName and resource.
build_check_series <- function(data_slice, combinations) {
  series <- merge(data_slice, combinations, by = c("addonName", "resource"))
  # NOTE(review): this relabel runs after the merge. With combinations that
  # only contain adblockplus, adblockpluschrome and adblockplusie it matches
  # no rows -- confirm whether opera/safari were meant to be folded into
  # adblockpluschrome before the merge.
  series[addonName %in% c("adblockplusopera", "adblockplussafari"),
         addonName := "adblockpluschrome"]
  series <- series[!(application %in% c("adblockbrowser", "adblockbrowserios"))]
  series[, .(N = sum(N)), by = "date,addonName,resource"]
}

# Count recent anomalies per addonName/resource, once on N (test = label) and
# once on -N (test = "-" + label).
#
# series: output of build_check_series().
# label:  test name stored in the `test` column.
# since:  cut-off passed to num_anomalies.twitter().
#
# Returns a data.table with addonName, resource, test and num_anomalies.
run_twitter_checks <- function(series, label, since) {
  on_counts <- series[, .(test = label,
                          num_anomalies = num_anomalies.twitter(.SD, since)),
                      by = "addonName,resource"]
  on_negated <- series[, .(test = paste0("-", label),
                           num_anomalies = .SD %>%
                             mutate(N = -N) %>%
                             num_anomalies.twitter(since)),
                       by = "addonName,resource"]
  rbind(on_counts, on_negated)
}

# CHECK - twitter on downloadCount 5
data_check5 <- build_check_series(data_devbuilds[downloadCount == 5],
                                  check_combinations)
results <- rbind(results,
                 run_twitter_checks(data_check5, "twitter5", min_builddate_posix))

# CHECK - twitter on downloadCount 0, dayssince -1
data_check0 <- build_check_series(data_devbuilds[downloadCount == 0 & dayssince == -1],
                                  check_combinations)
results <- rbind(results,
                 run_twitter_checks(data_check0, "twitter0", min_builddate_posix))
| 137 |
# A series is ok when no anomaly was counted for the recent period.
results[, isok := num_anomalies == 0]
plot_colors <- c("light green", "light coral")
plot_values <- c(TRUE, FALSE)
results[, isok := factor(isok, plot_values)]


# show results as colored overview matrix
# One facet per (test, addonName, resource); each facet is a filled unit
# square with the combination written inside it.
myplot <- ggplot(results) +
  aes(fill = isok) +
  facet_wrap(~test+addonName+resource) +
  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
  geom_text(aes(x = 0.5, y = 0.5,
                label = paste(sep = "\n", test, addonName, resource)),
            color = "black", inherit.aes = FALSE, parse = FALSE) +
  # Name the colours by factor level so that TRUE is always green and FALSE
  # always coral, even when only one of the two levels occurs in the data.
  scale_fill_manual(values = setNames(plot_colors, as.character(plot_values))) +
  # ggtitle() is the ggplot2 layer; title() is the base-graphics function.
  ggtitle("twitter anomaly detection") +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = element_blank())
# Drop the gtable row directly above each row of panels (presumably the
# facet strips -- the labels are already printed inside the panels).
gt <- ggplotGrob(myplot)
panels <- grep("panel", gt$layout$name)
top <- unique(gt$layout$t[panels])
gt <- gt[-(top - 1), ]
grid.newpage()
grid.draw(gt)
| 172 |
| 173 |
# create option to show details
# `isok` is a factor with levels "TRUE"/"FALSE", so compare against the label.
res_bad <- results[isok == "FALSE"]
res_bad

# One detail plot per failed check. seq_len() keeps the loop from running
# when no check failed (1:0 would iterate over 1 and 0).
plotlist <- vector("list", nrow(res_bad))
for (idx in seq_len(nrow(res_bad))) {
  bad <- res_bad[idx]
  print(bad$test)
  # Pick the series the failed test ran on; the "-" tests ran on negated N.
  data_source <- switch(bad$test,
                        twitter5 = data_check5,
                        twitter0 = data_check0,
                        `-twitter5` = data_check5 %>% mutate(N = -N),
                        `-twitter0` = data_check0 %>% mutate(N = -N))
  data_cur <- data_source[addonName == bad$addonName &
                          resource == bad$resource]
  res <- detect_anomalies.twitter(data_cur, plot = TRUE)
  plotlist[[idx]] <- res$plot +
    xlab(paste(bad$addonName,
               bad$resource,
               bad$test))
}

# Show every detail plot, however many there are.
for (detail_plot in plotlist) {
  print(detail_plot)
}
| 202 |
| 203 |
# want to check for no downloads for recent devbuilds with common applications
# build corresponding combinations of addonnames, resources, applications
# TODO add applicationversion to database and combinations
resources_all <- c("/exceptionrules.txt",
                   "/notification.json",
                   "/antiadblockfilters.txt",
                   "easylist default")
# adblockplusie is listed without /antiadblockfilters.txt
resources_ie <- c("/exceptionrules.txt",
                  "/notification.json",
                  "easylist default")

# One entry per add-on: the resources and applications expected for it.
combination_specs <- list(
  list(addon = "adblockplus",
       resources = resources_all,
       applications = c("firefox", "fennec2", "thunderbird", "seamonkey")),
  list(addon = "adblockpluschrome",
       resources = resources_all,
       applications = c("chrome", "iron", "chromium")),
  list(addon = "adblockplusie",
       resources = resources_ie,
       applications = c("msie32", "msie64")),
  list(addon = "adblockplusopera",
       resources = resources_all,
       applications = c("opera")),
  list(addon = "adblockplussafari",
       resources = resources_all,
       applications = c("safari")),
  list(addon = "adblockplussbrowser",
       resources = resources_all,
       applications = c("sbrowser"))
)

# Full cross product per add-on, appended in order to an empty data.frame.
combination_grids <- lapply(combination_specs, function(spec) {
  setNames(
    expand.grid(spec$addon, spec$resources, spec$applications),
    c("addonName", "resource", "application")
  )
})
check_combinations <- Reduce(rbind, combination_grids, data.frame())

# allow one full day to pick up a new version
recent_join <- recent_devbuilds[date < Sys.Date() - 1, .(addonVersion, addonName)]
check_combinations <- merge(check_combinations, recent_join)
| 277 |
# TODO CHECK - no downloads for recent devbuilds
dwnl_keys <- c("addonName", "resource", "application", "addonVersion")
dwnl_groups <- c("addonVersion", "addonName", "application", "resource")

data_tmp <- data_devbuilds[!(application %in% c("adblockbrowser", "adblockbrowserios"))]

# Keep every expected combination (all.y); combinations without any download
# row come back with N = NA and are counted as 0.
results_dwnl <- merge(data_tmp, check_combinations, by = dwnl_keys, all.y = TRUE)
results_dwnl[is.na(N), N := 0]

downloads_total <- results_dwnl[, .(N = sum(N)), by = dwnl_groups]
results_dwnl <- downloads_total[order(addonVersion, addonName, application, resource)]

# ok = at least one download, stored as a factor with levels TRUE, FALSE
results_dwnl[, isok := factor(N > 0, plot_values)]

results_dwnl
| 288 |
| 289 |
# show results as colored overview matrix
# One facet per (addonVersion, addonName, application, resource); each facet
# is a filled unit square with the combination written inside it.
myplot <- ggplot(results_dwnl) +
  aes(fill = isok) +
  facet_wrap(~addonVersion+addonName+application+resource) +
  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
  geom_text(aes(x = 0.5, y = 0.5,
                label = paste(sep = "\n", addonVersion, addonName, application, resource)),
            color = "black", inherit.aes = FALSE, parse = FALSE) +
  # Name the colours by factor level so that TRUE is always green and FALSE
  # always coral, even when only one of the two levels occurs in the data.
  scale_fill_manual(values = setNames(plot_colors, as.character(plot_values))) +
  # ggtitle() is the ggplot2 layer; title() is the base-graphics function.
  ggtitle("any downloads") +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = element_blank())
# Drop the gtable row directly above each row of panels (presumably the
# facet strips -- the labels are already printed inside the panels).
gt <- ggplotGrob(myplot)
panels <- grep("panel", gt$layout$name)
top <- unique(gt$layout$t[panels])
gt <- gt[-(top - 1), ]
grid.newpage()
grid.draw(gt)
OLD | NEW |