Index: automated/check_devbuilds/check_devbuilds_auto.R |
diff --git a/automated/check_devbuilds/check_devbuilds_auto.R b/automated/check_devbuilds/check_devbuilds_auto.R |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5550b69918f076d36138cf2e7676c141437b5334 |
--- /dev/null |
+++ b/automated/check_devbuilds/check_devbuilds_auto.R |
@@ -0,0 +1,317 @@ |
+library(RPostgreSQL) |
+library(magrittr) |
+library(data.table) |
+library(dplyr) |
+library(AnomalyDetection) |
+library(ggplot2) |
+library(grid) |
+download_devbuilds_list <- bbbi::download_devbuilds_list |
+ |
+# determines if recent devbuilds are working correctly |
+ |
+ |
+# TODO: which are current devbuilds of: adblockplusasus, adblockplussbrowser, contentblockers |
+# TODO "recent" time period from development cycle |
+# TODO include even older builds (time it takes to adopt new versions) for analyses on combined data set? |
+# TODO include application devbuild versions |
+ |
+ |
+# settings |
+# builds dated within the last n_days_recent days count as recent |
+# (+ the next older one per addonName, see recent_devbuilds_plus) |
+n_days_recent <- 6 |
+min_builddate <- Sys.Date() - n_days_recent |
+min_builddate_posix <- as.POSIXlt(paste(min_builddate, "00:00:00"), tz = "UTC") |
+# number of days before min_builddate for which download data is loaded as comparison |
+n_days_comparison <- 90 |
+ |
+# recent builds, plus the newest older build of every addonName (recent_devbuilds_plus) |
+data_recent <- download_devbuilds_list() %>% data.table |
+recent_devbuilds <- data_recent[date >= min_builddate] |
+tmp <- anti_join(data_recent, recent_devbuilds) |
+tmp <- tmp[order(-datetime)][, .SD[1], by = "addonName"] |
+recent_devbuilds_plus <- rbind(recent_devbuilds, tmp)[order(-datetime)] |
+ |
+ |
+# devbuilds have been prefiltered into a separate table for performance improvements |
+con <- src_postgres(db = "kpi", host = "localhost", port = 5432, user = "sporz") |
+ |
+mindate <- min_builddate - n_days_comparison # inclusive |
+maxdate <- Sys.Date() # exclusive |
+# reusable pipeline: restrict "devbuilds" to [mindate, maxdate) and sum N per group |
+query <- . %>% |
+ tbl("devbuilds") %>% |
+ filter(date >= local(mindate) & |
+ date < local(maxdate)) %>% |
+ group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>% |
+ summarize(N = sum(N)) |
+# apply the pipeline to the connection and collect the result into a data.table |
+data_devbuilds <- con %>% query %>% collect %>% data.table |
+# TODO remove after development: round trip through a csv file, date re-parsed as Date |
+path <- "data_devbuilds.csv" |
+write.csv(data_devbuilds, path, row.names = FALSE) |
+data_devbuilds <- fread(path)[, date := as.Date(date)] |
+ |
+# CHECKS |
+# chrome, probably ffox; ie without antiadblockfilters |
+# dc 0, exceptionrules ~ notifications ~ antiadblockfilters ~ easylist combined |
+# dc 5, exceptionrules > notifications * 0.9, antiadblockfilters ~ notifications, easylist * 3 [mind multiple!] ~ notifications |
+ |
+# AnomalyDetectionTs on the (date, N) columns of data_check; `...` is forwarded |
+detect_anomalies.twitter <- function(data_check, ...){ |
+  series <- data.frame(data_check[, .(date, N)]) |
+  AnomalyDetectionTs(series, max_anoms = 0.2, alpha = 0.05, |
+                     threshold = "p95", direction = "both", longterm = TRUE, |
+                     piecewise_median_period_weeks = 4, ...) |
+} |
+ |
+# count the detected anomalies that fall on or after min_builddate_posix |
+num_anomalies.twitter <- function(data_check, min_builddate_posix, ...){ |
+  anoms <- detect_anomalies.twitter(data_check, ...)$anoms |
+  if (nrow(anoms) == 0){ |
+    return(0L) |
+  } |
+  # further candidate filter, currently unused: anoms$anoms >= 50 |
+  is_recent <- anoms$timestamp >= min_builddate_posix |
+  sum(is_recent & !is.na(is_recent)) |
+} |
+ |
+# midnight UTC timestamps (POSIXct) instead of plain dates |
+data_devbuilds[, date := as.POSIXct(paste(date, "00:00:00"), tz = "UTC")] |
+ |
+# the anomaly counts of all checks are appended to this table |
+results <- data.table() |
+ |
+# want to check several time series for anomalies |
+# build corresponding combinations of addonnames and resources |
+# adblockplus and adblockpluschrome: four resources each |
+combos_main <- expand.grid(c("adblockplus", "adblockpluschrome"), |
+                           c("/exceptionrules.txt", |
+                             "/notification.json", |
+                             "/antiadblockfilters.txt", |
+                             "easylist default")) %>% |
+  set_names(c("addonName", "resource")) |
+ |
+# adblockplusie: same resources without /antiadblockfilters.txt |
+addonName <- "adblockplusie" |
+combos_ie <- data.frame(addonName, |
+                        resource = c("/exceptionrules.txt", |
+                                     "/notification.json", |
+                                     "easylist default")) |
+ |
+check_combinations <- data.frame() %>% |
+  rbind(combos_main) %>% |
+  rbind(combos_ie) |
+ |
+# CHECK - twitter on downloadCount 5 |
+data_check5 <- data_devbuilds[downloadCount == 5] |
+# fold opera/safari into chrome first: the merge below drops unlisted addon names |
+data_check5[addonName %in% c("adblockplusopera", "adblockplussafari"), addonName := "adblockpluschrome"] |
+data_check5 <- merge(data_check5, check_combinations, by = c("addonName", "resource")) |
+data_check5 <- data_check5[!(application %in% c("adblockbrowser", "adblockbrowserios"))] |
+data_check5 <- data_check5[, .(N = sum(N)), by = "date,addonName,resource"] |
+tmp <- data_check5[, .(test = 'twitter5', |
+                       num_anomalies = .SD %>% num_anomalies.twitter(min_builddate_posix)), |
+                   by = "addonName,resource"] |
+results <- rbind(results, tmp) |
+tmp <- data_check5[, .(test = '-twitter5', |
+                       num_anomalies = .SD %>% mutate(N = -N) %>% num_anomalies.twitter(min_builddate_posix)), |
+                   by = "addonName,resource"] |
+results <- rbind(results, tmp) |
+ |
+# CHECK - twitter on downloadCount 0, dayssince -1 |
+data_check0 <- data_devbuilds[downloadCount == 0 & dayssince == -1] |
+# fold opera/safari into chrome first: the merge below drops unlisted addon names |
+data_check0[addonName %in% c("adblockplusopera", "adblockplussafari"), addonName := "adblockpluschrome"] |
+data_check0 <- merge(data_check0, check_combinations, by = c("addonName", "resource")) |
+data_check0 <- data_check0[!(application %in% c("adblockbrowser", "adblockbrowserios"))] |
+data_check0 <- data_check0[, .(N = sum(N)), by = "date,addonName,resource"] |
+tmp <- data_check0[, .(test = 'twitter0', |
+                       num_anomalies = .SD %>% num_anomalies.twitter(min_builddate_posix)), |
+                   by = "addonName,resource"] |
+results <- rbind(results, tmp) |
+tmp <- data_check0[, .(test = '-twitter0', |
+                       num_anomalies = .SD %>% mutate(N = -N) %>% num_anomalies.twitter(min_builddate_posix)), |
+                   by = "addonName,resource"] |
+results <- rbind(results, tmp) |
+ |
+# a test is ok when it flagged no anomaly; isok is a factor with levels TRUE, FALSE |
+plot_values <- c(TRUE, FALSE) |
+plot_colors <- c("light green", "light coral") |
+results[, isok := factor(num_anomalies == 0, levels = plot_values)] |
+ |
+ |
+# show results as colored overview matrix |
+# one facet per (test, addonName, resource), filled according to isok |
+myplot <- ggplot(results) + |
+  aes(fill = isok) + |
+  facet_wrap(~test+addonName+resource) + |
+  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) + |
+  geom_text(aes(x=0.5, y=0.5, |
+                label=paste(sep = "\n", test, addonName, resource)), |
+            color="black", inherit.aes=FALSE, parse=FALSE) + |
+  # drop = FALSE keeps both levels, so a lone FALSE never takes the first color |
+  scale_fill_manual(values = plot_colors, drop = FALSE) + |
+  # ggtitle() is the ggplot2 component; title() belongs to base graphics |
+  ggtitle("twitter anomaly detection") + |
+  theme(axis.line=element_blank(), axis.ticks=element_blank(), |
+        axis.text.x=element_blank(), axis.text.y=element_blank(), |
+        axis.title.x=element_blank(), axis.title.y=element_blank(), |
+        legend.position="none", |
+        panel.background=element_blank(), panel.border=element_blank(), |
+        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), |
+        plot.background=element_blank()) |
+# drop the gtable row directly above each row of panels (presumably the |
+# facet strips); the labels are drawn inside the panels by geom_text |
+gt <- ggplotGrob(myplot) |
+panels <- grep("panel", gt$layout$name) |
+top <- unique(gt$layout$t[panels]) |
+gt <- gt[-(top-1), ] |
+grid.newpage() |
+grid.draw(gt) |
+ |
+ |
+# create option to show details |
+# res_bad holds the rows of results that are not ok; for each of them the |
+# detection is repeated with plot = TRUE and the resulting plot is collected |
+res_bad <- results[isok == FALSE] |
+res_bad |
+ |
+plotlist <- list() |
+# seq_len() iterates zero times when no test failed; 1:0 would run twice |
+for (idx in seq_len(nrow(res_bad))){ |
+  bad <- res_bad[idx] |
+  print(bad$test) |
+  # the "-" variants were computed on the sign-flipped counts |
+  data_source <- switch(bad$test, |
+                        twitter5 = data_check5, |
+                        twitter0 = data_check0, |
+                        `-twitter5` = data_check5 %>% mutate(N = -N), |
+                        `-twitter0` = data_check0 %>% mutate(N = -N)) |
+  data_cur <- data_source[addonName == bad$addonName & |
+                            resource == bad$resource] |
+  res <- detect_anomalies.twitter(data_cur, plot = TRUE) |
+  plotlist[[length(plotlist) + 1]] <- res$plot + |
+    xlab(paste(bad$addonName, bad$resource, bad$test)) |
+} |
+ |
+# print every collected plot; indexing the fixed slots 1..7 missed plots |
+# beyond the seventh and printed NULL entries when fewer existed |
+for (detail_plot in plotlist){ |
+  print(detail_plot) |
+} |
+ |
+# want to check for no downloads for recent devbuilds with common applications |
+# build corresponding combinations of addonnames, resources, applications |
+# TODO add applicationversion to database and combinations |
+ |
+# one entry per addon with its resources and applications; |
+# the entries are expanded and stacked in the order given here |
+combination_specs <- list( |
+  list(addon = "adblockplus", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "/antiadblockfilters.txt", |
+                     "easylist default"), |
+       applications = c("firefox", |
+                        "fennec2", |
+                        "thunderbird", |
+                        "seamonkey")), |
+ |
+  list(addon = "adblockpluschrome", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "/antiadblockfilters.txt", |
+                     "easylist default"), |
+       applications = c("chrome", |
+                        "iron", |
+                        "chromium")), |
+ |
+  # the only entry without /antiadblockfilters.txt |
+  list(addon = "adblockplusie", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "easylist default"), |
+       applications = c("msie32", |
+                        "msie64")), |
+ |
+  list(addon = "adblockplusopera", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "/antiadblockfilters.txt", |
+                     "easylist default"), |
+       applications = c("opera")), |
+ |
+  list(addon = "adblockplussafari", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "/antiadblockfilters.txt", |
+                     "easylist default"), |
+       applications = c("safari")), |
+ |
+  list(addon = "adblockplussbrowser", |
+       resources = c("/exceptionrules.txt", |
+                     "/notification.json", |
+                     "/antiadblockfilters.txt", |
+                     "easylist default"), |
+       applications = c("sbrowser")) |
+) |
+ |
+# cross product of addon x resources x applications for a single entry, |
+# with the columns named addonName, resource, application |
+build_combinations <- function(spec){ |
+  expand.grid(spec$addon, spec$resources, spec$applications) %>% |
+    set_names(c("addonName", "resource", "application")) |
+} |
+ |
+# stack the expanded entries one after the other, |
+# starting from an empty data.frame |
+check_combinations <- data.frame() |
+for (spec in combination_specs){ |
+  check_combinations <- rbind(check_combinations, build_combinations(spec)) |
+} |
+ |
+# a build gets one full day to be picked up before it is checked |
+recent_join <- recent_devbuilds[date < Sys.Date() - 1, .(addonVersion, addonName)] |
+check_combinations <- merge(check_combinations, recent_join) |
+ |
+# TODO CHECK - no downloads for recent devbuilds |
+key_cols <- c("addonName", "resource", "application", "addonVersion") |
+results_dwnl <- merge(data_devbuilds[!(application %in% c("adblockbrowser", "adblockbrowserios"))], |
+                      check_combinations, by = key_cols, all.y = TRUE) |
+results_dwnl[is.na(N), N := 0] |
+results_dwnl <- results_dwnl[, .(N = sum(N)), by = "addonVersion,addonName,application,resource"] |
+results_dwnl <- results_dwnl[order(addonVersion, addonName, application, resource)] |
+results_dwnl[, isok := factor(N > 0, plot_values)] |
+ |
+results_dwnl |
+ |
+ |
+# show results as colored overview matrix |
+# one facet per (addonVersion, addonName, application, resource), filled by isok |
+myplot <- ggplot(results_dwnl) + |
+  aes(fill = isok) + |
+  facet_wrap(~addonVersion+addonName+application+resource) + |
+  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) + |
+  geom_text(aes(x=0.5, y=0.5, |
+                label=paste(sep = "\n", addonVersion, addonName, application, resource)), |
+            color="black", inherit.aes=FALSE, parse=FALSE) + |
+  # drop = FALSE keeps both levels, so a lone FALSE never takes the first color |
+  scale_fill_manual(values = plot_colors, drop = FALSE) + |
+  # ggtitle() is the ggplot2 component; title() belongs to base graphics |
+  ggtitle("any downloads") + |
+  theme(axis.line=element_blank(), axis.ticks=element_blank(), |
+        axis.text.x=element_blank(), axis.text.y=element_blank(), |
+        axis.title.x=element_blank(), axis.title.y=element_blank(), |
+        legend.position="none", |
+        panel.background=element_blank(), panel.border=element_blank(), |
+        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), |
+        plot.background=element_blank()) |
+# drop the gtable row directly above each row of panels (presumably the |
+# facet strips); the labels are drawn inside the panels by geom_text |
+gt <- ggplotGrob(myplot) |
+panels <- grep("panel", gt$layout$name) |
+top <- unique(gt$layout$t[panels]) |
+gt <- gt[-(top-1), ] |
+grid.newpage() |
+grid.draw(gt) |