| Index: automated/check_devbuilds/check_devbuilds_auto.R |
| diff --git a/automated/check_devbuilds/check_devbuilds_auto.R b/automated/check_devbuilds/check_devbuilds_auto.R |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..5550b69918f076d36138cf2e7676c141437b5334 |
| --- /dev/null |
| +++ b/automated/check_devbuilds/check_devbuilds_auto.R |
| @@ -0,0 +1,317 @@ |
| +library(RPostgreSQL) |
| +library(magrittr) |
| +library(data.table) |
| +library(dplyr) |
| +library(AnomalyDetection) |
| +library(ggplot2) |
| +library(grid) |
| +download_devbuilds_list <- bbbi::download_devbuilds_list |
| + |
| +# determines if recent devbuilds are working correctly |
| + |
| + |
| +# TODO: which are current devbuilds of: adblockplusasus, adblockplussbrowser, contentblockers |
| +# TODO "recent" time period from development cycle |
| +# TODO include even older builds (time it takes to adopt new versions) for analyses on combined data set? |
| +# TODO include application devbuild versions |
| + |
| + |
| +# Settings ---- |
| +# Builds dated within the last `n_days_recent` days count as "recent" |
| +# (the next older build per add-on is added further below). |
| +n_days_recent <- 6 |
| +# Number of days of download history, counted back from the recent-build |
| +# cutoff, that current data is compared against. |
| +n_days_comparison <- 90 |
| + |
| +# Oldest build date that still counts as recent, as a Date and as a |
| +# midnight-UTC timestamp. |
| +min_builddate <- Sys.Date() - n_days_recent |
| +min_builddate_posix <- as.POSIXlt( |
| +  paste(min_builddate, "00:00:00"), |
| +  tz = "UTC" |
| +) |
| + |
| +# for some checks, we focus on the most recent devbuilds |
| +# Fetch the devbuild list and turn it into a data.table. |
| +# NOTE(review): the code below assumes the list carries `date`, `datetime` |
| +# and `addonName` columns -- confirm against bbbi::download_devbuilds_list. |
| +data_recent <- download_devbuilds_list() %>% data.table |
| +# builds dated on or after the cutoff |
| +recent_devbuilds <- data_recent[date >= min_builddate] |
| +# all remaining builds; anti_join() is called without `by`, so rows are |
| +# matched on every column the two tables share |
| +tmp <- anti_join(data_recent, recent_devbuilds) |
| +# of those older builds keep only the newest one per add-on |
| +tmp <- tmp[order(-datetime)][, .SD[1], by = "addonName"] |
| +# recent builds plus that one older build per add-on, newest first |
| +# NOTE(review): not referenced again in the visible part of this script |
| +recent_devbuilds_plus <- rbind(recent_devbuilds, tmp)[order(-datetime)] |
| + |
| + |
| +# devbuilds have been prefiltered into a separate table for performance improvements |
| +# connect to the local "kpi" PostgreSQL database |
| +con <- src_postgres(db = "kpi", host = "localhost", port = 5432, user = "sporz") |
| + |
| +# date window of download data to load: comparison history plus recent period |
| +mindate <- min_builddate - n_days_comparison # inclusive |
| +maxdate <- Sys.Date() # exclusive |
| + |
| +# Reusable pipeline (apply it to a connection): read the `devbuilds` table, |
| +# keep rows with mindate <= date < maxdate and sum N per combination of |
| +# date, add-on, version, resource, application, downloadCount and dayssince. |
| +# local() makes mindate/maxdate be evaluated in R instead of being sent to |
| +# the database as column names. |
| +query <- . %>% |
| + tbl("devbuilds") %>% |
| + filter(date >= local(mindate) & |
| + date < local(maxdate)) %>% |
| + group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>% |
| + summarize(N = sum(N)) |
| + |
| +# run the query and pull the result into a local data.table |
| +data_devbuilds <- con %>% query %>% collect %>% data.table |
| +# TODO remove after development |
| +# development-only round trip: dump the result to CSV in the working |
| +# directory and read it back, re-parsing `date` as Date |
| +path <- "data_devbuilds.csv" |
| +write.csv(data_devbuilds, path, row.names = FALSE) |
| +data_devbuilds <- fread(path)[, date := as.Date(date)] |
| + |
| +# CHECKS |
| +# chrome, probably ffox; ie without antiadblockfilters |
| +# dc 0, exceptionrules ~ notifications ~ antiadblockfilters ~ easylist combined |
| +# dc 5, exceptionrules > notifications * 0.9, antiadblockfilters ~ notifications, easylist * 3 [mind multiple!] ~ notifications |
| + |
| +# Run AnomalyDetectionTs on the `date` / `N` columns of `data_check`. |
| +# |
| +# data_check: data.table with at least the columns `date` and `N`. |
| +# ...:        forwarded to AnomalyDetectionTs (e.g. `plot = TRUE`). |
| +# Returns the AnomalyDetectionTs result unchanged. |
| +detect_anomalies.twitter <- function(data_check, ...) { |
| +  series <- data.frame(data_check[, .(date, N)]) |
| +  AnomalyDetectionTs( |
| +    series, |
| +    max_anoms = 0.2, |
| +    alpha = 0.05, |
| +    threshold = "p95", |
| +    direction = "both", |
| +    longterm = TRUE, |
| +    piecewise_median_period_weeks = 4, |
| +    ... |
| +  ) |
| +} |
| + |
| +# Count the anomalies reported by detect_anomalies.twitter() whose timestamp |
| +# is on or after `min_builddate_posix`. |
| +# |
| +# data_check:          series to test, passed to detect_anomalies.twitter(). |
| +# min_builddate_posix: earliest timestamp that still counts. |
| +# ...:                 forwarded to detect_anomalies.twitter(). |
| +# Returns an integer count (0 when nothing was flagged). |
| +num_anomalies.twitter <- function(data_check, min_builddate_posix, ...) { |
| +  flagged <- detect_anomalies.twitter(data_check, ...)$anoms |
| +  if (nrow(flagged) == 0) { |
| +    # nothing was flagged at all |
| +    return(nrow(flagged)) |
| +  } |
| +  is_recent <- flagged$timestamp >= min_builddate_posix |
| +  # comparisons that yield NA are not counted, just as subset() drops them |
| +  sum(is_recent, na.rm = TRUE) |
| +} |
| + |
| +# Replace `date` by a midnight-UTC POSIXct timestamp (updated by reference). |
| +set( |
| +  data_devbuilds, |
| +  j = "date", |
| +  value = as.POSIXct(paste(data_devbuilds$date, "00:00:00"), tz = "UTC") |
| +) |
| + |
| + |
| +# Accumulator for the outcomes of the checks below. |
| +results <- data.table() |
| + |
| + |
| +# want to check several time series for anomalies |
| +# build corresponding combinations of addonnames and resources |
| +# |
| +# Result: `check_combinations`, a data frame with the columns `addonName` and |
| +# `resource`, one row per series to test. |
| +# No scratch variable named `addonName` is created here: it would carry the |
| +# same name as the column used unquoted in the data.table expressions below, |
| +# and data.table falls back to the calling scope when a column is missing. |
| +resources_all <- c("/exceptionrules.txt", |
| +                   "/notification.json", |
| +                   "/antiadblockfilters.txt", |
| +                   "easylist default") |
| +# adblockplusie is checked without /antiadblockfilters.txt |
| +resources_ie <- c("/exceptionrules.txt", |
| +                  "/notification.json", |
| +                  "easylist default") |
| + |
| +check_combinations <- rbind( |
| +  setNames( |
| +    expand.grid(c("adblockplus", "adblockpluschrome"), resources_all), |
| +    c("addonName", "resource") |
| +  ), |
| +  data.frame(addonName = "adblockplusie", resource = resources_ie) |
| +) |
| + |
| +# Prepare one download series per add-on/resource for the anomaly checks. |
| +# |
| +# data:         subset of data_devbuilds to analyse. |
| +# combinations: data frame of the addonName/resource pairs to keep. |
| +# Returns a data.table with the columns date, addonName, resource and N. |
| +prepare_twitter_data <- function(data, combinations) { |
| +  data <- copy(data) |
| +  # Count Opera and Safari builds as adblockpluschrome. This has to happen |
| +  # BEFORE the merge: `combinations` lists neither of the two names, so the |
| +  # inner merge would drop those rows and a later rename would never match. |
| +  data[addonName %in% c("adblockplusopera", "adblockplussafari"), |
| +       addonName := "adblockpluschrome"] |
| +  data <- merge(data, combinations, by = c("addonName", "resource")) |
| +  data <- data[!(application %in% c("adblockbrowser", "adblockbrowserios"))] |
| +  data[, .(N = sum(N)), by = "date,addonName,resource"] |
| +} |
| + |
| +# Count recent anomalies per add-on/resource, once on the series itself |
| +# (test name `label`) and once on the negated series (test name "-" + label). |
| +# |
| +# data:  output of prepare_twitter_data(). |
| +# label: name of the test as stored in the `test` column. |
| +# since: earliest anomaly timestamp that still counts. |
| +# Returns a data.table with addonName, resource, test and num_anomalies. |
| +run_twitter_checks <- function(data, label, since) { |
| +  plain <- data[, .(test = label, |
| +                    num_anomalies = num_anomalies.twitter(.SD, since)), |
| +                by = "addonName,resource"] |
| +  negated <- data[, .(test = paste0("-", label), |
| +                      num_anomalies = num_anomalies.twitter( |
| +                        .SD[, .(date, N = -N)], since)), |
| +                  by = "addonName,resource"] |
| +  rbind(plain, negated) |
| +} |
| + |
| +# CHECK - twitter on downloadCount 5 |
| +data_check5 <- prepare_twitter_data(data_devbuilds[downloadCount == 5], |
| +                                    check_combinations) |
| +results <- rbind(results, |
| +                 run_twitter_checks(data_check5, "twitter5", |
| +                                    min_builddate_posix)) |
| + |
| +# CHECK - twitter on downloadCount 0, dayssince -1 |
| +data_check0 <- prepare_twitter_data( |
| +  data_devbuilds[downloadCount == 0 & dayssince == -1], |
| +  check_combinations |
| +) |
| +results <- rbind(results, |
| +                 run_twitter_checks(data_check0, "twitter0", |
| +                                    min_builddate_posix)) |
| + |
| +# a series passes when no anomaly was flagged in the recent window |
| +results[, isok := num_anomalies == 0] |
| +# colour per outcome, in the same order as plot_values |
| +plot_colors <- c("light green", "light coral") |
| +plot_values <- c(TRUE, FALSE) |
| +results[, isok := factor(isok, plot_values)] |
| + |
| + |
| +# show results as colored overview matrix |
| +# - the fill colours are passed as a NAMED vector ("TRUE"/"FALSE"): with |
| +#   unnamed values the first colour goes to the first level present in the |
| +#   data, so a result set containing only failures would be drawn green |
| +# - ggtitle() sets the plot title; title() is the base-graphics function and |
| +#   does not produce a ggplot component |
| +myplot <- ggplot(results) + |
| +  aes(fill = isok) + |
| +  facet_wrap(~test+addonName+resource) + |
| +  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) + |
| +  geom_text(aes(x = 0.5, y = 0.5, |
| +                label = paste(sep = "\n", test, addonName, resource)), |
| +            color = "black", inherit.aes = FALSE, parse = FALSE) + |
| +  scale_fill_manual(values = setNames(plot_colors, plot_values)) + |
| +  ggtitle("twitter anomaly detection") + |
| +  theme(axis.line = element_blank(), |
| +        axis.text.x = element_blank(), |
| +        axis.text.y = element_blank(), |
| +        axis.ticks = element_blank(), |
| +        axis.title.x = element_blank(), |
| +        axis.title.y = element_blank(), |
| +        legend.position = "none", |
| +        panel.background = element_blank(), |
| +        panel.border = element_blank(), |
| +        panel.grid.major = element_blank(), |
| +        panel.grid.minor = element_blank(), |
| +        plot.background = element_blank()) |
| +# Drop the layout row directly above each row of panels before drawing. |
| +# NOTE(review): presumably these rows hold the facet strips, whose text is |
| +# already printed inside each tile -- verify against the installed ggplot2. |
| +gt <- ggplotGrob(myplot) |
| +panels <- grep("panel", gt$layout$name) |
| +top <- unique(gt$layout$t[panels]) |
| +gt <- gt[-(top - 1), ] |
| +grid.newpage() |
| +grid.draw(gt) |
| + |
| + |
| +# create option to show details |
| +res_bad <- results[isok == FALSE] |
| +res_bad |
| + |
| +# One detail plot per failed check, in the order of `res_bad`. |
| +# seq_len() keeps the loop from running at all when nothing failed |
| +# (1:nrow() would iterate over 1 and 0 in that case). |
| +plotlist <- vector("list", nrow(res_bad)) |
| +for (idx in seq_len(nrow(res_bad))) { |
| +  bad <- res_bad[idx] |
| +  print(bad$test) |
| +  # pick the series the failed test ran on; the "-" tests used negated counts |
| +  data_source <- switch(bad$test, |
| +                        twitter5 = data_check5, |
| +                        twitter0 = data_check0, |
| +                        `-twitter5` = copy(data_check5)[, N := -N], |
| +                        `-twitter0` = copy(data_check0)[, N := -N]) |
| +  data_cur <- data_source[addonName == bad$addonName & |
| +                            resource == bad$resource] |
| +  res <- detect_anomalies.twitter(data_cur, plot = TRUE) |
| +  plotlist[[idx]] <- res$plot + |
| +    xlab(paste(bad$addonName, bad$resource, bad$test)) |
| +} |
| + |
| +# draw every detail plot, however many checks failed |
| +for (detail_plot in plotlist) { |
| +  print(detail_plot) |
| +} |
| + |
| + |
| +# want to check for no downloads for recent devbuilds with common applications |
| +# build corresponding combinations of addonnames, resources, applications |
| +# TODO add applicationversion to database and combinations |
| +resources_full <- c("/exceptionrules.txt", |
| +                    "/notification.json", |
| +                    "/antiadblockfilters.txt", |
| +                    "easylist default") |
| +# adblockplusie is checked without /antiadblockfilters.txt |
| +resources_ie <- c("/exceptionrules.txt", |
| +                  "/notification.json", |
| +                  "easylist default") |
| + |
| +# full addon x resource x application grid as a data frame |
| +combination_grid <- function(addon, resources, applications) { |
| +  setNames( |
| +    expand.grid(addon, resources, applications), |
| +    c("addonName", "resource", "application") |
| +  ) |
| +} |
| + |
| +check_combinations <- rbind( |
| +  combination_grid("adblockplus", resources_full, |
| +                   c("firefox", "fennec2", "thunderbird", "seamonkey")), |
| +  combination_grid("adblockpluschrome", resources_full, |
| +                   c("chrome", "iron", "chromium")), |
| +  combination_grid("adblockplusie", resources_ie, |
| +                   c("msie32", "msie64")), |
| +  combination_grid("adblockplusopera", resources_full, "opera"), |
| +  combination_grid("adblockplussafari", resources_full, "safari"), |
| +  combination_grid("adblockplussbrowser", resources_full, "sbrowser") |
| +) |
| + |
| +# allow one full day to pick up a new version |
| +# attach the version of every sufficiently old recent build to its add-on rows |
| +recent_join <- recent_devbuilds[date < Sys.Date() - 1, |
| +                                .(addonVersion, addonName)] |
| +check_combinations <- merge(check_combinations, recent_join) |
| + |
| +# TODO CHECK - no downloads for recent devbuilds |
| +excluded_applications <- c("adblockbrowser", "adblockbrowserios") |
| +join_columns <- c("addonName", "resource", "application", "addonVersion") |
| + |
| +data_tmp <- data_devbuilds[!(application %in% excluded_applications)] |
| +# all.y = TRUE keeps every expected combination; combinations without any |
| +# matching download row come back with N = NA and are counted as 0 |
| +results_dwnl <- merge(data_tmp, check_combinations, |
| +                      by = join_columns, all.y = TRUE) |
| +results_dwnl[is.na(N), N := 0] |
| +# total downloads per expected combination, sorted for display |
| +results_dwnl <- results_dwnl[, .(N = sum(N)), |
| +                             by = "addonVersion,addonName,application,resource"] |
| +results_dwnl <- results_dwnl[order(addonVersion, addonName, |
| +                                   application, resource)] |
| +# a combination passes when it saw at least one download |
| +results_dwnl[, isok := factor(N > 0, plot_values)] |
| + |
| +results_dwnl |
| + |
| + |
| +# show results as colored overview matrix |
| +# - the fill colours are passed as a NAMED vector ("TRUE"/"FALSE"): with |
| +#   unnamed values the first colour goes to the first level present in the |
| +#   data, so a result set containing only failures would be drawn green |
| +# - ggtitle() sets the plot title; title() is the base-graphics function and |
| +#   does not produce a ggplot component |
| +myplot <- ggplot(results_dwnl) + |
| +  aes(fill = isok) + |
| +  facet_wrap(~addonVersion+addonName+application+resource) + |
| +  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) + |
| +  geom_text(aes(x = 0.5, y = 0.5, |
| +                label = paste(sep = "\n", addonVersion, addonName, |
| +                              application, resource)), |
| +            color = "black", inherit.aes = FALSE, parse = FALSE) + |
| +  scale_fill_manual(values = setNames(plot_colors, plot_values)) + |
| +  ggtitle("any downloads") + |
| +  theme(axis.line = element_blank(), |
| +        axis.text.x = element_blank(), |
| +        axis.text.y = element_blank(), |
| +        axis.ticks = element_blank(), |
| +        axis.title.x = element_blank(), |
| +        axis.title.y = element_blank(), |
| +        legend.position = "none", |
| +        panel.background = element_blank(), |
| +        panel.border = element_blank(), |
| +        panel.grid.major = element_blank(), |
| +        panel.grid.minor = element_blank(), |
| +        plot.background = element_blank()) |
| +# Drop the layout row directly above each row of panels before drawing. |
| +# NOTE(review): presumably these rows hold the facet strips, whose text is |
| +# already printed inside each tile -- verify against the installed ggplot2. |
| +gt <- ggplotGrob(myplot) |
| +panels <- grep("panel", gt$layout$name) |
| +top <- unique(gt$layout$t[panels]) |
| +gt <- gt[-(top - 1), ] |
| +grid.newpage() |
| +grid.draw(gt) |