Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: automated/check_devbuilds/check_devbuilds_auto.R

Issue 29336297: automated devbuilds
Patch Set: Created Feb. 12, 2016, 1:56 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: automated/check_devbuilds/check_devbuilds_auto.R
diff --git a/automated/check_devbuilds/check_devbuilds_auto.R b/automated/check_devbuilds/check_devbuilds_auto.R
new file mode 100644
index 0000000000000000000000000000000000000000..5550b69918f076d36138cf2e7676c141437b5334
--- /dev/null
+++ b/automated/check_devbuilds/check_devbuilds_auto.R
@@ -0,0 +1,317 @@
+library(RPostgreSQL)
+library(magrittr)
+library(data.table)
+library(dplyr)
+library(AnomalyDetection)
+library(ggplot2)
+library(grid)
+download_devbuilds_list <- bbbi::download_devbuilds_list
+
+# determines if recent devbuilds are working correctly
+
+
+# TODO: which are current devbuilds of: adblockplusasus, adblockplussbrowser, contentblockers
+# TODO "recent" time period from development cycle
+# TODO include even older builds (time it takes to adopt new versions) for analyses on combined data set?
+# TODO include application devbuild versions
+
+
+# settings
+# builds dated within the last n_days_recent days count as recent
+# (plus, per add-on, the newest build that is older than that)
+n_days_recent <- 6
+min_builddate <- Sys.Date() - n_days_recent
+min_builddate_posix <- as.POSIXlt(paste(min_builddate, "00:00:00"), tz = "UTC")
+# length in days of the comparison window of download data before min_builddate
+n_days_comparison <- 90
+
+# some checks only look at the most recent devbuilds
+data_recent <- data.table(download_devbuilds_list())
+recent_devbuilds <- data_recent[date >= min_builddate]
+older_builds <- anti_join(data_recent, recent_devbuilds)
+older_builds <- older_builds[order(-datetime)][, .SD[1], by = "addonName"]
+recent_devbuilds_plus <- rbind(recent_devbuilds, older_builds)[order(-datetime)]
+
+
+# devbuilds live in their own prefiltered table (done for performance)
+con <- src_postgres(db = "kpi", host = "localhost", port = 5432, user = "sporz")
+
+mindate <- min_builddate - n_days_comparison # lower bound, inclusive
+maxdate <- Sys.Date() # upper bound, exclusive
+
+# sums N per grouping key for rows of "devbuilds" dated in [mindate, maxdate)
+query <- function(src) {
+  src %>% tbl("devbuilds") %>% filter(date >= local(mindate) & date < local(maxdate)) %>%
+    group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>%
+    summarize(N = sum(N))
+}
+
+data_devbuilds <- data.table(collect(query(con)))
+# TODO remove after development
+path <- "data_devbuilds.csv"
+write.csv(data_devbuilds, path, row.names = FALSE)
+data_devbuilds <- fread(path)[, date := as.Date(date)]
+
+# CHECKS
+# chrome, probably ffox; ie without antiadblockfilters
+# dc 0, exceptionrules ~ notifications ~ antiadblockfilters ~ easylist combined
+# dc 5, exceptionrules > notifications * 0.9, antiadblockfilters ~ notifications, easylist * 3 [mind multiple!] ~ notifications
+
+# runs AnomalyDetectionTs with fixed settings on the date and N columns; `...` is handed through
+detect_anomalies.twitter <- function(data_check, ...){
+  series <- data.frame(data_check[, .(date, N)])
+  AnomalyDetectionTs(series, max_anoms = 0.2, alpha = 0.05, threshold = "p95",
+                     direction = "both", longterm = TRUE,
+                     piecewise_median_period_weeks = 4, ...)
+}
+
+# number of anomalies from detect_anomalies.twitter dated at or after min_builddate_posix
+num_anomalies.twitter <- function(data_check, min_builddate_posix, ...){
+  anoms <- detect_anomalies.twitter(data_check, ...)$anoms
+  if (nrow(anoms) == 0){
+    return(0L)
+  }
+  # rows whose comparison yields NA are not counted
+  is_recent <- anoms$timestamp >= min_builddate_posix
+  sum(is_recent, na.rm = TRUE)
+}
+
+# the date column becomes POSIXct (midnight UTC); the anomaly checks read it
+data_devbuilds[, date := as.POSIXct(paste(date, "00:00:00"), tz = "UTC")]
+
+
+# collects the outcome of the anomaly checks below
+results <- data.table()
+
+
+# several time series are to be checked for anomalies;
+# assemble the addonName/resource pairs that select them
+check_combinations <- data.frame()
+
+# every listed add-on is paired with every listed resource
+check_addonnames <- c("adblockplus", "adblockpluschrome")
+check_resources <- c("/exceptionrules.txt", "/notification.json",
+                     "/antiadblockfilters.txt", "easylist default")
+combos <- set_names(expand.grid(check_addonnames, check_resources),
+                    c("addonName", "resource"))
+check_combinations <- rbind(check_combinations, combos)
+
+# adblockplusie gets the same resources except /antiadblockfilters.txt
+addonName <- "adblockplusie"
+check_resources <- c("/exceptionrules.txt", "/notification.json",
+                     "easylist default")
+combos <- data.frame(addonName, resource = check_resources)
+check_combinations <- rbind(check_combinations, combos)
+
+# CHECK - twitter on downloadCount 5
+data_check5 <- data_devbuilds[downloadCount == 5]
+# count opera and safari as chrome; this has to happen before the merge, which
+# keeps only add-on names present in check_combinations (opera/safari are not)
+data_check5[addonName %in% c("adblockplusopera", "adblockplussafari"), addonName := "adblockpluschrome"]
+data_check5 <- merge(data_check5, check_combinations, by = c("addonName", "resource"))
+data_check5 <- data_check5[!(application %in% c("adblockbrowser", "adblockbrowserios"))]
+data_check5 <- data_check5[, .(N = sum(N)), by = "date,addonName,resource"]
+# per series: anomalies dated at or after min_builddate_posix, for N and for -N
+tmp <- data_check5[, .(test = 'twitter5', num_anomalies = .SD %>%
+  num_anomalies.twitter(min_builddate_posix)), by = "addonName,resource"]
+results <- rbind(results, tmp)
+tmp <- data_check5[, .(test = '-twitter5', num_anomalies = .SD %>% mutate(N = -N) %>%
+  num_anomalies.twitter(min_builddate_posix)), by = "addonName,resource"]
+results <- rbind(results, tmp)
+
+# CHECK - twitter on downloadCount 0, dayssince -1
+data_check0 <- data_devbuilds[downloadCount == 0 & dayssince == -1]
+# count opera and safari as chrome; this has to happen before the merge, which
+# keeps only add-on names present in check_combinations (opera/safari are not)
+data_check0[addonName %in% c("adblockplusopera", "adblockplussafari"), addonName := "adblockpluschrome"]
+data_check0 <- merge(data_check0, check_combinations, by = c("addonName", "resource"))
+data_check0 <- data_check0[!(application %in% c("adblockbrowser", "adblockbrowserios"))]
+data_check0 <- data_check0[, .(N = sum(N)), by = "date,addonName,resource"]
+# per series: anomalies dated at or after min_builddate_posix, for N and for -N
+tmp <- data_check0[, .(test = 'twitter0', num_anomalies = .SD %>%
+  num_anomalies.twitter(min_builddate_posix)), by = "addonName,resource"]
+results <- rbind(results, tmp)
+tmp <- data_check0[, .(test = '-twitter0', num_anomalies = .SD %>% mutate(N = -N) %>%
+  num_anomalies.twitter(min_builddate_posix)), by = "addonName,resource"]
+results <- rbind(results, tmp)
+
+# a check passes when it counted no anomaly at or after min_builddate_posix
+results[, isok := num_anomalies == 0]
+plot_colors <- c("light green", "light coral")
+plot_values <- c(TRUE, FALSE)
+results[, isok := factor(isok, plot_values)]
+
+# show results as colored overview matrix
+myplot <- ggplot(results) +
+  aes(fill = isok) +
+  facet_wrap(~test+addonName+resource) +
+  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
+  geom_text(aes(x = 0.5, y = 0.5,
+                label = paste(sep = "\n", test, addonName, resource)),
+            color = "black", inherit.aes = FALSE, parse = FALSE) +
+  # drop = FALSE keeps both isok levels in the scale, so FALSE gets the second
+  # color even when no TRUE row exists (unused levels are dropped by default)
+  scale_fill_manual(values = plot_colors, drop = FALSE) +
+  # ggtitle() adds the title to the ggplot; title() is the base graphics call
+  ggtitle("twitter anomaly detection") +
+  theme(axis.line = element_blank(),
+        axis.text.x = element_blank(), axis.text.y = element_blank(),
+        axis.ticks = element_blank(),
+        axis.title.x = element_blank(), axis.title.y = element_blank(),
+        legend.position = "none",
+        panel.background = element_blank(), panel.border = element_blank(),
+        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+        plot.background = element_blank())
+# drop the layout row directly above each panel row (presumably the facet strips)
+gt <- ggplotGrob(myplot)
+panels <- grep("panel", gt$layout$name)
+top <- unique(gt$layout$t[panels])
+gt <- gt[-(top - 1), ]
+grid.newpage()
+grid.draw(gt)
+
+
+# create option to show details
+res_bad <- results[isok == FALSE]
+res_bad
+
+# one detail plot per failed check; seq_len() yields no iteration when
+# res_bad is empty, whereas 1:nrow(res_bad) would run for idx 1 and 0
+plotlist <- vector("list", nrow(res_bad))
+for (idx in seq_len(nrow(res_bad))){
+  bad <- res_bad[idx]
+  print(bad$test)
+  # pick the series the failed test ran on (negated for the "-" tests)
+  data_source <- switch(bad$test,
+                        twitter5 = data_check5,
+                        twitter0 = data_check0,
+                        `-twitter5` = data_check5 %>% mutate(N = -N),
+                        `-twitter0` = data_check0 %>% mutate(N = -N))
+  data_cur <- data_source[addonName == bad$addonName &
+                            resource == bad$resource]
+  res <- detect_anomalies.twitter(data_cur, plot = TRUE)
+  # label the x axis with the series and test the plot belongs to
+  plotlist[[idx]] <- res$plot +
+    xlab(paste(bad$addonName, bad$resource, bad$test))
+}
+
+# print every collected plot instead of a fixed number of list slots
+for (detail_plot in plotlist){
+  print(detail_plot)
+}
+
+
+# next check: recent devbuilds without any download from common applications
+# every entry below is expanded to all of its addonName/resource/application triples
+# TODO add applicationversion to database and combinations
+combination_specs <- list(
+  list(
+    addonnames = "adblockplus",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "/antiadblockfilters.txt",
+                  "easylist default"),
+    applications = c("firefox",
+                     "fennec2",
+                     "thunderbird",
+                     "seamonkey")
+  ),
+  list(
+    addonnames = "adblockpluschrome",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "/antiadblockfilters.txt",
+                  "easylist default"),
+    applications = c("chrome",
+                     "iron",
+                     "chromium")
+  ),
+  list(
+    addonnames = "adblockplusie",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "easylist default"),
+    applications = c("msie32",
+                     "msie64")
+  ),
+  list(
+    addonnames = "adblockplusopera",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "/antiadblockfilters.txt",
+                  "easylist default"),
+    applications = "opera"
+  ),
+  list(
+    addonnames = "adblockplussafari",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "/antiadblockfilters.txt",
+                  "easylist default"),
+    applications = "safari"
+  ),
+  list(
+    addonnames = "adblockplussbrowser",
+    resources = c("/exceptionrules.txt",
+                  "/notification.json",
+                  "/antiadblockfilters.txt",
+                  "easylist default"),
+    applications = "sbrowser"
+  )
+)
+
+# expand the entries one by one and append their rows in the listed order
+check_combinations <- data.frame()
+for (spec in combination_specs){
+  triples <- expand.grid(spec$addonnames,
+                         spec$resources,
+                         spec$applications)
+  triples <- set_names(triples, c("addonName", "resource", "application"))
+  check_combinations <- rbind(check_combinations, triples)
+}
+
+# allow one full day to pick up a new version:
+# only builds dated before yesterday are joined in
+recent_join <- recent_devbuilds[date < Sys.Date() - 1, .(addonVersion, addonName)]
+check_combinations <- merge(check_combinations, recent_join)
+
+# TODO CHECK - no downloads for recent devbuilds
+data_tmp <- data_devbuilds[!(application %in% c("adblockbrowser", "adblockbrowserios"))]
+# all.y keeps expected combinations that have no data; their N is set to 0
+results_dwnl <- merge(data_tmp, check_combinations, all.y = TRUE,
+                      by = c("addonName", "resource", "application", "addonVersion"))
+results_dwnl[is.na(N), N := 0]
+results_dwnl <- results_dwnl[, .(N = sum(N)), by = "addonVersion,addonName,application,resource"]
+results_dwnl <- results_dwnl[order(addonVersion, addonName, application, resource)]
+results_dwnl[, isok := factor(N > 0, plot_values)]
+results_dwnl
+
+
+# show results as colored overview matrix
+myplot <- ggplot(results_dwnl) +
+  aes(fill = isok) +
+  facet_wrap(~addonVersion+addonName+application+resource) +
+  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
+  geom_text(aes(x = 0.5, y = 0.5,
+                label = paste(sep = "\n", addonVersion, addonName, application, resource)),
+            color = "black", inherit.aes = FALSE, parse = FALSE) +
+  # drop = FALSE keeps both isok levels in the scale, so FALSE gets the second
+  # color even when no TRUE row exists (unused levels are dropped by default)
+  scale_fill_manual(values = plot_colors, drop = FALSE) +
+  # ggtitle() adds the title to the ggplot; title() is the base graphics call
+  ggtitle("any downloads") +
+  theme(axis.line = element_blank(),
+        axis.text.x = element_blank(), axis.text.y = element_blank(),
+        axis.ticks = element_blank(),
+        axis.title.x = element_blank(), axis.title.y = element_blank(),
+        legend.position = "none",
+        panel.background = element_blank(), panel.border = element_blank(),
+        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+        plot.background = element_blank())
+# drop the layout row directly above each panel row (presumably the facet strips)
+gt <- ggplotGrob(myplot)
+panels <- grep("panel", gt$layout$name)
+top <- unique(gt$layout$t[panels])
+gt <- gt[-(top - 1), ]
+grid.newpage()
+grid.draw(gt)
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld