Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: automated/check_devbuilds/check_devbuilds_auto.R

Issue 29336297: automated devbuilds
Patch Set: Created Feb. 12, 2016, 1:56 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 library(RPostgreSQL)
2 library(magrittr)
3 library(data.table)
4 library(dplyr)
5 library(AnomalyDetection)
6 library(ggplot2)
7 library(grid)
8 download_devbuilds_list <- bbbi::download_devbuilds_list
9
10 # determines if recent devbuilds are working correctly
11
12
13 # TODO: which are current devbuilds of: adblockplusasus, adblockplussbrowser, contentblockers
14 # TODO "recent" time period from development cycle
15 # TODO include even older builds (time it takes to adopt new versions) for analyses on combined data set?
16 # TODO include application devbuild versions
17
18
# Settings ----
# Builds from the last `n_days_recent` days are included in the checks
# (plus the next older build, which is added further below).
n_days_recent <- 6
# Download data reaching this many days further back is considered for
# comparison with the current data.
n_days_comparison <- 90

# Earliest build date that is still included, as a Date and as a UTC
# midnight timestamp.
min_builddate <- Sys.Date() - n_days_recent
min_builddate_posix <- as.POSIXlt(
  format(min_builddate, "%Y-%m-%d 00:00:00"),
  tz = "UTC"
)
27
# For some checks we focus on the most recent devbuilds.
data_recent <- data.table(download_devbuilds_list())
recent_devbuilds <- data_recent[date >= min_builddate]

# Of the builds that are not in the recent set, keep the newest one per
# add-on (first row after sorting by descending datetime).
tmp <- anti_join(data_recent, recent_devbuilds)
tmp <- tmp[order(-datetime)]
tmp <- tmp[, .SD[1], by = "addonName"]

# Recent builds plus that one older build per add-on, newest first.
recent_devbuilds_plus <- rbind(recent_devbuilds, tmp)
recent_devbuilds_plus <- recent_devbuilds_plus[order(-datetime)]
34
35
# Devbuilds have been prefiltered into a separate table for performance
# improvements.
# `dbname` is spelled out in full; the shortened `db` only worked through
# partial argument matching.
con <- src_postgres(dbname = "kpi", host = "localhost", port = 5432,
                    user = "sporz")

mindate <- min_builddate - n_days_comparison  # inclusive
maxdate <- Sys.Date()                         # exclusive

# Functional sequence: applied to a source it selects the rows inside
# [mindate, maxdate) and sums N per combination of the grouping columns.
query <- . %>%
  tbl("devbuilds") %>%
  filter(date >= local(mindate) &
           date < local(maxdate)) %>%
  group_by(date, addonName, addonVersion, resource, application,
           downloadCount, dayssince) %>%
  summarize(N = sum(N))

data_devbuilds <- con %>% query %>% collect %>% data.table
# TODO remove after development
# The data is written to a CSV file and read back in; `date` is converted
# to Date again after reading.
path <- "data_devbuilds.csv"
write.csv(data_devbuilds, path, row.names = FALSE)
data_devbuilds <- fread(path)[, date := as.Date(date)]
54
55 # CHECKS
56 # chrome, probably ffox; ie without antiadblockfilters
57 # dc 0, exceptionrules ~ notifications ~ antiadblockfilters ~ easylist combined
58 # dc 5, exceptionrules > notifications * 0.9, antiadblockfilters ~ notifications, easylist * 3 [mind multiple!] ~ notifications
59
# Run AnomalyDetectionTs on a single time series.
#
# data_check: data.table with (at least) the columns `date` and `N`; only
#             these two columns are handed to the detector.
# ...:        further arguments passed on to AnomalyDetectionTs
#             (e.g. plot = TRUE).
#
# Returns the object returned by AnomalyDetectionTs; callers in this file
# read its `anoms` and `plot` elements.
detect_anomalies.twitter <- function(data_check, ...) {
  series <- data.frame(data_check[, .(date, N)])
  AnomalyDetectionTs(series, max_anoms = 0.2, alpha = 0.05,
                     threshold = "p95", direction = "both", longterm = TRUE,
                     piecewise_median_period_weeks = 4, ...)
}
67
# Count the anomalies detected at or after `min_builddate_posix`.
#
# data_check:          time series, passed to detect_anomalies.twitter().
# min_builddate_posix: timestamp; anomalies before it are not counted.
# ...:                 passed on to detect_anomalies.twitter().
#
# Returns the number of remaining anomaly rows.
num_anomalies.twitter <- function(data_check, min_builddate_posix, ...) {
  detected <- detect_anomalies.twitter(data_check, ...)$anoms
  if (nrow(detected) > 0) {
    # which() leaves out rows whose comparison is NA, like subset() does.
    # (A further filter, anoms >= 50, was disabled in the original.)
    is_recent <- which(detected$timestamp >= min_builddate_posix)
    detected <- detected[is_recent, , drop = FALSE]
  }
  nrow(detected)
}
78
# Replace each date by a POSIXct timestamp at 00:00:00 UTC of that day.
data_devbuilds[, date := as.POSIXct(paste(date, "00:00:00"), tz = "UTC")]


# Collects the outcome rows of the anomaly checks.
results <- data.table()


# want to check several time series for anomalies
# build corresponding combinations of addonnames and resources
check_combinations <- data.frame()

check_addonnames <- c("adblockplus",
                      "adblockpluschrome")
check_resources <- c("/exceptionrules.txt",
                     "/notification.json",
                     "/antiadblockfilters.txt",
                     "easylist default")
tmp <- expand.grid(addonName = check_addonnames,
                   resource = check_resources)
check_combinations <- rbind(check_combinations, tmp)

# adblockplusie is combined with a shorter resource list (no
# antiadblockfilters). The add-on name is given inline so that no global
# variable called `addonName` -- the name of a data column -- is left behind.
check_resources <- c("/exceptionrules.txt",
                     "/notification.json",
                     "easylist default")
tmp <- data.frame(addonName = "adblockplusie", resource = check_resources)
check_combinations <- rbind(check_combinations, tmp)
105
# Reduce download data to the series listed in `combinations` and sum N per
# date, add-on and resource.
#
# data:         data.table with the columns addonName, resource, application,
#               date and N.
# combinations: table with the columns addonName and resource.
aggregate_check_series <- function(data, combinations) {
  data <- copy(data)
  # Opera and Safari downloads are counted as adblockpluschrome. The rename
  # has to come before the merge: check_combinations as built above only
  # contains adblockplus, adblockpluschrome and adblockplusie, so the inner
  # join would otherwise discard these rows before they could be renamed.
  data[addonName %in% c("adblockplusopera", "adblockplussafari"),
       addonName := "adblockpluschrome"]
  data <- merge(data, combinations, by = c("addonName", "resource"))
  data <- data[!(application %in% c("adblockbrowser", "adblockbrowserios"))]
  data[, .(N = sum(N)), by = "date,addonName,resource"]
}

# Count anomalies per add-on/resource series with num_anomalies.twitter(),
# which only counts anomalies at or after min_builddate_posix.
#
# data:       output of aggregate_check_series().
# test_label: value stored in the `test` column of the result.
# multiplier: factor applied to N before detection; -1L runs the detector on
#             the negated series.
count_series_anomalies <- function(data, test_label, multiplier = 1L) {
  data[, .(test = test_label,
           num_anomalies = num_anomalies.twitter(
             data.table(date = date, N = multiplier * N),
             min_builddate_posix)),
       by = "addonName,resource"]
}

# CHECK - twitter on downloadCount 5
data_check5 <- aggregate_check_series(data_devbuilds[downloadCount == 5],
                                      check_combinations)
results <- rbind(results,
                 count_series_anomalies(data_check5, "twitter5"),
                 count_series_anomalies(data_check5, "-twitter5",
                                        multiplier = -1L))

# CHECK - twitter on downloadCount 0, dayssince -1
data_check0 <- aggregate_check_series(
  data_devbuilds[downloadCount == 0 & dayssince == -1],
  check_combinations)
results <- rbind(results,
                 count_series_anomalies(data_check0, "twitter0"),
                 count_series_anomalies(data_check0, "-twitter0",
                                        multiplier = -1L))
137
# A check is ok when it counted no anomalies.
results[, isok := num_anomalies == 0]
plot_colors <- c("light green", "light coral")
plot_values <- c(TRUE, FALSE)
results[, isok := factor(isok, plot_values)]


# show results as colored overview matrix
# One facet per check: a filled unit rectangle with the check's labels
# printed on top.
myplot <- ggplot(results) +
  aes(fill = isok) +
  facet_wrap(~test+addonName+resource) +
  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
  geom_text(aes(x = 0.5, y = 0.5,
                label = paste(sep = "\n", test, addonName, resource)),
            color = "black", inherit.aes = FALSE, parse = FALSE) +
  # Colours are named by level ("TRUE"/"FALSE"). Unnamed values are assigned
  # to the levels present in the data in order, so a result set consisting
  # only of failures would be painted in the first (green) colour.
  scale_fill_manual(values = setNames(plot_colors, plot_values)) +
  # ggtitle() sets the plot title; title() is the base graphics function and
  # does not add anything to a ggplot.
  ggtitle("twitter anomaly detection") +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = element_blank())
gt <- ggplotGrob(myplot)
# Remove the layout row directly above each panel row -- presumably the
# facet strips, whose text is already shown inside the rectangles.
panels <- grep("panel", gt$layout$name)
top <- unique(gt$layout$t[panels])
gt <- gt[-(top - 1), ]
grid.newpage()
grid.draw(gt)
172
173
# create option to show details
res_bad <- results[isok == FALSE]
res_bad

# One detail plot per failed check. seq_len() yields no iterations when
# res_bad is empty, whereas 1:nrow(res_bad) would run with idx 1 and 0.
plotlist <- lapply(seq_len(nrow(res_bad)), function(idx) {
  bad <- res_bad[idx]
  print(bad$test)
  # Pick the series the failed test was run on; the "-" variants use the
  # negated counts.
  data_source <- switch(bad$test,
                        twitter5 = data_check5,
                        twitter0 = data_check0,
                        `-twitter5` = data_check5 %>% mutate(N = -N),
                        `-twitter0` = data_check0 %>% mutate(N = -N))
  data_cur <- data_source[addonName == bad$addonName &
                            resource == bad$resource]
  res <- detect_anomalies.twitter(data_cur, plot = TRUE)
  res$plot +
    xlab(paste(bad$addonName,
               bad$resource,
               bad$test))
})

# Show every collected plot, however many there are.
for (detail_plot in plotlist) {
  print(detail_plot)
}
202
203
# want to check for no downloads for recent devbuilds with common applications
# build corresponding combinations of addonnames, resources, applications
# TODO add applicationversion to database and combinations
resources_all <- c("/exceptionrules.txt",
                   "/notification.json",
                   "/antiadblockfilters.txt",
                   "easylist default")
# adblockplusie is checked without antiadblockfilters
resources_ie <- c("/exceptionrules.txt",
                  "/notification.json",
                  "easylist default")

# One entry per add-on: the resources and applications it is crossed with.
combination_specs <- list(
  list(addon = "adblockplus",
       resources = resources_all,
       applications = c("firefox", "fennec2", "thunderbird", "seamonkey")),
  list(addon = "adblockpluschrome",
       resources = resources_all,
       applications = c("chrome", "iron", "chromium")),
  list(addon = "adblockplusie",
       resources = resources_ie,
       applications = c("msie32", "msie64")),
  list(addon = "adblockplusopera",
       resources = resources_all,
       applications = "opera"),
  list(addon = "adblockplussafari",
       resources = resources_all,
       applications = "safari"),
  list(addon = "adblockplussbrowser",
       resources = resources_all,
       applications = "sbrowser")
)

# Full cross product per add-on, stacked in the order of the specs above.
check_combinations <- do.call(rbind, lapply(combination_specs, function(spec) {
  expand.grid(addonName = spec$addon,
              resource = spec$resources,
              application = spec$applications)
}))
273
# A new version gets one full day to be picked up: only builds dated before
# yesterday are joined onto the combinations (merge on the shared columns).
recent_join <- recent_devbuilds[date < (Sys.Date() - 1),
                                list(addonVersion, addonName)]
check_combinations <- merge(x = check_combinations, y = recent_join)
277
# TODO CHECK - no downloads for recent devbuilds
data_tmp <- data_devbuilds[!(application %in% c("adblockbrowser",
                                                "adblockbrowserios"))]
# all.y = TRUE keeps every expected combination, including those without any
# download row; their N is NA after the merge and is set to 0.
results_dwnl <- merge(data_tmp, check_combinations, all.y = TRUE,
                      by = c("addonName", "resource", "application",
                             "addonVersion"))
results_dwnl[is.na(N), N := 0]
# Total downloads per version, add-on, application and resource.
results_dwnl <- results_dwnl[
  , .(N = sum(N)),
  by = "addonVersion,addonName,application,resource"
][order(addonVersion, addonName, application, resource)]
# A combination is ok when it has at least one download.
results_dwnl[, isok := N > 0]
results_dwnl[, isok := factor(isok, plot_values)]

results_dwnl
288
289
# show results as colored overview matrix
# One facet per combination: a filled unit rectangle with the combination's
# labels printed on top.
myplot <- ggplot(results_dwnl) +
  aes(fill = isok) +
  facet_wrap(~addonVersion+addonName+application+resource) +
  geom_rect(xmin = 0, xmax = 1, ymin = 0, ymax = 1) +
  geom_text(aes(x = 0.5, y = 0.5,
                label = paste(sep = "\n", addonVersion, addonName,
                              application, resource)),
            color = "black", inherit.aes = FALSE, parse = FALSE) +
  # Colours are named by level ("TRUE"/"FALSE"). Unnamed values are assigned
  # to the levels present in the data in order, so a result set consisting
  # only of failures would be painted in the first (green) colour.
  scale_fill_manual(values = setNames(plot_colors, plot_values)) +
  # ggtitle() sets the plot title; title() is the base graphics function and
  # does not add anything to a ggplot.
  ggtitle("any downloads") +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        panel.background = element_blank(),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.background = element_blank())
gt <- ggplotGrob(myplot)
# Remove the layout row directly above each panel row -- presumably the
# facet strips, whose text is already shown inside the rectangles.
panels <- grep("panel", gt$layout$name)
top <- unique(gt$layout$t[panels])
gt <- gt[-(top - 1), ]
grid.newpage()
grid.draw(gt)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld