Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: R/filter_devbuilds.R

Issue 29336289: devbuild
Patch Set: Created Feb. 12, 2016, 1:53 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « R/download_devbuilds_list.R ('k') | inst/download_default_easylist_subscriptions.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
#' Filters devbuilds.
#'
#' @description
#' Uses dplyr. To be used with data.frame or connection object.
#' Keeps rows inside the date window whose resource is listed in the settings
#' and whose add-on name / version combination is accepted by the settings
#' (4-part versions, 3-part versions, or any version, depending on the list the
#' add-on name appears in); versions containing "signed" are dropped. Counts
#' are then summed, and every resource that is not one of the main resources
#' is merged into a single "easylist default" resource.
#'
#' @param input data.frame or connection object with table
#' @param mindate included, defaults to 2000-01-01
#' @param maxdate excluded, defaults to today
#' @param redshift set to TRUE when \code{input} uses Redshift conventions:
#'   lowercase column names and doubled backslashes in regular expressions
#' @param settingsfile yamlfile with the same format as the default one
#' @return data.frame with columns:
#'   date, addonName, addonVersion, resource, application, downloadCount,
#'   dayssince, N. The result is still grouped by the columns that
#'   \code{summarize()} leaves grouped; call \code{ungroup()} if needed.
#' @keywords devbuilds filter
#' @import magrittr RPostgreSQL yaml
#' @importFrom dplyr filter group_by summarize collect mutate ungroup rename
#' @export
filter_devbuilds <-
  function(input,
           mindate = '2000-01-01',
           maxdate = as.character(Sys.Date()),
           redshift = FALSE,
           settingsfile = system.file("filter_devbuilds_conf.yaml",
                                      package = "bbbi", mustWork = TRUE)
  ) {
    # focus on most used resources
    settings <- yaml.load_file(settingsfile)
    settings$resources_easy <- download_default_easylist_subscriptions()
    settings$resources <- c(settings$resources_main, settings$resources_easy)

    # we want an operator that will work for dplyr sql as well as for data.frame
    # sql is already implemented by dplyr with RPostgreSQL
    `%~%` <- function(x, y) grepl(y, x)

    # input can be dataframe or connection object.
    # some further code will not work with dates in factor format
    if (is.data.frame(input) && is.factor(input$date)) {
      input <- input %>% mutate(date = as.character(date))
    }

    # The filter conditions use the elementwise operators `&` / `|`: they work
    # row by row on a data.frame and are translated to AND / OR for sql.
    # The scalar forms `&&` / `||` would only look at the first row (and raise
    # an error on longer vectors in recent R versions).
    # Add-on names are matched with %in% throughout so that every list in the
    # settings file may hold one or several names.

    # connection can be to postgresql or redshift
    # redshift needs double slashes in regexp and only has lowercase table names
    if (redshift) {
      query_sum <- . %>%
        group_by(date, addonname, addonversion, resource, application,
                 downloadcount, dayssince) %>%
        summarize(n = sum(n))
      query <- . %>%
        filter(
          date >= local(mindate) &
            date < local(maxdate) &
            resource %in% local(settings$resources) &
            ((addonname %in% local(settings$addonNames_4dot) &
                addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
               (addonname %in% local(settings$addonNames_3dot) &
                  addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
               (addonname %in% local(settings$addonNames_all))) &
            !(addonversion %~% '.*signed.*')
        ) %>%
        query_sum
    } else {
      query_sum <- . %>%
        group_by(date, addonName, addonVersion, resource, application,
                 downloadCount, dayssince) %>%
        summarize(N = sum(N))
      query <- . %>%
        filter(
          date >= local(mindate) &
            date < local(maxdate) &
            resource %in% local(settings$resources) &
            ((addonName %in% local(settings$addonNames_4dot) &
                addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
               (addonName %in% local(settings$addonNames_3dot) &
                  addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
               (addonName %in% local(settings$addonNames_all))) &
            !(addonVersion %~% '.*signed.*')
        ) %>%
        query_sum
    }

    # ungroup is necessary for further code
    result <- input %>% query %>% collect %>% ungroup

    # focus on most important resources:
    # one of the easylists is delivered by default, therefore can be combined
    # however, users can add more lists (for downloadCount > 0)
    # as.character() keeps the resource names intact if the column is a
    # factor (ifelse() would otherwise return the integer codes)
    result <- result %>%
      mutate(resource = ifelse(resource %in% settings$resources_main,
                               as.character(resource),
                               "easylist default")) %>%
      query_sum

    # redshift only has lowercase table names
    if (redshift) {
      result <- result %>%
        rename(addonName = addonname) %>%
        rename(addonVersion = addonversion) %>%
        rename(downloadCount = downloadcount) %>%
        rename(N = n)
    }

    result
  }
OLDNEW
« no previous file with comments | « R/download_devbuilds_list.R ('k') | inst/download_default_easylist_subscriptions.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld