Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: R/filter_devbuilds.R

Issue 29336289: devbuild
Patch Set: Created Feb. 12, 2016, 1:53 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « R/download_devbuilds_list.R ('k') | inst/download_default_easylist_subscriptions.sh » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: R/filter_devbuilds.R
diff --git a/R/filter_devbuilds.R b/R/filter_devbuilds.R
new file mode 100644
index 0000000000000000000000000000000000000000..a8453ff8f288e0488dd78662f957572d4f619d12
--- /dev/null
+++ b/R/filter_devbuilds.R
@@ -0,0 +1,107 @@
+#' Filter devbuild download records.
+#'
+#' @description
+#' Uses dplyr. To be used with a data.frame or a connection object (table).
+#' Keeps the rows in the date range whose resource and add-on name are listed
+#' in the settings, checks the version format for the 4dot and 3dot add-ons,
+#' drops versions containing "signed" and sums the counts per group.
+#'
+#' @param input data.frame or connection object with table
+#' @param mindate included, defaults to 2000-01-01
+#' @param maxdate excluded, defaults to today
+#' @param redshift set to TRUE if the connection is to redshift, which needs
+#'   double slashes in regexps and only has lowercase column names
+#' @param settingsfile yamlfile with the same format as the default one
+#' @return data.frame with columns:
+#'   date, addonName, addonVersion, resource, application, downloadCount,
+#'   dayssince, N
+#' @keywords devbuilds filter
+#' @import magrittr RPostgreSQL yaml
+#' @importFrom dplyr filter group_by summarize collect mutate ungroup rename
+#' @export
+filter_devbuilds <-
+  function(input,
+           mindate = '2000-01-01',
+           maxdate = as.character(Sys.Date()),
+           redshift = FALSE,
+           settingsfile = system.file("filter_devbuilds_conf.yaml", package = "bbbi", mustWork = TRUE)
+           ) {
+    # focus on most used resources
+    settings <- yaml.load_file(settingsfile)
+    settings$resources_easy <- download_default_easylist_subscriptions()
+    settings$resources <- c(settings$resources_main, settings$resources_easy)
+
+    # we want an operator that will work for dplyr sql as well as for data.frame
+    # sql is already implemented by dplyr with RPostgreSQL
+    `%~%` <- function(x, y) grepl(y, x)
+
+    # input can be dataframe or connection object.
+    # some further code will not work with dates in factor format
+    if (is.data.frame(input) && is.factor(input$date)) {
+      input <- input %>% mutate(date = as.character(date))
+    }
+
+    # connection can be to postgresql or redshift
+    # redshift needs double slashes in regexp and only has lowercase column names
+    #
+    # the filter conditions are combined with the vectorised & and |: the scalar
+    # && and || are not meant for whole columns and break on a data.frame, while
+    # both forms are translated to AND / OR for sql
+    # NOTE(review): == assumes addonNames_3dot and addonNames_all each hold a
+    # single name - confirm against the settings file, otherwise use %in%
+    if (redshift) {
+      query_sum <- . %>%
+        group_by(date, addonname, addonversion, resource, application, downloadcount, dayssince) %>%
+        summarize(n = sum(n))
+      query <- . %>%
+        filter(date >= local(mindate) &
+                 date < local(maxdate) &
+                 resource %in% local(settings$resources) &
+                 ((addonname %in% local(settings$addonNames_4dot) &
+                     addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
+                    (addonname == local(settings$addonNames_3dot) &
+                       addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
+                    (addonname == local(settings$addonNames_all))) &
+                 !(addonversion %~% '.*signed.*')
+        ) %>%
+        query_sum
+    } else {
+      query_sum <- . %>%
+        group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>%
+        summarize(N = sum(N))
+      query <- . %>%
+        filter(date >= local(mindate) &
+                 date < local(maxdate) &
+                 resource %in% local(settings$resources) &
+                 ((addonName %in% local(settings$addonNames_4dot) &
+                     addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
+                    (addonName == local(settings$addonNames_3dot) &
+                       addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
+                    (addonName == local(settings$addonNames_all))) &
+                 !(addonVersion %~% '.*signed.*')
+        ) %>%
+        query_sum
+    }
+
+    result <- input %>% query %>% collect %>% ungroup # necessary for further code
+    # focus on most important resources:
+    # one of the easylists is delivered by default, therefore can be combined
+    # however, users can add more lists (for downloadCount > 0)
+    # as.character() keeps the labels if resource is a factor; ifelse() would
+    # otherwise return its integer codes
+    result <- result %>%
+      mutate(resource = ifelse(resource %in% settings$resources_main,
+                               as.character(resource), "easylist default")) %>%
+      query_sum
+
+    # redshift only has lowercase column names
+    if (redshift) {
+      result <- result %>%
+        rename(addonName = addonname,
+               addonVersion = addonversion,
+               downloadCount = downloadcount,
+               N = n)
+    }
+
+    result
+  }
« no previous file with comments | « R/download_devbuilds_list.R ('k') | inst/download_default_easylist_subscriptions.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld