Index: R/filter_devbuilds.R |
diff --git a/R/filter_devbuilds.R b/R/filter_devbuilds.R |
new file mode 100644 |
index 0000000000000000000000000000000000000000..a8453ff8f288e0488dd78662f957572d4f619d12 |
--- /dev/null |
+++ b/R/filter_devbuilds.R |
@@ -0,0 +1,91 @@ |
+#' Filters devbuilds. |
+#' @description |
+#' Uses dplyr. To be used with data.frame or connection object. |
+#' Keeps rows with mindate <= date < maxdate, a resource from the settings and an |
+#' allowed addon name/version, drops versions containing "signed", sums the counts. |
+#' @param input data.frame or connection object with table |
+#' @param mindate included, defaults to 2000-01-01 |
+#' @param maxdate excluded, defaults to today |
+#' @param redshift TRUE if input is a redshift table (lowercase column names) |
+#' @param settingsfile yamlfile with the same format as the default one |
+#' @return data.frame with columns: |
+#' date, addonName, addonVersion, resource, application, downloadCount, dayssince, N |
+#' @keywords devbuilds filter |
+#' @import magrittr RPostgreSQL yaml |
+#' @importFrom dplyr filter group_by summarize collect mutate rename ungroup |
+#' @export |
+filter_devbuilds <- function(input, mindate = '2000-01-01', |
+                             maxdate = as.character(Sys.Date()), redshift = FALSE, |
+                             settingsfile = system.file("filter_devbuilds_conf.yaml", |
+                                                        package = "bbbi", mustWork = TRUE)) { |
+  # focus on most used resources |
+  settings <- yaml.load_file(settingsfile) |
+  settings$resources_easy <- download_default_easylist_subscriptions() |
+  settings$resources <- c(settings$resources_main, settings$resources_easy) |
+ |
+  # we want an operator that will work for dplyr sql as well as for data.frame |
+  # sql is already implemented by dplyr with RPostgreSQL |
+  `%~%` <- function(x, y) grepl(y, x) |
+ |
+  # input can be dataframe or connection object. |
+  # some further code will not work with dates in factor format |
+  if (is.data.frame(input) && is.factor(input$date)) { |
+    input <- input %>% mutate(date = as.character(date)) |
+  } |
+ |
+  # connection can be to postgresql or redshift |
+  # redshift needs double slashes in regexp and only has lowercase table names |
+  # & and | instead of && and ||: the scalar forms do not filter a data.frame |
+  # row by row (they fail on vectors since R 4.3); sql gets AND / OR either way |
+  if (redshift) { |
+    query_sum <- . %>% |
+      group_by(date, addonname, addonversion, resource, application, downloadcount, dayssince) %>% |
+      summarize(n = sum(n)) |
+    query <- . %>% |
+      filter(date >= local(mindate) & |
+               date < local(maxdate) & |
+               resource %in% local(settings$resources) & |
+               ((addonname %in% local(settings$addonNames_4dot) & |
+                   addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') | |
+                  (addonname == local(settings$addonNames_3dot) & |
+                     addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') | |
+                  (addonname == local(settings$addonNames_all))) & |
+               !(addonversion %~% '.*signed.*') |
+      ) %>% |
+      query_sum |
+  } else { |
+    query_sum <- . %>% |
+      group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>% |
+      summarize(N = sum(N)) |
+    query <- . %>% |
+      filter(date >= local(mindate) & |
+               date < local(maxdate) & |
+               resource %in% local(settings$resources) & |
+               ((addonName %in% local(settings$addonNames_4dot) & |
+                   addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') | |
+                  (addonName == local(settings$addonNames_3dot) & |
+                     addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') | |
+                  (addonName == local(settings$addonNames_all))) & |
+               !(addonVersion %~% '.*signed.*') |
+      ) %>% |
+      query_sum |
+  } |
+ |
+  result <- input %>% query %>% collect %>% ungroup # necessary for further code |
+  # focus on most important resources: |
+  # one of the easylists is delivered by default, therefore can be combined |
+  # however, users can add more lists (for downloadCount > 0) |
+  # as.character() stops ifelse() from returning the integer codes of a factor |
+  result <- result %>% |
+    mutate(resource = ifelse(resource %in% settings$resources_main, |
+                             as.character(resource), "easylist default")) %>% |
+    query_sum |
+ |
+  # redshift only has lowercase table names |
+  if (redshift) { |
+    result <- result %>% |
+      rename(addonName = addonname, addonVersion = addonversion, downloadCount = downloadcount, N = n) |
+  } |
+ |
+  result |
+} |