| Index: R/filter_devbuilds.R |
| diff --git a/R/filter_devbuilds.R b/R/filter_devbuilds.R |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..a8453ff8f288e0488dd78662f957572d4f619d12 |
| --- /dev/null |
| +++ b/R/filter_devbuilds.R |
| @@ -0,0 +1,91 @@ |
| +#' Filters devbuilds. |
| +#' @description |
| +#' Uses dplyr. To be used with data.frame or connection object. |
| +#' Rows are filtered by date, resource, add-on name and version pattern, |
| +#' versions containing "signed" are dropped, and counts are summed per group. |
| +#' @param input data.frame or connection object with table |
| +#' @param mindate included, defaults to 2000-01-01 |
| +#' @param maxdate excluded, defaults to today |
| +#' @param redshift TRUE if input is a redshift table (lowercase column names, |
| +#'   double slashes in regexp), defaults to FALSE |
| +#' @param settingsfile yamlfile with the same format as the default one |
| +#' @return data.frame with columns: date, addonName, addonVersion, resource, |
| +#'   application, downloadCount, dayssince, N |
| +#' @keywords devbuilds filter |
| +#' @import magrittr RPostgreSQL yaml |
| +#' @importFrom dplyr filter group_by summarize collect mutate ungroup rename |
| +#' @export |
| +filter_devbuilds <- |
| +  function(input, |
| +           mindate = '2000-01-01', |
| +           maxdate = as.character(Sys.Date()), |
| +           redshift = FALSE, |
| +           settingsfile = system.file("filter_devbuilds_conf.yaml", package = "bbbi", mustWork = TRUE)) { |
| +  # focus on most used resources |
| +  settings <- yaml.load_file(settingsfile) |
| +  settings$resources_easy <- download_default_easylist_subscriptions() |
| +  settings$resources <- c(settings$resources_main, settings$resources_easy) |
| + |
| +  # %~% must work for dplyr sql (already translated there) as well as for data.frame |
| +  `%~%` <- function(x, y) grepl(y, x) |
| + |
| +  # some further code will not work with dates in factor format |
| +  if (is.data.frame(input) && is.factor(input$date)) { |
| +    input <- input %>% mutate(date = as.character(date)) |
| +  } |
| + |
| +  # redshift needs double slashes in regexp and only has lowercase column names. |
| +  # & and | (not && and ||) are used inside filter(): they are elementwise on a |
| +  # data.frame and are still translated to AND / OR for a database table. |
| +  # NOTE(review): addonNames_3dot / addonNames_all are compared with ==, which |
| +  # presumably expects a single name each -- confirm against the yaml file. |
| +  if (redshift) { |
| +    query_sum <- . %>% |
| +      group_by(date, addonname, addonversion, resource, application, downloadcount, dayssince) %>% |
| +      summarize(n = sum(n)) |
| +    query <- . %>% |
| +      filter(date >= local(mindate) & |
| +             date < local(maxdate) & |
| +             resource %in% local(settings$resources) & |
| +             ((addonname %in% local(settings$addonNames_4dot) & |
| +               addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') | |
| +              (addonname == local(settings$addonNames_3dot) & |
| +               addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') | |
| +              (addonname == local(settings$addonNames_all))) & |
| +             !(addonversion %~% '.*signed.*') |
| +      ) %>% |
| +      query_sum |
| +  } else { |
| +    query_sum <- . %>% |
| +      group_by(date, addonName, addonVersion, resource, application, downloadCount, dayssince) %>% |
| +      summarize(N = sum(N)) |
| +    query <- . %>% |
| +      filter(date >= local(mindate) & |
| +             date < local(maxdate) & |
| +             resource %in% local(settings$resources) & |
| +             ((addonName %in% local(settings$addonNames_4dot) & |
| +               addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') | |
| +              (addonName == local(settings$addonNames_3dot) & |
| +               addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') | |
| +              (addonName == local(settings$addonNames_all))) & |
| +             !(addonVersion %~% '.*signed.*') |
| +      ) %>% |
| +      query_sum |
| +  } |
| + |
| +  # ungroup is necessary for further code |
| +  result <- input %>% query %>% collect %>% ungroup |
| +  # one of the easylists is delivered by default, therefore can be combined |
| +  # however, users can add more lists (for downloadCount > 0) |
| +  result <- result %>% |
| +    mutate(resource = ifelse(resource %in% settings$resources_main, resource, "easylist default")) %>% |
| +    query_sum |
| + |
| +  # redshift only has lowercase column names: restore the documented ones |
| +  if (redshift) { |
| +    result <- result %>% |
| +      rename(addonName = addonname, addonVersion = addonversion, downloadCount = downloadcount, N = n) |
| +  } |
| + |
| +  result |
| +} |