OLD | NEW |
(Empty) | |
#' Filters devbuilds.
#'
#' @description
#' Uses dplyr. To be used with a data.frame or a connection object.
#'
#' Keeps only rows that satisfy all of the following, then sums the counts
#' per remaining combination of key columns:
#' \itemize{
#'   \item \code{date} lies in \code{[mindate, maxdate)};
#'   \item \code{resource} is one of \code{resources_main} (from the settings
#'     file) or one of the default easylist subscriptions;
#'   \item the add-on name is listed in \code{addonNames_4dot} and the version
#'     has four dot-separated numeric parts, or it is listed in
#'     \code{addonNames_3dot} and the version has three, or it is listed in
#'     \code{addonNames_all} (any version);
#'   \item the add-on version does not contain "signed".
#' }
#' Afterwards every resource that is not in \code{resources_main} is relabelled
#' "easylist default" and the counts are summed again.
#'
#' @param input data.frame or connection object with table
#' @param mindate included, defaults to 2000-01-01
#' @param maxdate excluded, defaults to today
#' @param redshift set to TRUE when \code{input} is a Redshift connection:
#'   lowercase column names and doubled backslashes in the regular expressions
#'   are used for the query, and the columns are renamed back to camelCase
#'   before returning. Defaults to FALSE.
#' @param settingsfile yamlfile with the same format as the default one
#' @return data.frame with columns:
#'   date, addonName, addonVersion, resource, application, downloadCount,
#'   dayssince, N. The last step is a grouped \code{summarize()}, so the result
#'   is still grouped by every key column except the last one; call
#'   \code{dplyr::ungroup()} if that matters downstream.
#' @keywords devbuilds filter
#' @import magrittr RPostgreSQL yaml
#' @importFrom dplyr filter group_by summarize collect mutate ungroup rename
#' @export

filter_devbuilds <-
  function(input,
           mindate = '2000-01-01',
           maxdate = as.character(Sys.Date()),
           redshift = FALSE,
           settingsfile = system.file("filter_devbuilds_conf.yaml",
                                      package = "bbbi", mustWork = TRUE)
  ) {
    # focus on most used resources
    settings <- yaml.load_file(settingsfile)
    settings$resources_easy <- download_default_easylist_subscriptions()
    settings$resources <- c(settings$resources_main, settings$resources_easy)

    # we want an operator that will work for dplyr sql as well as for data.frame
    # sql is already implemented by dplyr with RPostgreSQL
    `%~%` <- function(x, y) grepl(y, x)

    # input can be dataframe or connection object.
    # some further code will not work with dates in factor format
    if (is.data.frame(input) && is.factor(input$date)) {
      input <- input %>% mutate(date = as.character(date))
    }

    # NOTE on operators inside filter(): the conditions are combined with the
    # element-wise `&` / `|`. The scalar `&&` / `||` only look at the first
    # row of a data.frame (and are an error for longer vectors in R >= 4.3).
    # On a database connection both spellings are translated to AND / OR.
    # The add-on name lists are matched with %in% throughout so that every
    # list in the settings file may hold one or several names.

    # connection can be to postgresql or redshift
    # redshift needs double slashes in regexp and only has lowercase column
    # names
    if (redshift) {
      query_sum <- . %>%
        group_by(date, addonname, addonversion, resource, application,
                 downloadcount, dayssince) %>%
        summarize(n = sum(n))
      query <- . %>%
        filter(date >= local(mindate) &
                 date < local(maxdate) &
                 resource %in% local(settings$resources) &
                 ((addonname %in% local(settings$addonNames_4dot) &
                     addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
                    (addonname %in% local(settings$addonNames_3dot) &
                       addonversion %~% '^[0-9]*\\\\.[0-9]*\\\\.[0-9]*(-.*)?$') |
                    (addonname %in% local(settings$addonNames_all))) &
                 !(addonversion %~% '.*signed.*')
        ) %>%
        query_sum
    } else {
      query_sum <- . %>%
        group_by(date, addonName, addonVersion, resource, application,
                 downloadCount, dayssince) %>%
        summarize(N = sum(N))
      query <- . %>%
        filter(date >= local(mindate) &
                 date < local(maxdate) &
                 resource %in% local(settings$resources) &
                 ((addonName %in% local(settings$addonNames_4dot) &
                     addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
                    (addonName %in% local(settings$addonNames_3dot) &
                       addonVersion %~% '^[0-9]*\\.[0-9]*\\.[0-9]*(-.*)?$') |
                    (addonName %in% local(settings$addonNames_all))) &
                 !(addonVersion %~% '.*signed.*')
        ) %>%
        query_sum
    }

    # ungroup is necessary for further code: `resource` is a grouping column
    # and is rewritten below
    result <- input %>% query %>% collect %>% ungroup

    # focus on most important resources:
    # one of the easylists is delivered by default, therefore can be combined
    # however, users can add more lists (for downloadCount > 0)
    # as.character() keeps the labels when `resource` is a factor; ifelse()
    # would otherwise return the factor's integer codes
    result <- result %>%
      mutate(resource = ifelse(resource %in% settings$resources_main,
                               as.character(resource),
                               "easylist default")) %>%
      query_sum

    # redshift only has lowercase column names; restore the documented ones
    if (redshift) {
      result <- result %>%
        rename(addonName = addonname) %>%
        rename(addonVersion = addonversion) %>%
        rename(downloadCount = downloadcount) %>%
        rename(N = n)
    }

    result
  }
OLD | NEW |