diff --git a/.Rbuildignore b/.Rbuildignore index 73513b2..fa4bfd8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,4 @@ ^scratch\.R$ ^scratch$ ^data-raw$ +^jarl\.toml$ diff --git a/DESCRIPTION b/DESCRIPTION index 1dee4e4..d96f7e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: vimcheck Title: Diagnostics for Vaccine Impact Modelling Consortium Burden and Impact Estimates -Version: 0.0.3 +Version: 0.0.4 Authors@R: c( person("Pratik", "Gupte", , "p.gupte24@imperial.ac.uk", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5294-7819")), @@ -24,13 +24,17 @@ Depends: Imports: checkmate, cli, + diffdf, dplyr, forcats, ggplot2, + ggridges, glue, + here, readr, rlang, scales, + stats, stringr, tidyr Suggests: @@ -48,4 +52,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index 9bcbd19..cb7c3f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,20 +1,56 @@ # Generated by roxygen2: do not edit by hand +export(COLNAMES_INTEREST_PRESSURE_TEST) +export(COLNAMES_KEY_PRESSURE_TEST) +export(COLOUR_VIMC) +export(DEF_TOUCHSTONE_NEW) +export(DEF_TOUCHSTONE_OLD) +export(DEF_TOUCHSTONE_OLD_OLD) +export(EXCLUDED_DISEASES) +export(IMPACT_OUTCOMES) +export(MAX_TS_MONTH) +export(MAX_TS_YEAR) +export(MIN_TS_MONTH) +export(MIN_TS_YEAR) +export(N_TS_MIN_CHARS) +export(N_TS_YEAR_CHARS) export(basic_burden_sanity) +export(burden_outcome_names) export(check_demography_alignment) +export(colnames_df_missing_cols) +export(colnames_plot_demog_compare) +export(compare_natl_subreg) export(file_dict_colnames) +export(filter_excluded_diseases_ts) +export(filter_invalid_trajectories) +export(filter_recent_ts) +export(flag_duplicates) +export(flag_large_diffs) +export(gen_combined_df) +export(gen_national_iqr) +export(generate_diffs) export(plot_age_patterns) export(plot_compare_demography) export(plot_coverage_set) +export(plot_cumul) +export(plot_diff) export(plot_fvp) export(plot_global_burden) export(plot_global_burden_decades) +export(plot_modelling_group_variation) +export(plot_sig_diff) +export(plot_vaccine_gavi) export(prep_plot_age) export(prep_plot_burden_decades) export(prep_plot_coverage_set) +export(prep_plot_cumul) export(prep_plot_demography) export(prep_plot_fvp) export(prep_plot_global_burden) +export(prep_plot_mod_grp_varn) +export(prep_plot_vax_gavi) +export(save_outputs) +export(scenario_data_colnames) export(theme_vimc) export(theme_vimc_noxaxis) export(validate_complete_incoming_files) @@ -26,6 +62,8 @@ importFrom(ggplot2,facet_grid) importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_col) importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) importFrom(ggplot2,ggplot) importFrom(ggplot2,label_wrap_gen) importFrom(ggplot2,labeller) @@ -33,5 +71,7 @@ importFrom(ggplot2,labs) importFrom(ggplot2,scale_fill_distiller) importFrom(ggplot2,scale_x_continuous) importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) importFrom(ggplot2,vars) +importFrom(rlang,":=") importFrom(rlang,.data) diff --git a/NEWS.md b/NEWS.md index 81b8136..fcaa798 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# vimcheck 0.0.4 + +- Added impact diagnostics functions in `R/fn_impact_diagnostics.R`. + +- Added test data originally used in [vimpact](https://github.com/vimc/vimpact) for use with vimcheck. + +- Added plotting preparation functions and plotting functions in `R/fn_plotting_prep_impact_diagnostics.R` and `R/fn_plotting_impact_diagnostics.R`. + +- Added dependencies _diffdf_ and _here_. + +- Added vignette on design decisions and pressure testing functions. + # vimcheck 0.0.3 - Separated data-prep for plotting from plotting functions. diff --git a/R/constants.R b/R/constants.R index 03cd537..e4c3532 100644 --- a/R/constants.R +++ b/R/constants.R @@ -1,10 +1,17 @@ #' Package constants #' +#' @description +#' Constant values used in _vimcheck_. See the **Examples** section for the +#' constant values. +#' #' @name constants #' @rdname constants #' #' @keywords constants #' +#' @examples +#' file_dict_colnames +#' #' @export file_dict_colnames <- c( "scenario_type", @@ -15,6 +22,11 @@ file_dict_colnames <- c( ) #' @name constants +#' +#' @examples +#' scenario_data_colnames +#' +#' @export scenario_data_colnames <- c( "scenario_type", "scenario_type_description", @@ -23,6 +35,11 @@ scenario_data_colnames <- c( ) #' @name constants +#' +#' @examples +#' burden_outcome_names +#' +#' @export burden_outcome_names <- c( "cases", "deaths", @@ -37,6 +54,11 @@ burden_outcome_names <- c( ) #' @name constants +#' +#' @examples +#' colnames_plot_demog_compare +#' +#' @export colnames_plot_demog_compare <- c( "variable", "scenario", @@ -46,3 +68,150 @@ colnames_plot_demog_compare <- c( "value", "value_millions" ) + +#' @name constants +#' +#' @examples +#' colnames_df_missing_cols +#' +#' @export +colnames_df_missing_cols <- c( + "country_name", + "vaccine", + "activity_type", + "year", + "modelling_group" +) + +#' @name constants +#' +#' @examples +#' COLNAMES_KEY_PRESSURE_TEST +#' +#' @export +COLNAMES_KEY_PRESSURE_TEST <- c( + "country", + "country_name", + "vaccine", + "activity_type", + "year", + "disease", + "modelling_group" +) + +#' @name constants +#' +#' @examples +#' COLNAMES_INTEREST_PRESSURE_TEST +#' +#' @export +COLNAMES_INTEREST_PRESSURE_TEST <- union( + COLNAMES_KEY_PRESSURE_TEST, + c( + "fvps", + "target_population", + "coverage", + "deaths_averted", + "dalys_averted", + "deaths_averted_rate", + "dalys_averted_rate" + ) +) + +#' @name constants +#' +#' @examples +#' IMPACT_OUTCOMES +#' +#' @export +IMPACT_OUTCOMES <- c("deaths_averted", "dalys_averted") + +IMPACT_GROUP_VARS <- c("activity_type", "vaccine") + +#' @name constants +#' +#' @examples +#' EXCLUDED_DISEASES +#' +#' @export +EXCLUDED_DISEASES <- c("Hib", "PCV", "Rota", "JE") + +#' @name constants +#' +#' @examples +#' N_TS_MIN_CHARS +#' +#' @export +N_TS_MIN_CHARS <- 6L + +#' @name constants +#' +#' @examples +#' N_TS_YEAR_CHARS +#' +#' @export +N_TS_YEAR_CHARS <- 4L + +#' @name constants +#' +#' @examples +#' MIN_TS_YEAR +#' +#' @export +MIN_TS_YEAR <- 2000 + +#' @name constants +#' +#' @examples +#' MAX_TS_YEAR +#' +#' @export +MAX_TS_YEAR <- 2100 + +#' @name constants +#' +#' @examples +#' MIN_TS_MONTH +#' +#' @export +MIN_TS_MONTH <- 1 + +#' @name constants +#' +#' @examples +#' MAX_TS_MONTH +#' +#' @export +MAX_TS_MONTH <- 12 + +#' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_OLD +#' +#' @export +DEF_TOUCHSTONE_OLD <- "201910" + +#' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_NEW +#' +#' @export +DEF_TOUCHSTONE_NEW <- "202310" + +#' @name constants +#' +#' @examples +#' DEF_TOUCHSTONE_OLD_OLD +#' +#' @export +DEF_TOUCHSTONE_OLD_OLD <- "202110" + +#' @name constants +#' +#' @examples +#' COLOUR_VIMC +#' +#' @export +COLOUR_VIMC <- "#008080" diff --git a/R/data_who_subregions.R b/R/data_who_subregions.R new file mode 100644 index 0000000..abc796a --- /dev/null +++ b/R/data_who_subregions.R @@ -0,0 +1,23 @@ +#' Data on countries in WHO subregions +#' +#' Data that groups countries into WHO subregions. +#' +#' @format ## `who_subregions` +#' A data frame with 249 rows and 9 columns: +#' \describe{ +#' \item{choice_subregion}{Sub-region three character code with added letter +#' identifier.} +#' \item{country_name}{Country name in long form.} +#' \item{country}{ISO 3 character country code.} +#' \item{Global.Name}{A constant, 'World'.} +#' \item{Region.Name}{Continent-scale region identifier.} +#' \item{Sub.region}{Sub-continental scale region identifier.} +#' \item{Intermediate.Region.Name}{Alternative regional scale identifier.} +#' \item{subregion}{Sub-regional identifier.} +#' \item{vimc117}{Logical identifier.} +#' } +#' +#' @keywords data +#' +#' @source Prepared by the VIMC secretariat. +"who_subregions" diff --git a/R/example_data.R b/R/example_data.R index e2cf380..c6a1d7b 100644 --- a/R/example_data.R +++ b/R/example_data.R @@ -114,3 +114,28 @@ #' #' @source Prepared by the VIMC secretariat. "eg_fvps" + +#' Example of impact data +#' +#' Example of vaccine impact data taken from data used to test \pkg{vimpact}. +#' This data is primarily used for testing here too. +#' +#' @format ## `eg_impact` +#' A data frame with 5396 rows and 9 columns: +#' \describe{ +#' \item{disease}{Disease name.} +#' \item{vaccine}{Vaccine identifier.} +#' \item{modelling_group}{Modelling group name.} +#' \item{country}{Country ISO 3-character code.} +#' \item{country_name}{Country name.} +#' \item{year}{Year for which impacts are modelled.} +#' \item{activity_type}{Activity type: either "routine" or "campaign."} +#' \item{burden_outcome}{Name of the burden outcome; one of "deaths_averted" +#' or "dalys_averted".} +#' \item{impact}{Value of the impact}. +#' } +#' +#' @keywords data +#' +#' @source Prepared by the VIMC secretariat. +"eg_impact" diff --git a/R/burden_diagnostics.R b/R/fn_burden_diagnostics.R similarity index 98% rename from R/burden_diagnostics.R rename to R/fn_burden_diagnostics.R index 0986acf..f77f09c 100644 --- a/R/burden_diagnostics.R +++ b/R/fn_burden_diagnostics.R @@ -17,7 +17,7 @@ #' Prints a message to screen informing the user whether any action has been #' taken. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_file_dict_template <- function( @@ -121,7 +121,7 @@ validate_file_dict_template <- function( #' @return A `` of the scenario file dictionary in `path_burden` if all #' checks pass. Otherwise, exits with informative errors on failed checks. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_complete_incoming_files <- function( @@ -212,7 +212,7 @@ validate_complete_incoming_files <- function( #' @return A named list of checks carried out on `burden_set` to compare it #' against `template`, with information on missing and extra data. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export validate_template_alignment <- function(burden_set, template) { @@ -276,7 +276,7 @@ validate_template_alignment <- function(burden_set, template) { #' @return A `` giving the alignment, i.e., percentage difference of #' modelled population size from the WPP-derived population estimates. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export check_demography_alignment <- function( @@ -344,7 +344,7 @@ check_demography_alignment <- function( #' @return A character vector of messages generated by checks on burden #' estimates, with the length of the vector depending on how many checks fail. #' -#' @keywords diagnostics +#' @keywords burden_diagnostics #' #' @export basic_burden_sanity <- function(burden) { diff --git a/R/fn_helpers.R b/R/fn_helpers.R new file mode 100644 index 0000000..73dcde3 --- /dev/null +++ b/R/fn_helpers.R @@ -0,0 +1,185 @@ +#' Make data for a no-vaccination scenario +#' +#' @name helpers +#' @rdname helpers +#' +#' @description +#' Helper functions for burden diagnostics. +#' +#' @inheritParams validate_file_dict_template +#' +#' @keywords internal +#' +#' @return +#' +#' - `make_novax_scenario()` returns a tibble with the minimum required column +#' names, and entries corresponding to a 'no-vaccination' scenario for +#' `disease`. +make_novax_scenario <- function(disease) { + v <- c( + "novac", + "No Vaccination", + glue::glue("{disease}-no-vaccination"), + "No vaccination", + "no-vaccination.csv" + ) + + # internal function without input checking + df_ <- dplyr::tibble( + variable = file_dict_colnames, + value = v + ) + + tidyr::pivot_wider( + df_, + names_from = "variable" + ) +} + +#' Adaptively round numerics +#' +#' @param x A numeric vector. +#' +#' @param large_threshold A single number for the threshold over which numbers +#' are to be considered 'large'. +#' +#' @param small_sigfig A single number for the number of significant digits for +#' 'small' numbers. +#' +#' @param large_digits A single number for the number of places to which 'large' +#' numbers should be rounded. +#' +#' @return `x` rounded to either `large_digits` or to `small_sigfig`. +#' +#' @keywords internal +adaptive_round <- function( + x, + large_threshold = 1, + small_sigfig = 2, + large_digits = 1 +) { + # basic checks for numeric + checkmate::assert_numeric(x, finite = TRUE, any.missing = FALSE) + checkmate::assert_number(large_threshold) + checkmate::assert_count(small_sigfig, positive = TRUE) + checkmate::assert_count(large_digits, positive = TRUE) + + ifelse( + abs(x) >= large_threshold, + round(x, large_digits), + signif(x, small_sigfig) + ) +} + +#' Round numeric columns of a data.frame +#' +#' @param df A data.frame. +#' +#' @keywords internal +round_numeric <- function(df) { + checkmate::assert_data_frame( + df, + min.rows = 1L, + min.cols = 1L + ) + + dplyr::mutate( + df, + dplyr::across( + .cols = dplyr::where(is.numeric) & + !dplyr::matches("year", ignore.case = TRUE), + .fns = adaptive_round + ) + ) +} + +#' Check and return touchstone year-month +#' +#' @param x A string for the touchstone identifier. +#' +#' @return The first 6 characters of `x` converted to a numeric. Also has side +#' effects of erroring if conditions on `x` are not met. +#' +#' @keywords internal +validate_ts_year <- function(x) { + has_n_chars <- checkmate::test_string( + x, + min.chars = N_TS_MIN_CHARS + ) + if (!has_n_chars) { + n_chars <- nchar(x) # nolint used in cli + cli::cli_abort( + "Touchstone year should be a string with at least {N_TS_MIN_CHARS} \ + characters, but got class {.cls {class(x)}} with {n_chars} characters." + ) + } + + inferred_year <- as.numeric(substr(x, 1, N_TS_YEAR_CHARS)) + is_good_year <- checkmate::test_number( + inferred_year, + lower = MIN_TS_YEAR, + upper = MAX_TS_YEAR, + finite = TRUE + ) + + if (!is_good_year) { + cli::cli_abort( + "Touchstone year string has an inferred year of \ + {.strong {inferred_year}} but expected an year in the range \ + [{MIN_TS_YEAR}, {MAX_TS_YEAR}]." + ) + } + + inferred_month <- as.numeric( + substr(x, N_TS_YEAR_CHARS + 1, N_TS_YEAR_CHARS + 2) + ) + is_good_month <- checkmate::test_number( + inferred_month, + lower = MIN_TS_MONTH, + upper = MAX_TS_MONTH, + finite = TRUE + ) + + if (!is_good_month) { + cli::cli_abort( + "Touchstone month string has an inferred month of \ + {.strong {inferred_month}} but expected a month in the range \ + [{MIN_TS_MONTH}, {MAX_TS_MONTH}]." + ) + } + + # return year-month as string + as.numeric(substr(x, 1, N_TS_MIN_CHARS)) +} + +#' Add campaign id to dataframe +#' +#' @param df A data.frame. +#' +#' @param key_cols A character vector of columns in `df` by which the data are +#' to be grouped. +#' +#' @return `df` with a campaign identifier as a numeric. +#' +#' @keywords internal +add_campaign_id <- function(df, key_cols) { + checkmate::assert_data_frame(df, min.cols = length(key_cols)) + checkmate::assert_character(key_cols, any.missing = FALSE) + + has_cols <- checkmate::test_names( + names(df), + must.include = key_cols + ) + if (!has_cols) { + missing_cols <- setdiff(key_cols, colnames(df)) # nolint used in cli + cli::cli_abort( + "Expected {.code df} to have columns {.str {key_cols}} but columns \ + {.str {missing_cols}} are missing." + ) + } + + df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(key_cols))) + df <- dplyr::mutate(df, campaign_id = dplyr::row_number()) + + dplyr::ungroup(df) +} diff --git a/R/fn_impact_diagnostics.R b/R/fn_impact_diagnostics.R new file mode 100644 index 0000000..95575c2 --- /dev/null +++ b/R/fn_impact_diagnostics.R @@ -0,0 +1,827 @@ +#' Filter data for touchstones or diseases +#' +#' @name filter_impact_data +#' @rdname filter_impact_data +#' +#' @description +#' A pair of helper functions allowing filtering out of recent touchstone values +#' and excluded diseases. +#' +#' @param df A `` holding impact data. This data.frame is not +#' checked for contents +#' +#' @param threshold A six-digit number that is checked as a valid touchstone +#' identifier (YYYYMM format) using [validate_ts_year()]. Defaults to +#' [DEF_TOUCHSTONE_NEW] (`"202310"`). +#' +#' @keywords impact_diagnostics +#' +#' @return A filtered ``. +#' +#' - `filter_recent_ts()` returns `df` with rows where the touchstone condition +#' is not met excluded. +#' +#' - `filter_excluded_diseases_ts()` returns `df` with rows where rows relating +#' to the [EXCLUDED_DISEASES], when the touchstone year in `df` is less than the +#' `threshold`, excluded. +#' +#' - `flag_duplicates()` returns `df` with duplicated combinations of +#' `key_cols` flagged using the column `n_key` (or a user-defined name). +#' +#' - `filter_invalid_trajectories()` returns `df` with bad outcome trajectories +#' (`NA` to non-`NA`) removed. +#' +#' @export +filter_recent_ts <- function(df, threshold = DEF_TOUCHSTONE_NEW) { + # NOTE: exact min cols to be updated - fn implies at least 2 + checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 2L) + checkmate::assert_names( + names(df), + must.include = "touchstone" + ) + threshold <- validate_ts_year(threshold) # apply same rule as data ts year + + touchstone_year <- unique(df[["touchstone"]]) + + ts_number <- validate_ts_year(touchstone_year) # see R/helpers.R + + df <- tibble::as_tibble(df) + + # NOTE: consider converting to Date and checking - numeric comparison + # works okay for now + if (ts_number >= threshold) { + dplyr::filter( + df, + .data$scenario_type == "default" + ) + } else { + df + } +} + +#' @name filter_impact_data +#' +#' @export +filter_excluded_diseases_ts <- function( + df, + threshold = DEF_TOUCHSTONE_OLD_OLD +) { + checkmate::assert_data_frame(df, min.rows = 1L, min.cols = 1L) + checkmate::assert_names( + names(df), + must.include = "touchstone" + ) + + threshold <- validate_ts_year(threshold) + + touchstone_year <- unique(df$touchstone) + ts_number <- validate_ts_year(touchstone_year) + + df <- tibble::as_tibble(df) + + if (ts_number <= threshold) { + dplyr::filter(df, !.data$disease %in% EXCLUDED_DISEASES) + } else { + df + } +} + +#' @name filter_impact_data +#' +#' @param key_cols Key columns in `df` to check for duplicates. +#' +#' @export +flag_duplicates <- function(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) { + checkmate::assert_data_frame(df, min.cols = length(key_cols), min.rows = 1L) + checkmate::assert_character(key_cols) + + has_cols <- checkmate::test_names( + colnames(df), + must.include = key_cols + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(df), key_cols) # nolint used in cli + cli::cli_abort( + "Expected {.code df} to have columns {.str {key_cols}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + + # data may have a `burden_outcome` column which should not be counted as + # a duplicate + df <- dplyr::add_count( + df, + dplyr::across(dplyr::all_of(c(key_cols, "burden_outcome"))), + name = "n_key" + ) + + if (any(df$n_key > 1)) { + n_duplicates <- sum(df$n_key > 1) # nolint used below + cli::cli_warn( + "{n_duplicates} duplicates found in data; please check for plausibility!" + ) + } + + tibble::as_tibble(df) +} + +#' @name filter_impact_data +#' +#' @param prev_data A `` holding data from a previous touchstone for +#' the same scenarios as `df`. +#' +#' @param outcome A string giving the outcome of interest; may be one of +#' `"deaths_averted"` or `"dalys_averted"`. +#' +#' @export +filter_invalid_trajectories <- function( + df, + prev_data, + outcome = c("deaths_averted", "dalys_averted") +) { + checkmate::assert_data_frame(df, min.cols = 1L, min.rows = 1L) + + checkmate::assert_data_frame( + prev_data, + min.rows = nrow(df) + ) + + outcome <- rlang::arg_match(outcome) + + has_cols <- checkmate::test_names( + colnames(df), + must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome) + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(df), COLNAMES_KEY_PRESSURE_TEST) # nolint + cli::cli_abort( + "Expected {.code df} to have columns \ + {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + + has_cols <- checkmate::test_names( + colnames(prev_data), + must.include = c(COLNAMES_KEY_PRESSURE_TEST, outcome) + ) + if (!has_cols) { + missing_cols <- setdiff(colnames(prev_data), COLNAMES_KEY_PRESSURE_TEST) + cli::cli_abort( + "Expected {.code prev_data} to have columns \ + {.str {COLNAMES_KEY_PRESSURE_TEST}}, but columns \ + {.str {.strong {missing_cols}}} were missing!" + ) + } + + prev_df <- dplyr::select( + prev_data, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + dplyr::all_of(outcome) + ) + prev_df <- dplyr::rename(prev_df, outcome_prev = {{ outcome }}) + + current_df <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + dplyr::all_of(outcome) + ) + current_df <- dplyr::rename(current_df, outcome_cur = {{ outcome }}) + + result <- dplyr::inner_join( + prev_df, + current_df, + by = COLNAMES_KEY_PRESSURE_TEST + ) + + # `,` replaces `&` for dplyr syntax + result <- dplyr::filter( + result, + !is.na(.data$outcome_prev), + is.na(.data$outcome_cur) + ) + + tibble::as_tibble(result) +} + +#' Explore significant changes in deaths and DALYs +#' +#' @param prev_df A `` of impact estimates from the previous +#' touchstone. +#' +#' @param curr_df A `` of impact estimates for the current +#' touchstone. +#' +#' @param interest_cols A character vector of columns to check for differences. +#' Defaults to [COLNAMES_INTEREST_PRESSURE_TEST]. +#' +#' @param key_cols A character vector of columns to use when assigning campaign +#' identifiers. Passed to [add_campaign_id()], defaults to +#' [COLNAMES_KEY_PRESSURE_TEST]. +#' +#' @param touchstone A six character string that can be converted to a six digit +#' numeric giving a touchstone identifier in `YYYYMM` format. +#' +#' @return A list of tibbles of differences between `prev_df` and `curr_df`, +#' with one list element per element of `interest_cols`. +#' +#' @keywords impact_diagnostics +#' +#' @export +generate_diffs <- function( + prev_df, + curr_df, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST, + touchstone = DEF_TOUCHSTONE_OLD +) { + checkmate::assert_data_frame( + prev_df, + min.rows = 1L, + min.cols = length(interest_cols) + ) + checkmate::assert_data_frame( + curr_df, + min.rows = 1L, + min.cols = length(interest_cols) + ) + + checkmate::assert_character(interest_cols, min.len = 1L) + checkmate::assert_character(key_cols, min.len = 1L) + + # check interest cols in dfs. key cols are check in `add_campaign_id` + checkmate::assert_names( + colnames(prev_df), + must.include = c(interest_cols, "support_type", "coverage") + ) + checkmate::assert_names( + colnames(curr_df), + must.include = c(interest_cols, "support_type", "coverage") + ) + + touchstone <- validate_ts_year(touchstone) + + # fix for erroneous duplicated YF data in 201910 dataset + if (touchstone == DEF_TOUCHSTONE_OLD) { + prev_df <- dplyr::filter( + prev_df, + !(.data$disease == "YF" & + .data$support_type == "other" & + .data$coverage == 0) + ) + } + + # Fix for multiple campaigns per year (i.e. not true duplicates) + # only applicable for 2019 true non-duplicates. + prev_df <- add_campaign_id(prev_df, key_cols) + curr_df <- add_campaign_id(curr_df, key_cols) + + diff_keys <- c(key_cols, "campaign_id") + cols_needed <- union(diff_keys, interest_cols) + + df_diff <- diffdf::diffdf( + prev_df[, cols_needed], + curr_df[, cols_needed], + keys = diff_keys + ) + + # diffdf's Vardiff_* returns a tibble, no need to convert + changes <- stats::setNames( + lapply(interest_cols, function(v) { + nm <- glue::glue("VarDiff_{v}") + if (nm %in% names(df_diff)) df_diff[[nm]] else NULL + }), + interest_cols + ) + + changes # a list of tibbles +} + +#' Generate IQR for key outcomes +#' +#' @keywords impact_diagnostics +#' +#' @param df A data.frame of impact estimates. +#' +#' @param group_cols A character vector of grouping columns. Defaults to +#' "country", "vaccine", "activity_type". +#' +#' @param value_cols A character vector of value columns. Defaults to +#' "deaths_averted" and "dalys_averted". +#' +#' @param prefix A string for the prefix applied to every IQR summary column. +#' Defaults to "national_iqr". +#' +#' @return A `` with the inter-quartile range of the columns +#' in `value_cols`, with the column name constructed as `{prefix}_{value_col}` +#' using string interpolation. +#' +#' @export +gen_national_iqr <- function( + df, + group_cols = c("country", "vaccine", "activity_type"), + value_cols = c("deaths_averted", "dalys_averted"), + prefix = "national_iqr" +) { + checkmate::assert_data_frame( + df, + min.rows = 1L, + min.cols = length(c(group_cols, value_cols)) + ) + checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE) + + # NOTE: restricting value columns to deaths and dalys averted + checkmate::assert_subset( + value_cols, + c("deaths_averted", "dalys_averted") + ) + + checkmate::assert_string(prefix) + + checkmate::assert_names( + colnames(df), + must.include = union(group_cols, value_cols) + ) + + df <- tibble::as_tibble(df) + + # long-winded syntax to pass grouping variables as char vec + df <- dplyr::group_by(df, dplyr::across(dplyr::all_of(group_cols))) + df <- dplyr::summarise( + df, + dplyr::across( + dplyr::all_of(value_cols), + function(x) { + stats::IQR(x, na.rm = TRUE) + }, + .names = "{prefix}_{.col}" + ), + .groups = "drop" + ) + + df +} + +#' Flag significant changes in impact estimates +#' +#' @description Calculates and flags whether the difference in impact estimates +#' between touchstones is greater than expected. A row is flagged if the +#' difference is greater than `threshold` \eqn{\times} the inter-quartile range +#' for cases where the IQR is greater than zero. +#' +#' @param changes_list A list of data.frames with one element per variable of +#' interest (see `variable`). Usually generated using [generate_diffs()]. +#' +#' @param iqr_df A data.frame of inter-quartile differences generated using +#' [gen_national_iqr()]. +#' +#' @param variable A string specifying the variable of interest. Must be one of +#' "deaths_averted" or "dalys_averted", and must be present as a name and +#' element of `changes_list`. +#' +#' @inheritParams gen_national_iqr +#' +#' @param threshold A single numeric value for the IQR multiplier. Defaults to +#' 100. +#' +#' @param touchstone_old The previous touchstone identifier. Defaults to +#' [DEF_TOUCHSTONE_OLD_OLD]. +#' +#' @param touchstone_new The new touchstone identifier. Defaults to +#' [DEF_TOUCHSTONE_NEW]. +#' +#' @return A filtered data.frame of differences in impact estimates flagged +#' as too large. Rows with differences within tolerance are removed. +#' +#' @keywords impact_diagnostics +#' +#' @export +flag_large_diffs <- function( + changes_list, + iqr_df, + variable = c("deaths_averted", "dalys_averted"), + group_cols = c("country", "vaccine", "activity_type"), + threshold = 100, + touchstone_old = DEF_TOUCHSTONE_OLD_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) { + checkmate::assert_list(changes_list, c("data.frame", "NULL")) + checkmate::assert_data_frame( + iqr_df, + min.rows = 1L, + min.cols = length(group_cols) + ) + + variable <- rlang::arg_match(variable) + checkmate::assert_character(group_cols, min.len = 1L, any.missing = FALSE) + + checkmate::assert_number(threshold, lower = 1.0, finite = TRUE) + + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) + + # cross checking + has_var <- variable %in% names(changes_list) + if (!has_var) { + cli::cli_abort( + "Expected list {.code changes_list} to have an element with the name \ + {.str {variable}}, but it does not." + ) + } + df_compare <- tibble::as_tibble(changes_list[[variable]]) + + checkmate::assert_names( + colnames(df_compare), + must.include = group_cols + ) + checkmate::assert_names( + colnames(iqr_df), + must.include = group_cols + ) + + iqr_col <- glue::glue("national_iqr_{variable}") + + df_compare <- dplyr::mutate( + df_compare, + diff = .data$COMPARE - .data$BASE + ) + + iqr_df <- dplyr::select( + iqr_df, + dplyr::all_of(group_cols), + dplyr::all_of(iqr_col) + ) + + df_compare <- dplyr::left_join( + df_compare, + iqr_df, + by = group_cols + ) + + df_compare <- dplyr::mutate( + df_compare, + flag = abs(.data$diff) > threshold * .data[[iqr_col]] & .data[[iqr_col]] > 0 + ) + + df_compare <- dplyr::filter(df_compare, .data$flag) + + cols_to_select <- c( + "country", + "country_name", + "year", + "vaccine", + "modelling_group", + "activity_type", + "BASE", + "COMPARE", + "diff" + ) + + df_compare <- dplyr::select( + df_compare, + {{ cols_to_select }} + ) + + rename_lookup <- c("BASE", "COMPARE") + names(rename_lookup) <- c( + as.character(touchstone_old), + as.character(touchstone_new) + ) + df_compare <- dplyr::rename( + df_compare, + dplyr::all_of(rename_lookup) + ) + + df_compare <- dplyr::arrange(df_compare, dplyr::desc(diff)) + + df_compare +} + +#' Combine and align data from two touchstones +#' +#' @description +#' Generates a full join of two data.frames, selecting for columns of interest. +#' +#' @param prev_dat A data.frame of impact estimates corresponding to an earlier +#' touchstone. +#' +#' @param df_clean A data.frame of impact estimates corresponding to a more +#' recent touchstone. +#' +#' @param interest_cols A character vector of columns of interest. Defaults to +#' [COLNAMES_INTEREST_PRESSURE_TEST]. +#' +#' @param key_cols A character vector of columns of interest. Defaults to +#' [COLNAMES_KEY_PRESSURE_TEST]. +#' +#' @return A data.frame which is a full join of `prev_dat` and `df_clean`. +#' Columns are disambiguated with the suffixes `"_old"` and `"_new"`. +#' +#' @keywords impact_diagnostics +#' +#' @export +gen_combined_df <- function( + prev_dat, + df_clean, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST +) { + n_expected_cols <- length(union(interest_cols, key_cols)) + checkmate::assert_data_frame( + prev_dat, + min.cols = n_expected_cols, + min.rows = 1L + ) + + checkmate::assert_data_frame( + df_clean, + min.cols = n_expected_cols, + min.rows = 1L + ) + + checkmate::assert_subset( + interest_cols, + COLNAMES_INTEREST_PRESSURE_TEST + ) + checkmate::assert_subset( + key_cols, + COLNAMES_KEY_PRESSURE_TEST + ) + + cols_to_select <- c( + "country", + "country_name", + "disease", + "vaccine", + "activity_type", + "year", + "modelling_group", + "deaths_averted_old", + "deaths_averted_new", + "dalys_averted_old", + "dalys_averted_new" + ) + + checkmate::assert_names( + colnames(prev_dat), + must.include = c(interest_cols, key_cols, "touchstone") + ) + checkmate::assert_names( + colnames(df_clean), + must.include = c(interest_cols, key_cols, "touchstone") + ) + + # check touchstones + ts_old <- validate_ts_year(unique(prev_dat$touchstone)) + ts_new <- validate_ts_year(unique(df_clean$touchstone)) + + if (ts_old == ts_new) { + cli::cli_abort( + "Touchstones for previous data and current data are the same: {ts_old}, \ + please check datasets!" + ) + } + + prev_df <- dplyr::select(prev_dat, {{ interest_cols }}) + cur_df <- dplyr::select(df_clean, {{ interest_cols }}) + + combined <- dplyr::full_join( + prev_df, + cur_df, + by = key_cols, + suffix = c("_old", "_new") + ) + + combined <- tibble::as_tibble(combined) + + checkmate::assert_names( + colnames(combined), + must.include = cols_to_select + ) + + combined <- dplyr::select( + combined, + dplyr::all_of(cols_to_select) + ) + + combined +} + +#' Compare sub-regional and national estimates +#' +#' @param df A data.frame with sub-region level data on vaccination impact +#' outcomes. +#' +#' @param outcome A string for the outcome of interest. May be one of +#' `"deaths_averted_rate"` or `"dalys_averted_rate"`. +#' +#' @param activity_filter A string for the type of vaccination activity. May be +#' one of `"campaign"` or `"routine"`. +#' +#' @return A data.frame of sub-regional vaccination impact estimates where the +#' impact is considered to be outside the tolerance limit. +#' +#' @keywords impact_diagnostics +#' +#' @export +compare_natl_subreg <- function( + df, + outcome = c("deaths_averted_rate", "dalys_averted_rate"), + activity_filter = c("campaign", "routine") +) { + checkmate::assert_data_frame( + df, + min.rows = 1L, + min.cols = length( + c(outcome, "subregion", COLNAMES_KEY_PRESSURE_TEST) + ) + ) + outcome <- rlang::arg_match(outcome) + activity_filter <- rlang::arg_match(activity_filter) + + checkmate::assert_names( + names(df), + must.include = c(outcome, "subregion", COLNAMES_KEY_PRESSURE_TEST) + ) + + df <- tibble::as_tibble(df) + + df <- dplyr::filter(df, .data$activity_type == activity_filter) + df <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + "subregion", + !!outcome + ) + + # first get national summary + national_summary <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + "subregion", + !!outcome + ) + national_summary <- dplyr::rename( + national_summary, + national_value = !!outcome + ) + + # next get sub-regional summary + subregional_summary <- + dplyr::group_by(df, .data$subregion, .data$disease, .data$activity_type) + + subregional_summary <- dplyr::summarise( + subregional_summary, + subregional_mean = mean(.data[[outcome]], na.rm = TRUE), + subregional_iqr = stats::IQR(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) + + comparison <- dplyr::left_join( + national_summary, + subregional_summary, + by = c("subregion", "disease") + ) + comparison <- dplyr::mutate( + comparison, + outcome = outcome, + difference = .data$national_value - .data$subregional_mean, + iqr_score = abs(.data$difference) / .data$subregional_iqr + ) + + dynamic_threshold <- stats::quantile( + comparison$iqr_score, + 0.99, + na.rm = TRUE + ) + + comparison <- dplyr::mutate( + comparison, + flag_iqr = .data$iqr_score > dynamic_threshold & .data$subregional_iqr > 0 + ) + comparison <- dplyr::filter(comparison, .data$flag_iqr) + + cols_to_select <- c( + "country_name", + "vaccine", + "year", + "modelling_group", + "national_value", + "subregional_mean", + "subregional_iqr", + "difference", + "iqr_score" + ) + comparison <- dplyr::select(comparison, {{ cols_to_select }}) + comparison <- dplyr::arrange(comparison, dplyr::desc(.data$iqr_score)) + + comparison +} + +#' Save pressure-testing diagnostics to local file +#' +#' @description +#' Save pressure-testing diagnostics data.frames to local compressed files in +#' the `.Rds` format. Input data.frames are generated by other package functions +#' and are not checked here. +#' +#' @param missing_in_current A data.frame. +#' +#' @param missing_deaths A data.frame that is the output of +#' [filter_invalid_trajectories()] with the outcome `"deaths_averted"`. +#' +#' @param missing_dalys A data.frame that is the output of +#' [filter_invalid_trajectories()] with the outcome `"dalys_averted"`. +#' +#' @param changes_deaths A data.frame that is the output of [flag_large_diffs()] +#' with the outcome `"deaths_averted"`. +#' +#' @param changes_dalys A data.frame that is the output of [flag_large_diffs()] +#' with the outcome `"dalys_averted"`. +#' +#' @param subregional_flags_deaths_camp A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @param subregional_flags_deaths_rout A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"deaths_averted_rate"` for the +#' `"routine"` activity type. +#' +#' @param subregional_flags_dalys_camp A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @param subregional_flags_dalys_rout A data.frame that is the output of +#' [compare_natl_subreg()] with the outcome `"dalys_averted_rate"` for the +#' `"campaign"` activity type. +#' +#' @param output_dir A writeable directory. Defaults to "./outputs". +#' +#' @return None. Called for the convenience side-effect of saving data.frames as +#' `.Rds` format. +#' +#' @keywords impact_diagnostics +#' +#' @export +save_outputs <- function( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout, + output_dir = here::here("outputs") +) { + # NOTE: not checking most input args as these are generated from other pkg fns + + output_dir_exists <- dir.exists(output_dir) + if (!output_dir_exists) { + cli::cli_abort( + "Expected output directory {.arg {output_dir}} but it does not exist!" + ) + } + + # NOTE: consider writing to agnostic format e.g. CSV + missing_in_current <- dplyr::select( + missing_in_current, + {{ colnames_df_missing_cols }} + ) + + filenames <- c( + "missing_in_current", + "missing_deaths", + "missing_dalys", + "changes_deaths", + "changes_dalys", + "subregional_flags_deaths_camp", + "subregional_flags_deaths_rout", + "subregional_flags_dalys_camp", + "subregional_flags_dalys_rout" + ) + + df_list <- list( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout + ) + + Map( + df_list, + filenames, + f = function(df, name) { + saveRDS( + round_numeric(df), + file.path(output_dir, glue::glue("{name}.Rds")) + ) + } + ) +} diff --git a/R/plotting.R b/R/fn_plotting_burden_diagnostics.R similarity index 77% rename from R/plotting.R rename to R/fn_plotting_burden_diagnostics.R index 9d137ac..b9a30a6 100644 --- a/R/plotting.R +++ b/R/fn_plotting_burden_diagnostics.R @@ -1,72 +1,7 @@ -#' Plotting theme for vimcheck -#' -#' @description -#' A simple plotting theme building on [ggplot2::theme_bw()]. -#' -#' @name plotting_theme -#' @rdname plotting_theme -#' -#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees. -#' -#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees. -#' -#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to -#' [ggplot2::theme()]. These will be applied in addition to, or in place of, -#' pre-existing elements defined by this theme. See the examples for this -#' theme's appearance. -#' -#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. -#' -#' @keywords plotting -#' -#' @examples -#' # using an inbuilt dataset -#' data(mtcars) -#' -#' # standard theme -#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + -#' ggplot2::geom_point() + -#' theme_vimc() -#' -#' # with X-axis suppression -#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + -#' ggplot2::geom_point() + -#' theme_vimc_noxaxis() -#' -#' @export -theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { - ggplot2::theme_bw() + - ggplot2::theme( - axis.text.x = ggplot2::element_text( - size = 10, - angle = x_text_angle - ), - strip.text.y = ggplot2::element_text( - angle = y_text_angle - ), - plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), - ... - ) -} - -#' @name plotting_theme -#' -#' @importFrom ggplot2 '%+replace%' -#' -#' @export -theme_vimc_noxaxis <- function() { - theme_vimc() %+replace% - ggplot2::theme( - axis.title.x = ggplot2::element_blank(), - axis.text.x = ggplot2::element_blank(), - axis.ticks.x = ggplot2::element_blank() - ) -} - #' Plot burden and impact diagnostics #' -#' @name plotting -#' @rdname plotting +#' @name plot_burden_diagnostics +#' @rdname plot_burden_diagnostics #' #' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid #' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars @@ -75,9 +10,9 @@ theme_vimc_noxaxis <- function() { #' @importFrom rlang .data #' #' @description -#' Plotting functions for burden and impact diagnostics. All functions operate +#' Plotting functions for burden diagnostics. All functions operate #' on data prepared for plotting by a corresponding -#' [plotting-preparation function][plotting_prep]. +#' [plotting-preparation function][plot_prep_burden_diagnostics]. #' #' @param fig_number The figure number displayed in the plot title. #' @@ -121,7 +56,7 @@ plot_compare_demography <- function(data, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_age A `` with the minimum column names #' "age", "value_millions", "burden_outcome", and "scenario"; expected to be the @@ -154,7 +89,7 @@ plot_age_patterns <- function(burden_age, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_decades A `` giving the burden by decade, up to #' `year_max`; expected to be the output of [prep_plot_burden_decades()]. @@ -182,7 +117,7 @@ plot_global_burden_decades <- function(burden_decades, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param burden_data This is expected to be a `` from a #' nested-`` constructed using [prep_plot_global_burden()]. @@ -226,7 +161,7 @@ plot_global_burden <- function(burden_data, outcome_name, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param coverage_set A `` that is the output of #' [prep_plot_coverage_set()]. @@ -271,7 +206,7 @@ plot_coverage_set <- function(coverage_set, fig_number) { g } -#' @name plotting +#' @name plot_burden_diagnostics #' #' @param fvp_data A `` of estimates of fully-vaccinated persons (FVPs) #' per scenario, with scenarios as factors in order of the number of diff --git a/R/fn_plotting_helpers.R b/R/fn_plotting_helpers.R new file mode 100644 index 0000000..4d14cce --- /dev/null +++ b/R/fn_plotting_helpers.R @@ -0,0 +1,64 @@ +#' Plotting theme for vimcheck +#' +#' @description +#' A simple plotting theme building on [ggplot2::theme_bw()]. +#' +#' @name plotting_theme +#' @rdname plotting_theme +#' +#' @param x_text_angle The angle for X-axis labels. Defaults to 45 degrees. +#' +#' @param y_text_angle The angle for Y-axis labels. Defaults to 0 degrees. +#' +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to +#' [ggplot2::theme()]. These will be applied in addition to, or in place of, +#' pre-existing elements defined by this theme. See the examples for this +#' theme's appearance. +#' +#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. +#' +#' @keywords plotting +#' +#' @examples +#' # using an inbuilt dataset +#' data(mtcars) +#' +#' # standard theme +#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + +#' ggplot2::geom_point() + +#' theme_vimc() +#' +#' # with X-axis suppression +#' ggplot2::ggplot(mtcars, ggplot2::aes(disp, mpg)) + +#' ggplot2::geom_point() + +#' theme_vimc_noxaxis() +#' +#' @export +theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { + ggplot2::theme_bw() + + ggplot2::theme( + axis.text.x = ggplot2::element_text( + size = 10, + angle = x_text_angle + ), + strip.text.y = ggplot2::element_text( + angle = y_text_angle + ), + plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), + ... + ) +} + +#' @name plotting_theme +#' +#' @importFrom ggplot2 '%+replace%' +#' +#' @export +theme_vimc_noxaxis <- function() { + theme_vimc() %+replace% + ggplot2::theme( + axis.title.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank() + ) +} diff --git a/R/fn_plotting_impact_diagnostics.R b/R/fn_plotting_impact_diagnostics.R new file mode 100644 index 0000000..273628e --- /dev/null +++ b/R/fn_plotting_impact_diagnostics.R @@ -0,0 +1,286 @@ +#' Create impact diagnostics plots +#' +#' @description +#' Functions that create impact diagnostics plots (or plotting objects). All +#' functions are associated with one other upstream data processing function, +#' and can be used in a pipe with that function. Where appropriate, outcome +#' selection and label preparation is automated to reduce function arguments. +#' +#' @name plot_impact_diagnostics +#' @rdname plot_impact_diagnostics +#' +#' @importFrom ggplot2 ggplot aes geom_col geom_hline facet_wrap facet_grid +#' scale_fill_distiller scale_x_continuous scale_y_continuous labs vars +#' labeller label_wrap_gen theme geom_segment geom_point +#' +#' @importFrom rlang .data +#' +#' @description +#' Plotting functions for impact diagnostics. See +#' [plotting-preparation functions][plot_prep_impact_diagnostics] for a set of +#' helper functions that prepare impact diagnostics for plotting. See the +#' details of the `data` argument for functions that help to prepare the data. +#' +#' @param data A data.frame suitable for plotting. +#' +#' - `plot_sig_diff()`: Output of [`flag_large_diff()`][flag_large_diffs]. +#' +#' - `plot_diff()`: Output of [`gen_combined_df()`][gen_combined_df]. +#' +#' - `plot_modelling_group_variation()`: Output of +#' [`plot_prep_mod_grp_varn()`][plot_prep_impact_diagnostics]. +#' +#' - `plot_vaccine_gavi()`: Output of +#' [`plot_prep_vax_gavi()`][plot_prep_impact_diagnostics] +#' +#' - `plot_cumul()`: Output of +#' [`plot_prep_cumul()`][plot_prep_impact_diagnostics] +#' +#' @param outcome A string for the impact outcome. One of [IMPACT_OUTCOMES]. +#' +#' @return A `` object that can be viewed or saved. +#' +#' @export +plot_sig_diff <- function(data, outcome = IMPACT_OUTCOMES) { + checkmate::assert_tibble(data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + + # retained here as this is a small df and a small operation + data$label <- glue::glue( + "{data$country_name} | {data$vaccine} | {data$activity_type} | {data$year}" + ) + + ggplot( + data, + aes( + .data$diff, + stats::reorder(.data$label, .data$diff), + color = .data$modelling_group + ) + ) + + geom_segment( + aes(x = 0, xend = .data$diff, y = .data$label, yend = .data$label), + linewidth = 1 + ) + + geom_point(size = 2) + + labs( + x = "Difference", + y = NULL, + title = glue::glue( + "Significant Differences in {outcome} by Country, Vaccine, \\ + Activity Type and Year" + ) + ) + + theme_vimc(x_text_angle = 0) +} + +#' @name plot_impact_diagnostics +#' +#' @param group_vars A single string for the grouping variables. May be any of +#' [IMPACT_OUTCOMES], which are `"activity_type"` and `"vaccine"`. +#' +#' @param touchstone_old A string for the previous touchstone in +#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_OLD]. +#' +#' @param touchstone_new A string for the current or new touchstone in +#' format `"YYYYMM"`. Defaults to [DEF_TOUCHSTONE_NEW]. +#' +#' @export +plot_diff <- function( + data, + outcome = IMPACT_OUTCOMES, + group_vars = IMPACT_GROUP_VARS, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) { + checkmate::assert_tibble(data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + checkmate::assert_subset( + group_vars, + IMPACT_GROUP_VARS + ) + + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) + + x_var <- glue::glue("{outcome}_new") + y_var <- glue::glue("{outcome}_old") + + # small operations retained + # NOTE: data masking using `{{` does not appear to work + # see last example in https://dplyr.tidyverse.org/reference/filter.html + # + # NOTE: exclude values < 1 to prevent log transform errors + data <- dplyr::filter_out( + data, + dplyr::when_any( + is.na(.data[[x_var]]), + is.na(.data[[y_var]]), + .data[[x_var]] < 1, + .data[[y_var]] < 1 + ) + ) + + # nolint start + n_facets <- nrow( + dplyr::distinct( + data, + .data$activity_type, + .data$vaccine + ) + ) + # nolint end + + ncol_dynamic <- dplyr::case_when( + n_facets <= 4 ~ 2, + n_facets <= 9 ~ 3, + n_facets <= 16 ~ 4, + n_facets <= 25 ~ 6, + TRUE ~ 8 + ) + + p <- ggplot( + data, + aes(.data[[x_var]], .data[[y_var]]) + ) + + ggplot2::geom_point(alpha = 0.5, colour = COLOUR_VIMC) + + ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dashed") + + facet_wrap( + facets = c("activity_type", "vaccine"), + scales = "free", + ncol = ncol_dynamic + ) + + ggplot2::scale_x_log10() + + ggplot2::scale_y_log10() + + theme_vimc(0) + + theme( + strip.text = ggplot2::element_text(size = 7), + panel.spacing = ggplot2::unit(0.05, "lines"), + axis.text = ggplot2::element_text(size = 6.5) + ) + + labs( + title = glue::glue("{outcome}: Current vs Previous Report"), + x = glue::glue("{touchstone_new} - {outcome}"), + y = glue::glue("{touchstone_old} - {outcome}") + ) + + p +} + +#' @name plot_impact_diagnostics +#' +#' @export +plot_modelling_group_variation <- function(data) { + checkmate::assert_tibble(data, min.rows = 1L, min.cols = 1L) + + outcome <- unique(data[["outcome_name"]]) + checkmate::assert_string(outcome) + + outcome_short <- stringr::word(outcome, sep = "_") + outcome_short <- dplyr::if_else( + outcome_short == "dalys", + stringr::str_to_upper(outcome_short), + outcome_short + ) + x_lab <- glue::glue("Burden averted ({outcome_short})") + + # for scales formatting + .x <- NULL + + ggplot(data) + + aes( + fill = as.character(.data$mod_num), + x = .data$adj_outc, + y = stats::reorder(.data$vaccine, .data$mean_outc) + ) + + ggridges::geom_density_ridges( + alpha = 0.5, + stat = "binline", + bins = 200, + draw_baseline = FALSE + ) + + facet_grid(cols = ggplot2::vars("activity_type"), scales = "fixed") + + ggplot2::scale_x_log10( + breaks = scales::trans_breaks("log10", function(x) 10^x), + labels = scales::trans_format("log10", scales::math_format(10^.x)) + ) + + ggplot2::scale_fill_viridis_d() + + theme_vimc() + + theme( + legend.position = "none", + axis.text.x = ggplot2::element_text(angle = 90, hjust = 1) + ) + + labs( + x = x_lab, + y = "Vaccine" + ) +} + +# Gavi plot - future deaths and DALYS averted, 2021-2024 +# (current time window Gavi looking at, can be amended) +#' @name plot_impact_diagnostics +#' +#' @export +plot_vaccine_gavi <- function(data) { + checkmate::assert_tibble(data) + outcome <- unique(data[["outcome_name"]]) + + ggplot( + data, + aes( + x = stats::reorder(.data$disease, .data$yearly_outcome), + y = .data$yearly_outcome, + fill = factor(.data$year) + ) + ) + + geom_col(position = "dodge") + + ggplot2::scale_fill_manual( + values = c( + "2021" = "#008080", + "2022" = "#E68424", + "2023" = "#9573B5", + "2024" = "#A1D15C" + ) + ) + + facet_wrap(~dataset, scales = "free_y") + + scale_y_continuous(labels = scales::scientific) + + theme_vimc() + + theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + + labs(x = "Disease", y = paste("Impact -", outcome), fill = "Year") +} + +# Gavi Cumulative Plot (modelling group + average) +#' @name plot_impact_diagnostics +#' +#' @export +plot_cumul <- function(data) { + checkmate::assert_tibble(data) + outcome <- unique(data[["outcome_name"]]) + disease <- unique(data[["disease"]]) + + p <- ggplot( + data, + aes( + x = .data$year, + y = .data$value, + color = .data$modelling_group, + linetype = .data$line_type + ) + ) + + ggplot2::geom_step(direction = "hv", linewidth = 0.7, alpha = 0.9) + + ggplot2::scale_linetype_manual( + values = c(solid = "solid", dashed = "dashed") + ) + + ggplot2::guides(linetype = "none") + + scale_y_continuous(labels = scales::scientific) + + theme_vimc() + + labs( + x = "Year", + y = paste("Cumulative", outcome), + color = "Modelling Group", + title = paste("Cumulative", outcome, "Over Time -", disease) + ) + + theme(legend.position = "bottom") + + p +} diff --git a/R/plotting_prep.R b/R/fn_plotting_prep_bur_diag.R similarity index 91% rename from R/plotting_prep.R rename to R/fn_plotting_prep_bur_diag.R index 277c083..b76b29f 100644 --- a/R/plotting_prep.R +++ b/R/fn_plotting_prep_bur_diag.R @@ -1,12 +1,13 @@ #' Prepare data for plotting #' -#' @name plotting_prep -#' @rdname plotting_prep +#' @name plot_prep_burden_diagnostics +#' @rdname plot_prep_burden_diagnostics #' #' @description #' Transform burden estimate data from modelling groups to make them suitable -#' for plotting using an appropriate [plotting function][plotting]. Each -#' preparation function corresponds to a plotting function. +#' for plotting using an appropriate +#' [plotting function][plot_prep_burden_diagnostics]. Each preparation function +#' corresponds to a plotting function. #' #' @param burden For `prep_plot_demography()`, a `` output from #' [check_demography_alignment()]. @@ -31,6 +32,8 @@ #' #' - For `prep_plot_fvp()`: WIP. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_demography <- function(burden) { checkmate::assert_tibble(burden) @@ -88,7 +91,9 @@ prep_plot_demography <- function(burden) { burden_long } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics +#' +#' @keywords plot_prep_burden_diagnostics #' #' @export prep_plot_age <- function(burden) { @@ -109,11 +114,13 @@ prep_plot_age <- function(burden) { burden_summary } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param year_max The maximum year to be represented in a subsequent figure. #' For `prep_plot_burden_decades()`, must be a decade, i.e., multiple of 10. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_burden_decades <- function(burden, year_max) { checkmate::assert_tibble(burden) @@ -158,7 +165,9 @@ prep_plot_burden_decades <- function(burden, year_max) { burden_data } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics +#' +#' @keywords plot_prep_burden_diagnostics #' #' @export prep_plot_global_burden <- function(burden) { @@ -184,10 +193,12 @@ prep_plot_global_burden <- function(burden) { burden_nested } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param coverage WIP. Coverage data. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_coverage_set <- function(coverage) { checkmate::assert_tibble(coverage) @@ -245,12 +256,14 @@ prep_plot_coverage_set <- function(coverage) { coverage_set } -#' @name plotting_prep +#' @name plot_prep_burden_diagnostics #' #' @param fvp WIP. Data on counts of fully vaccinated persons. #' #' @param year_min Minimum year. #' +#' @keywords plot_prep_burden_diagnostics +#' #' @export prep_plot_fvp <- function(fvp, year_min, year_max) { checkmate::assert_tibble(fvp) diff --git a/R/fn_plotting_prep_impact_diagnostics.R b/R/fn_plotting_prep_impact_diagnostics.R new file mode 100644 index 0000000..272771a --- /dev/null +++ b/R/fn_plotting_prep_impact_diagnostics.R @@ -0,0 +1,337 @@ +#' Prepare impact diagnostics for plotting +#' +#' @name plot_prep_impact_diagnostics +#' @rdname plot_prep_impact_diagnostics +#' +#' @description +#' A suite of helper functions that sit between impact diagnostics functions and +#' plotting functions. These functions transform and aggregate impact estimates +#' to prepare them for visualisation. Functions have basic checks on input data +#' but otherwise assume users will not modify inputs. +#' +#' @param df2 A `` of impact estimates with at least columns +#' `modelling_group`, `vaccine`, outcome variable, and `fvps` (doses +#' delivered). Used as the primary data source for calculations in +#' [prep_plot_mod_grp_varn()]. +#' +#' @param df3 A `` of modelling group and vaccine combinations, +#' typically with one row per modelling group per vaccine. Joined with `df2` +#' to ensure complete group coverage in [prep_plot_mod_grp_varn()]. +#' +#' @param data A `` of impact estimates with columns including at least +#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and +#' potentially outcome-specific columns (for [prep_plot_cumul()]). Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @param prev_data A `` of impact estimates from a previous touchstone, +#' used as a comparison baseline in [prep_plot_vax_gavi()]. Should have the +#' same structure as `data`. +#' +#' @param outcome A character string for the impact outcome. Must be one of +#' `"deaths_averted"` or `"dalys_averted"`. For [prep_plot_cumul()], +#' `data` must include columns named `{outcome}_old` and `{outcome}_new`. +#' +#' @param disease A character string specifying a single disease for filtering +#' in [prep_plot_cumul()]. +#' +#' @param touchstone_old A six-character touchstone identifier (YYYYMM format) +#' for the previous dataset. Defaults to [DEF_TOUCHSTONE_OLD]. Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @param touchstone_new A six-character touchstone identifier (YYYYMM format) +#' for the current dataset. Defaults to [DEF_TOUCHSTONE_NEW]. Used in +#' [prep_plot_vax_gavi()] and [prep_plot_cumul()]. +#' +#' @importFrom rlang := +#' +#' @return +#' +#' - [prep_plot_mod_grp_varn()] returns a grouped `` (grouped by +#' `vaccine`) with all columns from `df2` and `df3` plus derived columns: +#' `adj_outc` (adjusted outcome with small offset), `outcome_name` (input +#' outcome), and `mean_outc` (vaccine-level weighted mean outcome). +#' +#' - [prep_plot_vax_gavi()] returns a `` with columns `disease`, +#' `year`, `yearly_outcome`, `dataset` (factor with levels for old touchstone, +#' "Difference", and new touchstone), and `outcome_name`. Summarizes outcomes +#' by disease and year across two touchstones. +#' +#' - [prep_plot_cumul()] returns a `` with columns `year`, +#' `modelling_group`, `touchstone`, `value` (cumulative or average outcome), +#' `line_type` ("solid" for individual models, "dashed" for model average), +#' and `outcome_name`. Returns `NULL` if the specified disease has no non-zero +#' data to plot. +#' +#' @export +prep_plot_mod_grp_varn <- function(df2, df3, outcome = IMPACT_OUTCOMES) { + checkmate::assert_tibble(df2, min.rows = 1L, min.cols = 1L) + checkmate::assert_tibble(df3, min.rows = 1L, min.cols = 1L) + + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + + offset_manual <- 1e-6 + df_combined <- dplyr::left_join( + df2, + df3, + by = c("modelling_group", "vaccine") + ) + + df_combined <- dplyr::mutate( + df_combined, + adj_outc = .data[[outcome]] + offset_manual, + outcome_name = outcome + ) + + df_combined <- dplyr::group_by( + df_combined, + .data$vaccine + ) + + df_combined <- dplyr::mutate( + df_combined, + mean_outc = stats::weighted.mean(.data$adj_outc, .data$fvps, na.rm = TRUE) + ) + + df_combined +} + +#' @name plot_prep_impact_diagnostics +#' +#' @param data A `` of impact estimates with columns including at least +#' those in [COLNAMES_KEY_PRESSURE_TEST], the outcome variable, and +#' potentially other columns for analysis. +#' +#' @param prev_data A `` of impact estimates from a previous touchstone, +#' used as a comparison baseline. Should have the same structure as `data`. +#' +#' @export +prep_plot_vax_gavi <- function( + data, + prev_data, + outcome = IMPACT_OUTCOMES, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) { + checkmate::assert_tibble(data) + checkmate::assert_tibble(prev_data) + outcome <- rlang::arg_match(outcome, IMPACT_OUTCOMES) + touchstone_old <- validate_ts_year(touchstone_old) + touchstone_new <- validate_ts_year(touchstone_new) + + df_list <- Map( + list(data, prev_data), + list(touchstone_new, touchstone_old), + f = function(df, ts_id) { + df <- dplyr::select( + df, + dplyr::all_of(COLNAMES_KEY_PRESSURE_TEST), + {{ outcome }} + ) + + df <- dplyr::filter( + df, + dplyr::between(.data$year, 2021, 2024) + ) + + df <- dplyr::filter_out( + df, + grepl("COVID", .data$disease, ignore.case = TRUE) + ) + + df <- dplyr::group_by(df, .data$disease, .data$year) + + df <- dplyr::summarise( + df, + yearly_outcome = sum(.data[[outcome]], na.rm = TRUE), + .groups = "drop" + ) + + df <- dplyr::mutate( + df, + dataset = as.character(ts_id) + ) + } + ) + + df_combined <- dplyr::bind_rows(df_list) + + df_diff <- Reduce( + df_list, + f = function(x, y) { + dplyr::left_join( + x, + y, + by = c("disease", "year"), + suffix = c("_curr", "_prev") + ) + } + ) + + df_diff <- dplyr::mutate( + df_diff, + yearly_outcome = .data$yearly_outcome_curr - .data$yearly_outcome_prev, + dataset = "Difference" + ) + cols_to_select <- c("disease", "year", "yearly_outcome", "dataset") + df_diff <- dplyr::select(df_diff, {{ cols_to_select }}) + + df_combined <- dplyr::bind_rows(df_combined, df_diff) + + df_combined$dataset <- factor( + df_combined$dataset, + levels = c( + as.character(touchstone_old), + "Difference", + as.character(touchstone_new) + ) + ) + + df_combined$outcome_name <- outcome + + df_combined +} + +#' @name plot_prep_impact_diagnostics +#' +#' @param disease A character string specifying a single disease for filtering +#' and analysis. +#' +#' @export +prep_plot_cumul <- function( + data, + outcome, + disease, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) { + checkmate::assert_tibble(data) + checkmate::assert_subset( + outcome, + IMPACT_OUTCOMES + ) + + outcome_cols <- colnames(data)[stringr::str_detect( + colnames(data), + glue::glue("^{outcome}_") + )] + + cum_col <- glue::glue("cum_{outcome}") + avg_col <- glue::glue("avg_{outcome}") + + combined2 <- dplyr::select( + data, + {{ COLNAMES_KEY_PRESSURE_TEST }}, + {{ outcome_cols }} + ) + combined2 <- combined2[combined2$disease == disease, ] + + combined2 <- tidyr::pivot_longer( + combined2, + cols = dplyr::all_of(outcome_cols), + names_to = "touchstone", + values_to = "value" + ) + + combined2 <- dplyr::mutate( + combined2, + touchstone = stringr::str_remove( + .data$touchstone, + glue::glue("^{outcome}_") + ), + touchstone = dplyr::replace_values( + .data$touchstone, + from = c("old", "new"), + to = as.character(c(touchstone_old, touchstone_new)) + ), + touchstone = factor( + .data$touchstone, + levels = as.character(c(touchstone_old, touchstone_new)) + ) + ) + + # Cumulative values by modelling group + df_cum <- dplyr::group_by( + combined2, + .data$modelling_group, + .data$touchstone + ) + df_cum <- tidyr::complete( + df_cum, + year = tidyr::full_seq(.data$year, 1) + ) + df_cum <- dplyr::arrange(df_cum, .data$year) + df_cum <- dplyr::mutate( + df_cum, + first_valid = min(.data$year[!is.na(.data$value)]), + {{ cum_col }} := dplyr::if_else( + .data$year < .data$first_valid, + NA_real_, + cumsum(tidyr::replace_na(.data$value, 0.0)) + ) + ) + + df_cum$first_valid <- NULL + df_cum <- dplyr::ungroup(df_cum) + df_cum <- dplyr::mutate( + df_cum, + modelling_group = glue::glue("{.data$modelling_group}-{.data$touchstone}") + ) + + # Model average + df_avg <- dplyr::summarise( + df_cum, + {{ avg_col }} := mean({{ cum_col }}, na.rm = TRUE), + n_models = sum(!is.na({{ cum_col }})), + .by = c("year", "touchstone") + ) + df_avg <- dplyr::filter( + df_avg, + .data$n_models >= 1 + ) + df_avg <- dplyr::mutate( + df_avg, + modelling_group = glue::glue( + "Model Average-{.data$touchstone}" + ) + ) + + # Combine for plot + cols_to_select <- c("year", "modelling_group", "touchstone") + df_plot <- dplyr::bind_rows( + dplyr::select( + df_cum, + {{ cols_to_select }}, + value = {{ cum_col }} + ), + dplyr::select( + df_avg, + {{ cols_to_select }}, + value = {{ avg_col }} + ) + ) + + df_plot <- dplyr::group_by(df_plot, .data$modelling_group) + df_plot <- dplyr::filter( + df_plot, + sum(.data$value, na.rm = TRUE) > 0 + ) + df_plot <- dplyr::ungroup(df_plot) + df_plot <- dplyr::mutate( + df_plot, + line_type = dplyr::if_else( + grepl("Model Average", .data$modelling_group, fixed = TRUE), + "dashed", + "solid" + ) + ) + + # add outcome name + df_plot$outcome_name <- outcome + + if (nrow(df_plot) == 0 || all(df_plot$value == 0)) { + message("No non-zero data to plot for ", disease, ". Skipping plot.") + return(NULL) + } + + df_plot +} diff --git a/R/helpers.R b/R/helpers.R deleted file mode 100644 index 7557d02..0000000 --- a/R/helpers.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Make data for a no-vaccination scenario -#' -#' @name helpers -#' @rdname helpers -#' -#' @description -#' Helper functions for burden diagnostics. -#' -#' @inheritParams validate_file_dict_template -#' -#' @keywords internal -#' -#' @return -#' -#' - `make_novax_scenario()` returns a tibble with the minimum required column -#' names, and entries corresponding to a 'no-vaccination' scenario for -#' `disease`. -make_novax_scenario <- function(disease) { - v <- c( - "novac", - "No Vaccination", - glue::glue("{disease}-no-vaccination"), - "No vaccination", - "no-vaccination.csv" - ) - - # internal function without input checking - df_ <- dplyr::tibble( - variable = file_dict_colnames, - value = v - ) - - tidyr::pivot_wider( - df_, - names_from = "variable" - ) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index bbc0826..40a29ee 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -7,28 +7,44 @@ reference: - title: Package-level documentation contents: - has_keyword("package_doc") - - title: Diagnostic functions - desc: Package diagnostic functions. + + - title: Burden estimate diagnostics + + - subtitle: Check burden estimates + contents: + - has_keyword("burden_diagnostics") + - subtitle: Prepare burden estimates for plotting contents: - - has_keyword("diagnostics") - - title: Plotting prepartion - desc: Prepare validated data for plotting. + - plot_prep_burden_diagnostics + - subtitle: Plot burden estimates + contents: + - plot_burden_diagnostics + + + - title: Impact estimate diagnostics + + - subtitle: Check impact estimates contents: - - plotting_prep - - title: Plotting functions - desc: Package plotting functions. + - has_keyword("impact_diagnostics") + - subtitle: Prepare impact estimate checks for plotting + contents: + - plot_prep_impact_diagnostics + - subtitle: Plot impact estimates + contents: + - plot_impact_diagnostics + + - title: Plotting helper functions contents: - - plotting - plotting_theme + - title: Internal functions - desc: Internal helper functions. contents: - has_keyword("internal") - - title: Data - desc: Package data. + + - title: Package data contents: - has_keyword("data") - - title: Constants - desc: Package constants. + + - title: Package constants contents: - has_keyword("constants") diff --git a/data-raw/eg_impact.R b/data-raw/eg_impact.R new file mode 100644 index 0000000..29e5c27 --- /dev/null +++ b/data-raw/eg_impact.R @@ -0,0 +1,28 @@ +## code to prepare `eg_impact` dataset goes here + +library(dplyr) +library(countrycode) + +eg_impact <- rbind( + readRDS("inst/extdata/impact_method2a.rds"), + readRDS("inst/extdata/impact_method2b.rds") +) + +# assign disease inferred from vaccine names +# infer MCV to be for MenA from report +eg_impact <- mutate( + eg_impact, + disease = case_when( + vaccine %in% c("HepB", "HepB_BD") ~ "HepB", + vaccine %in% c("MCV1", "MCV2") ~ "MenA", + .default = vaccine + ) +) + +# infer country name from ISO code (only includes PINE but for future-proofing) +eg_impact <- mutate( + eg_impact, + country_name = countrycode::countrycode(country, "iso3c", "country.name") +) + +usethis::use_data(eg_impact, overwrite = TRUE) diff --git a/data-raw/who_subregions.R b/data-raw/who_subregions.R new file mode 100644 index 0000000..41e00f8 --- /dev/null +++ b/data-raw/who_subregions.R @@ -0,0 +1,8 @@ +## code to prepare `who_subregions` dataset goes here + +library(readr) +library(dplyr) + +who_subregions <- read_csv("inst/extdata/who_sub_regions.csv") + +usethis::use_data(who_subregions, overwrite = TRUE) diff --git a/data/eg_impact.rda b/data/eg_impact.rda new file mode 100644 index 0000000..9afab65 Binary files /dev/null and b/data/eg_impact.rda differ diff --git a/data/who_subregions.rda b/data/who_subregions.rda new file mode 100644 index 0000000..292520d Binary files /dev/null and b/data/who_subregions.rda differ diff --git a/inst/WORDLIST b/inst/WORDLIST index 69da54f..380dd4c 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -7,6 +7,7 @@ FVPs GAVI GHA Hallett +IQRs Jaspreet Kaja Londono @@ -14,14 +15,38 @@ Nayagam ORCID Shevanthi Susy +Tidyverse Toor VIMC +VIMC's WIP WPP YLLs +YYYYMM autogenerated +cli +cumul +dalys +devtools +df +diffdf +eg +erroring facetted +gavi ggplot +grp +iqr +pkgdown +roxygen +sig +subreg +testthat tibble tibbles -timeseries +ts +usethis +varn +vax +vdiffr +vimpact diff --git a/inst/extdata/impact_method2a.rds b/inst/extdata/impact_method2a.rds new file mode 100644 index 0000000..6bb4bef Binary files /dev/null and b/inst/extdata/impact_method2a.rds differ diff --git a/inst/extdata/impact_method2b.rds b/inst/extdata/impact_method2b.rds new file mode 100644 index 0000000..f3650a9 Binary files /dev/null and b/inst/extdata/impact_method2b.rds differ diff --git a/inst/extdata/who_sub_regions.csv b/inst/extdata/who_sub_regions.csv new file mode 100644 index 0000000..051ab6b --- /dev/null +++ b/inst/extdata/who_sub_regions.csv @@ -0,0 +1,250 @@ +choice_subregion,country_name,country,Global.Name,Region.Name,Sub.region,Intermediate.Region.Name,subregion, vimc117 +EMR D,Afghanistan,AFG,World,Asia,Southern Asia,,Central and Southern Asia,1 +AFR D,Angola,AGO,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +EUR B,Albania,ALB,World,Europe,Southern Europe,,Eastern and Southern Europe,1 +EUR A,Andorra,AND,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +EMR B,United Arab Emirates,ARE,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +AMR B,Argentina,ARG,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +EUR B,Armenia,ARM,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +AMR B,Antigua and Barbuda,ATG,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +WPR A,Australia,AUS,World,Oceania,Australia and New Zealand,,Oceania,0 +EUR A,Austria,AUT,World,Europe,Western Europe,,Northern and Western Europe,0 +EUR B,Azerbaijan,AZE,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +AFR E,Burundi,BDI,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR A,Belgium,BEL,World,Europe,Western Europe,,Northern and Western Europe,0 +AFR D,Benin,BEN,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Burkina Faso,BFA,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +SEAR D,Bangladesh,BGD,World,Asia,Southern Asia,,Central and Southern Asia,1 +EUR B,Bulgaria,BGR,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +EMR B,Bahrain,BHR,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +AMR B,Bahamas,BHS,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +EUR B,Bosnia and Herzegovina,BIH,World,Europe,Southern Europe,,Eastern and Southern Europe,1 +EUR C,Belarus,BLR,World,Europe,Eastern Europe,,Eastern and Southern Europe,1 +AMR B,Belize,BLZ,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,1 +AMR D,Bolivia (Plurinational State of),BOL,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +AMR B,Brazil,BRA,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +AMR B,Barbados,BRB,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +WPR A,Brunei Darussalam,BRN,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,0 +SEAR D,Bhutan,BTN,World,Asia,Southern Asia,,Central and Southern Asia,1 +AFR E,Botswana,BWA,World,Africa,Sub-Saharan Africa,Southern Africa,Southern Africa,0 +AFR E,Central African Republic,CAF,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +AMR A,Canada,CAN,World,Americas,Northern America,,Northern America,0 +EUR A,Switzerland,CHE,World,Europe,Western Europe,,Northern and Western Europe,0 +AMR B,Chile,CHL,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +WPR B,China,CHN,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,1 +AFR E,Cote d'Ivoire,CIV,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Cameroon,CMR,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +AFR E,Democratic Republic of the Congo,COD,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +AFR E,Congo,COG,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +WPR B,Cook Islands,COK,World,Oceania,Polynesia,,Oceania,0 +AMR B,Colombia,COL,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +AFR D,Comoros,COM,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AFR D,Cape Verde,CPV,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AMR B,Costa Rica,CRI,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,0 +AMR A,Cuba,CUB,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +EUR A,Cyprus,CYP,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +EUR A,Czech Republic,CZE,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +EUR A,Germany,DEU,World,Europe,Western Europe,,Northern and Western Europe,0 +EMR D,Djibouti,DJI,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AMR B,Dominica,DMA,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +EUR A,Denmark,DNK,World,Europe,Northern Europe,,Northern and Western Europe,0 +AMR B,Dominican Republic,DOM,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +AFR D,Algeria,DZA,World,Africa,Northern Africa,,Northern Africa and Western Asia,1 +AMR D,Ecuador,ECU,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +EMR D,Egypt,EGY,World,Africa,Northern Africa,,Northern Africa and Western Asia,1 +AFR E,Eritrea,ERI,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR A,Spain,ESP,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +EUR C,Estonia,EST,World,Europe,Northern Europe,,Northern and Western Europe,0 +AFR E,Ethiopia,ETH,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR A,Finland,FIN,World,Europe,Northern Europe,,Northern and Western Europe,0 +WPR B,Fiji,FJI,World,Oceania,Melanesia,,Oceania,1 +EUR A,France,FRA,World,Europe,Western Europe,,Northern and Western Europe,0 +WPR B,Micronesia (Federated States of),FSM,World,Oceania,Micronesia,,Oceania,1 +AFR D,Gabon,GAB,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,0 +EUR A,United Kingdom,GBR,World,Europe,Northern Europe,,Northern and Western Europe,0 +EUR B,Georgia,GEO,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +AFR D,Ghana,GHA,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Guinea,GIN,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Gambia,GMB,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Guinea-Bissau,GNB,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Equatorial Guinea,GNQ,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,0 +EUR A,Greece,GRC,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +AMR B,Grenada,GRD,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +AMR D,Guatemala,GTM,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,1 +AMR B,Guyana,GUY,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +AMR B,Honduras,HND,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,1 +EUR A,Croatia,HRV,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +AMR D,Haiti,HTI,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +EUR C,Hungary,HUN,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +SEAR B,Indonesia,IDN,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +SEAR D,India,IND,World,Asia,Southern Asia,,Central and Southern Asia,1 +EUR A,Ireland,IRL,World,Europe,Northern Europe,,Northern and Western Europe,0 +EMR B,Iran (Islamic Republic of),IRN,World,Asia,Southern Asia,,Central and Southern Asia,1 +EMR D,Iraq,IRQ,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +EUR A,Iceland,ISL,World,Europe,Northern Europe,,Northern and Western Europe,0 +EUR A,Israel,ISR,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +EUR A,Italy,ITA,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +AMR B,Jamaica,JAM,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +EMR B,Jordan,JOR,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +WPR A,Japan,JPN,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,0 +EUR C,Kazakhstan,KAZ,World,Asia,Central Asia,,Central and Southern Asia,0 +AFR E,Kenya,KEN,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR B,Kyrgyzstan,KGZ,World,Asia,Central Asia,,Central and Southern Asia,1 +WPR B,Cambodia,KHM,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +WPR B,Kiribati,KIR,World,Oceania,Micronesia,,Oceania,1 +AMR B,Saint Kitts and Nevis,KNA,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +WPR B,Republic of Korea,KOR,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,0 +EMR B,Kuwait,KWT,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +WPR B,Lao People's Democratic Republic,LAO,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +EMR B,Lebanon,LBN,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +AFR D,Liberia,LBR,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +EMR B,Libyan Arab Jamahiriya,LBY,World,Africa,Northern Africa,,Northern Africa and Western Asia,0 +AMR B,Saint Lucia,LCA,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +SEAR B,Sri Lanka,LKA,World,Asia,Southern Asia,,Central and Southern Asia,1 +AFR E,Lesotho,LSO,World,Africa,Sub-Saharan Africa,Southern Africa,Southern Africa,1 +EUR C,Lithuania,LTU,World,Europe,Northern Europe,,Northern and Western Europe,0 +EUR A,Luxembourg,LUX,World,Europe,Western Europe,,Northern and Western Europe,0 +EUR C,Latvia,LVA,World,Europe,Northern Europe,,Northern and Western Europe,0 +EMR D,Morocco,MAR,World,Africa,Northern Africa,,Northern Africa and Western Asia,1 +EUR A,Monaco,MCO,World,Europe,Western Europe,,Northern and Western Europe,0 +EUR C,Republic of Moldova,MDA,World,Europe,Eastern Europe,,Eastern and Southern Europe,1 +AFR D,Madagascar,MDG,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +SEAR D,Maldives,MDV,World,Asia,Southern Asia,,Central and Southern Asia,1 +AMR B,Mexico,MEX,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,0 +WPR B,Marshall Islands,MHL,World,Oceania,Micronesia,,Oceania,1 +EUR B,The Former Yugoslav Republic of Macedonia,MKD,World,Europe,Southern Europe,,Eastern and Southern Europe,1 +AFR D,Mali,MLI,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +EUR A,Malta,MLT,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +SEAR D,Myanmar,MMR,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +EUR B,Montenegro,MNE,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +WPR B,Mongolia,MNG,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,1 +AFR E,Mozambique,MOZ,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AFR D,Mauritania,MRT,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Mauritius,MUS,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +AFR E,Malawi,MWI,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +WPR B,Malaysia,MYS,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,0 +AFR E,Namibia,NAM,World,Africa,Sub-Saharan Africa,Southern Africa,Southern Africa,1 +AFR D,Niger,NER,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AFR D,Nigeria,NGA,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AMR D,Nicaragua,NIC,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,1 +WPR B,Niue,NIU,World,Oceania,Polynesia,,Oceania,0 +EUR A,Netherlands,NLD,World,Europe,Western Europe,,Northern and Western Europe,0 +EUR A,Norway,NOR,World,Europe,Northern Europe,,Northern and Western Europe,0 +SEAR D,Nepal,NPL,World,Asia,Southern Asia,,Central and Southern Asia,1 +WPR B,Nauru,NRU,World,Oceania,Micronesia,,Oceania,0 +WPR A,New Zealand,NZL,World,Oceania,Australia and New Zealand,,Oceania,0 +EMR B,Oman,OMN,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +EMR D,Pakistan,PAK,World,Asia,Southern Asia,,Central and Southern Asia,1 +AMR B,Panama,PAN,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,0 +AMR D,Peru,PER,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +WPR B,Philippines,PHL,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +WPR B,Palau,PLW,World,Oceania,Micronesia,,Oceania,0 +WPR B,Papua New Guinea,PNG,World,Oceania,Melanesia,,Oceania,1 +EUR B,Poland,POL,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +SEAR D,Democratic People's Republic of Korea,PRK,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,1 +EUR A,Portugal,PRT,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +AMR B,Paraguay,PRY,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +EMR B,Qatar,QAT,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +EUR B,Romania,ROU,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +EUR C,Russian Federation,RUS,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +AFR E,Rwanda,RWA,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EMR B,Saudi Arabia,SAU,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +EMR D,Sudan,SDN,World,Africa,Northern Africa,,Northern Africa and Western Asia,1 +AFR D,Senegal,SEN,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +WPR A,Singapore,SGP,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,0 +WPR B,Solomon Islands,SLB,World,Oceania,Melanesia,,Oceania,1 +AFR D,Sierra Leone,SLE,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +AMR B,El Salvador,SLV,World,Americas,Latin America and the Caribbean,Central America,Latin America and the Caribbean,1 +EUR A,San Marino,SMR,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +EMR D,Somalia,SOM,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR B,Serbia,SRB,World,Europe,Southern Europe,,Eastern and Southern Europe,1 +EMR D,South Sudan,SSD,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AFR D,Sao Tome and Principe,STP,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +AMR B,Suriname,SUR,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +EUR B,Slovakia,SVK,World,Europe,Eastern Europe,,Eastern and Southern Europe,0 +EUR A,Slovenia,SVN,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +EUR A,Sweden,SWE,World,Europe,Northern Europe,,Northern and Western Europe,0 +AFR E,Swaziland,SWZ,World,Africa,Sub-Saharan Africa,Southern Africa,Southern Africa,1 +AFR D,Seychelles,SYC,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +EMR B,Syrian Arab Republic,SYR,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +AFR D,Chad,TCD,World,Africa,Sub-Saharan Africa,Middle Africa,Middle Africa,1 +AFR D,Togo,TGO,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,1 +SEAR B,Thailand,THA,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +EUR B,Tajikistan,TJK,World,Asia,Central Asia,,Central and Southern Asia,1 +EUR B,Turkmenistan,TKM,World,Asia,Central Asia,,Central and Southern Asia,1 +SEAR D,Timor-Leste,TLS,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +WPR B,Tonga,TON,World,Oceania,Polynesia,,Oceania,1 +AMR B,Trinidad and Tobago,TTO,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +EMR B,Tunisia,TUN,World,Africa,Northern Africa,,Northern Africa and Western Asia,1 +EUR B,Turkey,TUR,World,Asia,Western Asia,,Northern Africa and Western Asia,0 +WPR B,Tuvalu,TUV,World,Oceania,Polynesia,,Oceania,1 +AFR E,United Republic of Tanzania,TZA,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AFR E,Uganda,UGA,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +EUR C,Ukraine,UKR,World,Europe,Eastern Europe,,Eastern and Southern Europe,1 +AMR B,Uruguay,URY,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +AMR A,United States of America,USA,World,Americas,Northern America,,Northern America,0 +EUR B,Uzbekistan,UZB,World,Asia,Central Asia,,Central and Southern Asia,1 +AMR B,Saint Vincent and the Grenadines,VCT,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,1 +AMR B,Venezuela (Bolivarian Republic of),VEN,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,1 +WPR B,Viet Nam,VNM,World,Asia,South-eastern Asia,,Eastern and South-Eastern Asia,1 +WPR B,Vanuatu,VUT,World,Oceania,Melanesia,,Oceania,1 +WPR B,Samoa,WSM,World,Oceania,Polynesia,,Oceania,1 +EMR D,Yemen,YEM,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +AFR E,South Africa,ZAF,World,Africa,Sub-Saharan Africa,Southern Africa,Southern Africa,1 +AFR E,Zambia,ZMB,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +AFR E,Zimbabwe,ZWE,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,1 +NA,NA,ESH,World,Africa,Northern Africa,,Northern Africa and Western Asia,0 +NA,NA,IOT,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +NA,NA,ATF,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +NA,NA,MYT,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +NA,NA,REU,World,Africa,Sub-Saharan Africa,Eastern Africa,Eastern Africa,0 +NA,NA,SHN,World,Africa,Sub-Saharan Africa,Western Africa,Western Africa,0 +NA,NA,AIA,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,ABW,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,BES,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,VGB,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,CYM,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,CUW,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,GLP,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,MTQ,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,MSR,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,PRI,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,BLM,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,MAF,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,SXM,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,TCA,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,VIR,World,Americas,Latin America and the Caribbean,Caribbean,Latin America and the Caribbean,0 +NA,NA,BVT,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +NA,NA,FLK,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +NA,NA,GUF,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +NA,NA,SGS,World,Americas,Latin America and the Caribbean,South America,Latin America and the Caribbean,0 +NA,NA,BMU,World,Americas,Northern America,,Northern America,0 +NA,NA,GRL,World,Americas,Northern America,,Northern America,0 +NA,NA,SPM,World,Americas,Northern America,,Northern America,0 +NA,NA,ATA,World,,,,,0 +NA,NA,HKG,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,0 +NA,NA,MAC,World,Asia,Eastern Asia,,Eastern and South-Eastern Asia,0 +NA,Palestine,PSE,World,Asia,Western Asia,,Northern Africa and Western Asia,1 +NA,NA,ALA,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,FRO,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,GGY,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,IMN,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,JEY,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,SJM,World,Europe,Northern Europe,,Northern and Western Europe,0 +NA,NA,GIB,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +NA,NA,VAT,World,Europe,Southern Europe,,Eastern and Southern Europe,0 +NA,NA,LIE,World,Europe,Western Europe,,Northern and Western Europe,0 +NA,NA,CXR,World,Oceania,Australia and New Zealand,,Oceania,0 +NA,NA,CCK,World,Oceania,Australia and New Zealand,,Oceania,0 +NA,NA,HMD,World,Oceania,Australia and New Zealand,,Oceania,0 +NA,NA,NFK,World,Oceania,Australia and New Zealand,,Oceania,0 +NA,NA,NCL,World,Oceania,Melanesia,,Oceania,0 +NA,NA,GUM,World,Oceania,Micronesia,,Oceania,0 +NA,NA,MNP,World,Oceania,Micronesia,,Oceania,0 +NA,NA,UMI,World,Oceania,Micronesia,,Oceania,0 +NA,NA,ASM,World,Oceania,Polynesia,,Oceania,0 +NA,NA,PYF,World,Oceania,Polynesia,,Oceania,0 +NA,NA,PCN,World,Oceania,Polynesia,,Oceania,0 +NA,NA,TKL,World,Oceania,Polynesia,,Oceania,0 +NA,NA,WLF,World,Oceania,Polynesia,,Oceania,0 +EUR B,Kosovo,XK,World,Europe,Southern Europe,,Eastern and Southern Europe,1 \ No newline at end of file diff --git a/jarl.toml b/jarl.toml new file mode 100644 index 0000000..a2e2af7 --- /dev/null +++ b/jarl.toml @@ -0,0 +1,2 @@ +[lint.assignment] +operator = "<-" diff --git a/man/adaptive_round.Rd b/man/adaptive_round.Rd new file mode 100644 index 0000000..ab1ae9c --- /dev/null +++ b/man/adaptive_round.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{adaptive_round} +\alias{adaptive_round} +\title{Adaptively round numerics} +\usage{ +adaptive_round(x, large_threshold = 1, small_sigfig = 2, large_digits = 1) +} +\arguments{ +\item{x}{A numeric vector.} + +\item{large_threshold}{A single number for the threshold over which numbers +are to be considered 'large'.} + +\item{small_sigfig}{A single number for the number of significant digits for +'small' numbers.} + +\item{large_digits}{A single number for the number of places to which 'large' +numbers should be rounded.} +} +\value{ +\code{x} rounded to either \code{large_digits} or to \code{small_sigfig}. +} +\description{ +Adaptively round numerics +} +\keyword{internal} diff --git a/man/add_campaign_id.Rd b/man/add_campaign_id.Rd new file mode 100644 index 0000000..7e007ed --- /dev/null +++ b/man/add_campaign_id.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{add_campaign_id} +\alias{add_campaign_id} +\title{Add campaign id to dataframe} +\usage{ +add_campaign_id(df, key_cols) +} +\arguments{ +\item{df}{A data.frame.} + +\item{key_cols}{A character vector of columns in \code{df} by which the data are +to be grouped.} +} +\value{ +\code{df} with a campaign identifier as a numeric. +} +\description{ +Add campaign id to dataframe +} +\keyword{internal} diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd index 7217a7d..035018f 100644 --- a/man/basic_burden_sanity.Rd +++ b/man/basic_burden_sanity.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{basic_burden_sanity} \alias{basic_burden_sanity} \title{Sanity checks on burden estimates} @@ -18,4 +18,4 @@ estimates, with the length of the vector depending on how many checks fail. Helper function for sanity checks on burden estimate values. Checks whether any burden estimates are non-numeric, missing, or negative. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd index 1b5ca1f..d5fecd9 100644 --- a/man/check_demography_alignment.Rd +++ b/man/check_demography_alignment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{check_demography_alignment} \alias{check_demography_alignment} \title{Check incoming burden cohort size against interpolated population} @@ -27,4 +27,4 @@ modelled population size from the WPP-derived population estimates. Check the modelled disease burden data has similar population sizes as the provided population data. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/compare_natl_subreg.Rd b/man/compare_natl_subreg.Rd new file mode 100644 index 0000000..395d116 --- /dev/null +++ b/man/compare_natl_subreg.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{compare_natl_subreg} +\alias{compare_natl_subreg} +\title{Compare sub-regional and national estimates} +\usage{ +compare_natl_subreg( + df, + outcome = c("deaths_averted_rate", "dalys_averted_rate"), + activity_filter = c("campaign", "routine") +) +} +\arguments{ +\item{df}{A data.frame with sub-region level data on vaccination impact +outcomes.} + +\item{outcome}{A string for the outcome of interest. May be one of +\code{"deaths_averted_rate"} or \code{"dalys_averted_rate"}.} + +\item{activity_filter}{A string for the type of vaccination activity. May be +one of \code{"campaign"} or \code{"routine"}.} +} +\value{ +A data.frame of sub-regional vaccination impact estimates where the +impact is considered to be outside the tolerance limit. +} +\description{ +Compare sub-regional and national estimates +} +\keyword{impact_diagnostics} diff --git a/man/constants.Rd b/man/constants.Rd index 70b14da..1d93a1f 100644 --- a/man/constants.Rd +++ b/man/constants.Rd @@ -7,6 +7,21 @@ \alias{scenario_data_colnames} \alias{burden_outcome_names} \alias{colnames_plot_demog_compare} +\alias{colnames_df_missing_cols} +\alias{COLNAMES_KEY_PRESSURE_TEST} +\alias{COLNAMES_INTEREST_PRESSURE_TEST} +\alias{IMPACT_OUTCOMES} +\alias{EXCLUDED_DISEASES} +\alias{N_TS_MIN_CHARS} +\alias{N_TS_YEAR_CHARS} +\alias{MIN_TS_YEAR} +\alias{MAX_TS_YEAR} +\alias{MIN_TS_MONTH} +\alias{MAX_TS_MONTH} +\alias{DEF_TOUCHSTONE_OLD} +\alias{DEF_TOUCHSTONE_NEW} +\alias{DEF_TOUCHSTONE_OLD_OLD} +\alias{COLOUR_VIMC} \title{Package constants} \format{ An object of class \code{character} of length 5. @@ -16,6 +31,36 @@ An object of class \code{character} of length 4. An object of class \code{character} of length 10. An object of class \code{character} of length 7. + +An object of class \code{character} of length 5. + +An object of class \code{character} of length 7. + +An object of class \code{character} of length 14. + +An object of class \code{character} of length 2. + +An object of class \code{character} of length 4. + +An object of class \code{integer} of length 1. + +An object of class \code{integer} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{numeric} of length 1. + +An object of class \code{character} of length 1. + +An object of class \code{character} of length 1. + +An object of class \code{character} of length 1. + +An object of class \code{character} of length 1. } \usage{ file_dict_colnames @@ -25,9 +70,80 @@ scenario_data_colnames burden_outcome_names colnames_plot_demog_compare + +colnames_df_missing_cols + +COLNAMES_KEY_PRESSURE_TEST + +COLNAMES_INTEREST_PRESSURE_TEST + +IMPACT_OUTCOMES + +EXCLUDED_DISEASES + +N_TS_MIN_CHARS + +N_TS_YEAR_CHARS + +MIN_TS_YEAR + +MAX_TS_YEAR + +MIN_TS_MONTH + +MAX_TS_MONTH + +DEF_TOUCHSTONE_OLD + +DEF_TOUCHSTONE_NEW + +DEF_TOUCHSTONE_OLD_OLD + +COLOUR_VIMC } \description{ -Package constants +Constant values used in \emph{vimcheck}. See the \strong{Examples} section for the +constant values. +} +\examples{ +file_dict_colnames + +scenario_data_colnames + +burden_outcome_names + +colnames_plot_demog_compare + +colnames_df_missing_cols + +COLNAMES_KEY_PRESSURE_TEST + +COLNAMES_INTEREST_PRESSURE_TEST + +IMPACT_OUTCOMES + +EXCLUDED_DISEASES + +N_TS_MIN_CHARS + +N_TS_YEAR_CHARS + +MIN_TS_YEAR + +MAX_TS_YEAR + +MIN_TS_MONTH + +MAX_TS_MONTH + +DEF_TOUCHSTONE_OLD + +DEF_TOUCHSTONE_NEW + +DEF_TOUCHSTONE_OLD_OLD + +COLOUR_VIMC + } \keyword{constants} \keyword{datasets} diff --git a/man/eg_impact.Rd b/man/eg_impact.Rd new file mode 100644 index 0000000..7334325 --- /dev/null +++ b/man/eg_impact.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/example_data.R +\docType{data} +\name{eg_impact} +\alias{eg_impact} +\title{Example of impact data} +\format{ +\subsection{\code{eg_impact}}{ + +A data frame with 5396 rows and 9 columns: +\describe{ +\item{disease}{Disease name.} +\item{vaccine}{Vaccine identifier.} +\item{modelling_group}{Modelling group name.} +\item{country}{Country ISO 3-character code.} +\item{country_name}{Country name.} +\item{year}{Year for which impacts are modelled.} +\item{activity_type}{Activity type: either "routine" or "campaign."} +\item{burden_outcome}{Name of the burden outcome; one of "deaths_averted" +or "dalys_averted".} +\item{impact}{Value of the impact}. +} +} +} +\source{ +Prepared by the VIMC secretariat. +} +\usage{ +eg_impact +} +\description{ +Example of vaccine impact data taken from data used to test \pkg{vimpact}. +This data is primarily used for testing here too. +} +\keyword{data} diff --git a/man/filter_impact_data.Rd b/man/filter_impact_data.Rd new file mode 100644 index 0000000..bb941e3 --- /dev/null +++ b/man/filter_impact_data.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{filter_impact_data} +\alias{filter_impact_data} +\alias{filter_recent_ts} +\alias{filter_excluded_diseases_ts} +\alias{flag_duplicates} +\alias{filter_invalid_trajectories} +\title{Filter data for touchstones or diseases} +\usage{ +filter_recent_ts(df, threshold = DEF_TOUCHSTONE_NEW) + +filter_excluded_diseases_ts(df, threshold = DEF_TOUCHSTONE_OLD_OLD) + +flag_duplicates(df, key_cols = COLNAMES_KEY_PRESSURE_TEST) + +filter_invalid_trajectories( + df, + prev_data, + outcome = c("deaths_averted", "dalys_averted") +) +} +\arguments{ +\item{df}{A \verb{} holding impact data. This data.frame is not +checked for contents} + +\item{threshold}{A six-digit number that is checked as a valid touchstone +identifier (YYYYMM format) using \code{\link[=validate_ts_year]{validate_ts_year()}}. Defaults to +\link{DEF_TOUCHSTONE_NEW} (\code{"202310"}).} + +\item{key_cols}{Key columns in \code{df} to check for duplicates.} + +\item{prev_data}{A \verb{} holding data from a previous touchstone for +the same scenarios as \code{df}.} + +\item{outcome}{A string giving the outcome of interest; may be one of +\code{"deaths_averted"} or \code{"dalys_averted"}.} +} +\value{ +A filtered \verb{}. +\itemize{ +\item \code{filter_recent_ts()} returns \code{df} with rows where the touchstone condition +is not met excluded. +\item \code{filter_excluded_diseases_ts()} returns \code{df} with rows where rows relating +to the \link{EXCLUDED_DISEASES}, when the touchstone year in \code{df} is less than the +\code{threshold}, excluded. +\item \code{flag_duplicates()} returns \code{df} with duplicated combinations of +\code{key_cols} flagged using the column \code{n_key} (or a user-defined name). +\item \code{filter_invalid_trajectories()} returns \code{df} with bad outcome trajectories +(\code{NA} to non-\code{NA}) removed. +} +} +\description{ +A pair of helper functions allowing filtering out of recent touchstone values +and excluded diseases. +} +\keyword{impact_diagnostics} diff --git a/man/flag_large_diffs.Rd b/man/flag_large_diffs.Rd new file mode 100644 index 0000000..f54488d --- /dev/null +++ b/man/flag_large_diffs.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{flag_large_diffs} +\alias{flag_large_diffs} +\title{Flag significant changes in impact estimates} +\usage{ +flag_large_diffs( + changes_list, + iqr_df, + variable = c("deaths_averted", "dalys_averted"), + group_cols = c("country", "vaccine", "activity_type"), + threshold = 100, + touchstone_old = DEF_TOUCHSTONE_OLD_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) +} +\arguments{ +\item{changes_list}{A list of data.frames with one element per variable of +interest (see \code{variable}). Usually generated using \code{\link[=generate_diffs]{generate_diffs()}}.} + +\item{iqr_df}{A data.frame of inter-quartile differences generated using +\code{\link[=gen_national_iqr]{gen_national_iqr()}}.} + +\item{variable}{A string specifying the variable of interest. Must be one of +"deaths_averted" or "dalys_averted", and must be present as a name and +element of \code{changes_list}.} + +\item{group_cols}{A character vector of grouping columns. Defaults to +"country", "vaccine", "activity_type".} + +\item{threshold}{A single numeric value for the IQR multiplier. Defaults to +100.} + +\item{touchstone_old}{The previous touchstone identifier. Defaults to +\link{DEF_TOUCHSTONE_OLD_OLD}.} + +\item{touchstone_new}{The new touchstone identifier. Defaults to +\link{DEF_TOUCHSTONE_NEW}.} +} +\value{ +A filtered data.frame of differences in impact estimates flagged +as too large. Rows with differences within tolerance are removed. +} +\description{ +Calculates and flags whether the difference in impact estimates +between touchstones is greater than expected. A row is flagged if the +difference is greater than \code{threshold} \eqn{\times} the inter-quartile range +for cases where the IQR is greater than zero. +} +\keyword{impact_diagnostics} diff --git a/man/gen_combined_df.Rd b/man/gen_combined_df.Rd new file mode 100644 index 0000000..db8a4ad --- /dev/null +++ b/man/gen_combined_df.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{gen_combined_df} +\alias{gen_combined_df} +\title{Combine and align data from two touchstones} +\usage{ +gen_combined_df( + prev_dat, + df_clean, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST +) +} +\arguments{ +\item{prev_dat}{A data.frame of impact estimates corresponding to an earlier +touchstone.} + +\item{df_clean}{A data.frame of impact estimates corresponding to a more +recent touchstone.} + +\item{interest_cols}{A character vector of columns of interest. Defaults to +\link{COLNAMES_INTEREST_PRESSURE_TEST}.} + +\item{key_cols}{A character vector of columns of interest. Defaults to +\link{COLNAMES_KEY_PRESSURE_TEST}.} +} +\value{ +A data.frame which is a full join of \code{prev_dat} and \code{df_clean}. +Columns are disambiguated with the suffixes \code{"_old"} and \code{"_new"}. +} +\description{ +Generates a full join of two data.frames, selecting for columns of interest. +} +\keyword{impact_diagnostics} diff --git a/man/gen_national_iqr.Rd b/man/gen_national_iqr.Rd new file mode 100644 index 0000000..853a8b0 --- /dev/null +++ b/man/gen_national_iqr.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{gen_national_iqr} +\alias{gen_national_iqr} +\title{Generate IQR for key outcomes} +\usage{ +gen_national_iqr( + df, + group_cols = c("country", "vaccine", "activity_type"), + value_cols = c("deaths_averted", "dalys_averted"), + prefix = "national_iqr" +) +} +\arguments{ +\item{df}{A data.frame of impact estimates.} + +\item{group_cols}{A character vector of grouping columns. Defaults to +"country", "vaccine", "activity_type".} + +\item{value_cols}{A character vector of value columns. Defaults to +"deaths_averted" and "dalys_averted".} + +\item{prefix}{A string for the prefix applied to every IQR summary column. +Defaults to "national_iqr".} +} +\value{ +A \verb{} with the inter-quartile range of the columns +in \code{value_cols}, with the column name constructed as \verb{\{prefix\}_\{value_col\}} +using string interpolation. +} +\description{ +Generate IQR for key outcomes +} +\keyword{impact_diagnostics} diff --git a/man/generate_diffs.Rd b/man/generate_diffs.Rd new file mode 100644 index 0000000..a577600 --- /dev/null +++ b/man/generate_diffs.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{generate_diffs} +\alias{generate_diffs} +\title{Explore significant changes in deaths and DALYs} +\usage{ +generate_diffs( + prev_df, + curr_df, + interest_cols = COLNAMES_INTEREST_PRESSURE_TEST, + key_cols = COLNAMES_KEY_PRESSURE_TEST, + touchstone = DEF_TOUCHSTONE_OLD +) +} +\arguments{ +\item{prev_df}{A \verb{} of impact estimates from the previous +touchstone.} + +\item{curr_df}{A \verb{} of impact estimates for the current +touchstone.} + +\item{interest_cols}{A character vector of columns to check for differences. +Defaults to \link{COLNAMES_INTEREST_PRESSURE_TEST}.} + +\item{key_cols}{A character vector of columns to use when assigning campaign +identifiers. Passed to \code{\link[=add_campaign_id]{add_campaign_id()}}, defaults to +\link{COLNAMES_KEY_PRESSURE_TEST}.} + +\item{touchstone}{A six character string that can be converted to a six digit +numeric giving a touchstone identifier in \code{YYYYMM} format.} +} +\value{ +A list of tibbles of differences between \code{prev_df} and \code{curr_df}, +with one list element per element of \code{interest_cols}. +} +\description{ +Explore significant changes in deaths and DALYs +} +\keyword{impact_diagnostics} diff --git a/man/helpers.Rd b/man/helpers.Rd index fb9c4df..af26b2d 100644 --- a/man/helpers.Rd +++ b/man/helpers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers.R +% Please edit documentation in R/fn_helpers.R \name{helpers} \alias{helpers} \alias{make_novax_scenario} diff --git a/man/plotting.Rd b/man/plot_burden_diagnostics.Rd similarity index 89% rename from man/plotting.Rd rename to man/plot_burden_diagnostics.Rd index 8dd565e..8431edd 100644 --- a/man/plotting.Rd +++ b/man/plot_burden_diagnostics.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plotting} -\alias{plotting} +% Please edit documentation in R/fn_plotting_burden_diagnostics.R +\name{plot_burden_diagnostics} +\alias{plot_burden_diagnostics} \alias{plot_compare_demography} \alias{plot_age_patterns} \alias{plot_global_burden_decades} @@ -55,8 +55,8 @@ A \verb{} object that can be printed to screen in the plot frame or saved to an output device (i.e., saved as an image file). } \description{ -Plotting functions for burden and impact diagnostics. All functions operate +Plotting functions for burden diagnostics. All functions operate on data prepared for plotting by a corresponding -\link[=plotting_prep]{plotting-preparation function}. +\link[=plot_prep_burden_diagnostics]{plotting-preparation function}. } \keyword{plotting} diff --git a/man/plot_impact_diagnostics.Rd b/man/plot_impact_diagnostics.Rd new file mode 100644 index 0000000..aee324f --- /dev/null +++ b/man/plot_impact_diagnostics.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_plotting_impact_diagnostics.R +\name{plot_impact_diagnostics} +\alias{plot_impact_diagnostics} +\alias{plot_sig_diff} +\alias{plot_diff} +\alias{plot_modelling_group_variation} +\alias{plot_vaccine_gavi} +\alias{plot_cumul} +\title{Create impact diagnostics plots} +\usage{ +plot_sig_diff(data, outcome = IMPACT_OUTCOMES) + +plot_diff( + data, + outcome = IMPACT_OUTCOMES, + group_vars = IMPACT_GROUP_VARS, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) + +plot_modelling_group_variation(data) + +plot_vaccine_gavi(data) + +plot_cumul(data) +} +\arguments{ +\item{data}{A data.frame suitable for plotting. +\itemize{ +\item \code{plot_sig_diff()}: Output of \code{\link[=flag_large_diffs]{flag_large_diff()}}. +\item \code{plot_diff()}: Output of \code{\link[=gen_combined_df]{gen_combined_df()}}. +\item \code{plot_modelling_group_variation()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_mod_grp_varn()}}. +\item \code{plot_vaccine_gavi()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_vax_gavi()}} +\item \code{plot_cumul()}: Output of +\code{\link[=plot_prep_impact_diagnostics]{plot_prep_cumul()}} +}} + +\item{outcome}{A string for the impact outcome. One of \link{IMPACT_OUTCOMES}.} + +\item{group_vars}{A single string for the grouping variables. May be any of +\link{IMPACT_OUTCOMES}, which are \code{"activity_type"} and \code{"vaccine"}.} + +\item{touchstone_old}{A string for the previous touchstone in +format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_OLD}.} + +\item{touchstone_new}{A string for the current or new touchstone in +format \code{"YYYYMM"}. Defaults to \link{DEF_TOUCHSTONE_NEW}.} +} +\value{ +A \verb{} object that can be viewed or saved. +} +\description{ +Functions that create impact diagnostics plots (or plotting objects). All +functions are associated with one other upstream data processing function, +and can be used in a pipe with that function. Where appropriate, outcome +selection and label preparation is automated to reduce function arguments. + +Plotting functions for impact diagnostics. See +\link[=plot_prep_impact_diagnostics]{plotting-preparation functions} for a set of +helper functions that prepare impact diagnostics for plotting. See the +details of the \code{data} argument for functions that help to prepare the data. +} diff --git a/man/plotting_prep.Rd b/man/plot_prep_burden_diagnostics.Rd similarity index 84% rename from man/plotting_prep.Rd rename to man/plot_prep_burden_diagnostics.Rd index 881a9f3..3afa707 100644 --- a/man/plotting_prep.Rd +++ b/man/plot_prep_burden_diagnostics.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting_prep.R -\name{plotting_prep} -\alias{plotting_prep} +% Please edit documentation in R/fn_plotting_prep_bur_diag.R +\name{plot_prep_burden_diagnostics} +\alias{plot_prep_burden_diagnostics} \alias{prep_plot_demography} \alias{prep_plot_age} \alias{prep_plot_burden_decades} @@ -53,6 +53,8 @@ column "burden_outcome", and a list column of tibbles "burden_data". } \description{ Transform burden estimate data from modelling groups to make them suitable -for plotting using an appropriate \link[=plotting]{plotting function}. Each -preparation function corresponds to a plotting function. +for plotting using an appropriate +\link[=plot_prep_burden_diagnostics]{plotting function}. Each preparation function +corresponds to a plotting function. } +\keyword{plot_prep_burden_diagnostics} diff --git a/man/plot_prep_impact_diagnostics.Rd b/man/plot_prep_impact_diagnostics.Rd new file mode 100644 index 0000000..7afc4f1 --- /dev/null +++ b/man/plot_prep_impact_diagnostics.Rd @@ -0,0 +1,82 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_plotting_prep_impact_diagnostics.R +\name{plot_prep_impact_diagnostics} +\alias{plot_prep_impact_diagnostics} +\alias{prep_plot_mod_grp_varn} +\alias{prep_plot_vax_gavi} +\alias{prep_plot_cumul} +\title{Prepare impact diagnostics for plotting} +\usage{ +prep_plot_mod_grp_varn(df2, df3, outcome = IMPACT_OUTCOMES) + +prep_plot_vax_gavi( + data, + prev_data, + outcome = IMPACT_OUTCOMES, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) + +prep_plot_cumul( + data, + outcome, + disease, + touchstone_old = DEF_TOUCHSTONE_OLD, + touchstone_new = DEF_TOUCHSTONE_NEW +) +} +\arguments{ +\item{df2}{A \verb{} of impact estimates with at least columns +\code{modelling_group}, \code{vaccine}, outcome variable, and \code{fvps} (doses +delivered). Used as the primary data source for calculations in +\code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.} + +\item{df3}{A \verb{} of modelling group and vaccine combinations, +typically with one row per modelling group per vaccine. Joined with \code{df2} +to ensure complete group coverage in \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}}.} + +\item{outcome}{A character string for the impact outcome. Must be one of +\code{"deaths_averted"} or \code{"dalys_averted"}. For \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}, +\code{data} must include columns named \verb{\{outcome\}_old} and \verb{\{outcome\}_new}.} + +\item{data}{A \verb{} of impact estimates with columns including at least +those in \link{COLNAMES_KEY_PRESSURE_TEST}, the outcome variable, and +potentially other columns for analysis.} + +\item{prev_data}{A \verb{} of impact estimates from a previous touchstone, +used as a comparison baseline. Should have the same structure as \code{data}.} + +\item{touchstone_old}{A six-character touchstone identifier (YYYYMM format) +for the previous dataset. Defaults to \link{DEF_TOUCHSTONE_OLD}. Used in +\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.} + +\item{touchstone_new}{A six-character touchstone identifier (YYYYMM format) +for the current dataset. Defaults to \link{DEF_TOUCHSTONE_NEW}. Used in +\code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} and \code{\link[=prep_plot_cumul]{prep_plot_cumul()}}.} + +\item{disease}{A character string specifying a single disease for filtering +and analysis.} +} +\value{ +\itemize{ +\item \code{\link[=prep_plot_mod_grp_varn]{prep_plot_mod_grp_varn()}} returns a grouped \verb{} (grouped by +\code{vaccine}) with all columns from \code{df2} and \code{df3} plus derived columns: +\code{adj_outc} (adjusted outcome with small offset), \code{outcome_name} (input +outcome), and \code{mean_outc} (vaccine-level weighted mean outcome). +\item \code{\link[=prep_plot_vax_gavi]{prep_plot_vax_gavi()}} returns a \verb{} with columns \code{disease}, +\code{year}, \code{yearly_outcome}, \code{dataset} (factor with levels for old touchstone, +"Difference", and new touchstone), and \code{outcome_name}. Summarizes outcomes +by disease and year across two touchstones. +\item \code{\link[=prep_plot_cumul]{prep_plot_cumul()}} returns a \verb{} with columns \code{year}, +\code{modelling_group}, \code{touchstone}, \code{value} (cumulative or average outcome), +\code{line_type} ("solid" for individual models, "dashed" for model average), +and \code{outcome_name}. Returns \code{NULL} if the specified disease has no non-zero +data to plot. +} +} +\description{ +A suite of helper functions that sit between impact diagnostics functions and +plotting functions. These functions transform and aggregate impact estimates +to prepare them for visualisation. Functions have basic checks on input data +but otherwise assume users will not modify inputs. +} diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd index c2c49e0..0e95add 100644 --- a/man/plotting_theme.Rd +++ b/man/plotting_theme.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R +% Please edit documentation in R/fn_plotting_helpers.R \name{plotting_theme} \alias{plotting_theme} \alias{theme_vimc} diff --git a/man/round_numeric.Rd b/man/round_numeric.Rd new file mode 100644 index 0000000..3c95234 --- /dev/null +++ b/man/round_numeric.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{round_numeric} +\alias{round_numeric} +\title{Round numeric columns of a data.frame} +\usage{ +round_numeric(df) +} +\arguments{ +\item{df}{A data.frame.} +} +\description{ +Round numeric columns of a data.frame +} +\keyword{internal} diff --git a/man/save_outputs.Rd b/man/save_outputs.Rd new file mode 100644 index 0000000..0b2afbe --- /dev/null +++ b/man/save_outputs.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_impact_diagnostics.R +\name{save_outputs} +\alias{save_outputs} +\title{Save pressure-testing diagnostics to local file} +\usage{ +save_outputs( + missing_in_current, + missing_deaths, + missing_dalys, + changes_deaths, + changes_dalys, + subregional_flags_deaths_camp, + subregional_flags_deaths_rout, + subregional_flags_dalys_camp, + subregional_flags_dalys_rout, + output_dir = here::here("outputs") +) +} +\arguments{ +\item{missing_in_current}{A data.frame.} + +\item{missing_deaths}{A data.frame that is the output of +\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"deaths_averted"}.} + +\item{missing_dalys}{A data.frame that is the output of +\code{\link[=filter_invalid_trajectories]{filter_invalid_trajectories()}} with the outcome \code{"dalys_averted"}.} + +\item{changes_deaths}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}} +with the outcome \code{"deaths_averted"}.} + +\item{changes_dalys}{A data.frame that is the output of \code{\link[=flag_large_diffs]{flag_large_diffs()}} +with the outcome \code{"dalys_averted"}.} + +\item{subregional_flags_deaths_camp}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the +\code{"campaign"} activity type.} + +\item{subregional_flags_deaths_rout}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"deaths_averted_rate"} for the +\code{"routine"} activity type.} + +\item{subregional_flags_dalys_camp}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the +\code{"campaign"} activity type.} + +\item{subregional_flags_dalys_rout}{A data.frame that is the output of +\code{\link[=compare_natl_subreg]{compare_natl_subreg()}} with the outcome \code{"dalys_averted_rate"} for the +\code{"campaign"} activity type.} + +\item{output_dir}{A writeable directory. Defaults to "./outputs".} +} +\value{ +None. Called for the convenience side-effect of saving data.frames as +\code{.Rds} format. +} +\description{ +Save pressure-testing diagnostics data.frames to local compressed files in +the \code{.Rds} format. Input data.frames are generated by other package functions +and are not checked here. +} +\keyword{impact_diagnostics} diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd index 148311b..dcdfb7b 100644 --- a/man/validate_complete_incoming_files.Rd +++ b/man/validate_complete_incoming_files.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_complete_incoming_files} \alias{validate_complete_incoming_files} \title{Validate files in a burden estimate} @@ -20,4 +20,4 @@ This function expects that incoming burden files are in the directory given by \code{path_burden}, which holds a file dictionary which maps each data file to a specific scenario. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd index 293a605..e84f3f7 100644 --- a/man/validate_file_dict_template.Rd +++ b/man/validate_file_dict_template.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_file_dict_template} \alias{validate_file_dict_template} \title{Validate file dictionary template} @@ -25,4 +25,4 @@ scenarios i.e. the number of files that we expect from a model. Users should populate the file column to match the scenario-file. This function will run if a \code{file_dictionary.csv} file does not exist } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/validate_template_alignment.Rd b/man/validate_template_alignment.Rd index 7ff801a..c6c9e36 100644 --- a/man/validate_template_alignment.Rd +++ b/man/validate_template_alignment.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/burden_diagnostics.R +% Please edit documentation in R/fn_burden_diagnostics.R \name{validate_template_alignment} \alias{validate_template_alignment} \title{Check incoming burden set against template} @@ -19,4 +19,4 @@ against \code{template}, with information on missing and extra data. \description{ Identify extra and missing columns and rows in burden data. } -\keyword{diagnostics} +\keyword{burden_diagnostics} diff --git a/man/validate_ts_year.Rd b/man/validate_ts_year.Rd new file mode 100644 index 0000000..6812fcf --- /dev/null +++ b/man/validate_ts_year.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fn_helpers.R +\name{validate_ts_year} +\alias{validate_ts_year} +\title{Check and return touchstone year-month} +\usage{ +validate_ts_year(x) +} +\arguments{ +\item{x}{A string for the touchstone identifier.} +} +\value{ +The first 6 characters of \code{x} converted to a numeric. Also has side +effects of erroring if conditions on \code{x} are not met. +} +\description{ +Check and return touchstone year-month +} +\keyword{internal} diff --git a/man/who_subregions.Rd b/man/who_subregions.Rd new file mode 100644 index 0000000..56c5552 --- /dev/null +++ b/man/who_subregions.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data_who_subregions.R +\docType{data} +\name{who_subregions} +\alias{who_subregions} +\title{Data on countries in WHO subregions} +\format{ +\subsection{\code{who_subregions}}{ + +A data frame with 249 rows and 9 columns: +\describe{ +\item{choice_subregion}{Sub-region three character code with added letter +identifier.} +\item{country_name}{Country name in long form.} +\item{country}{ISO 3 character country code.} +\item{Global.Name}{A constant, 'World'.} +\item{Region.Name}{Continent-scale region identifier.} +\item{Sub.region}{Sub-continental scale region identifier.} +\item{Intermediate.Region.Name}{Alternative regional scale identifier.} +\item{subregion}{Sub-regional identifier.} +\item{vimc117}{Logical identifier.} +} +} +} +\source{ +Prepared by the VIMC secretariat. +} +\usage{ +who_subregions +} +\description{ +Data that groups countries into WHO subregions. +} +\keyword{data} diff --git a/tests/spelling.R b/tests/spelling.R index a8cf85b..d60e024 100644 --- a/tests/spelling.R +++ b/tests/spelling.R @@ -1,6 +1,7 @@ -if (requireNamespace('spelling', quietly = TRUE)) +if (requireNamespace("spelling", quietly = TRUE)) { spelling::spell_check_test( vignettes = TRUE, error = FALSE, skip_on_cran = TRUE ) +} diff --git a/tests/testthat.R b/tests/testthat.R index 5e3689c..25c27c3 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -7,6 +7,8 @@ # * https://testthat.r-lib.org/articles/special-files.html library(testthat) +library(checkmate) +library(dplyr) library(vimcheck) test_check("vimcheck") diff --git a/tests/testthat/_snaps/plotting-pressure-testing/plot-cumul.svg b/tests/testthat/_snaps/plotting-pressure-testing/plot-cumul.svg new file mode 100644 index 0000000..df9658c --- /dev/null +++ b/tests/testthat/_snaps/plotting-pressure-testing/plot-cumul.svg @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0e+00 +1e+04 +2e+04 +3e+04 + + + + + + + + +2000 +2010 +2020 +2030 +Year +Cumulative deaths_averted + +Modelling Group + + + + +LSHTM-Jit-201910 +LSHTM-Jit-202310 +Cumulative deaths_averted Over Time - + + diff --git a/tests/testthat/_snaps/plotting-pressure-testing/plot-diff.svg b/tests/testthat/_snaps/plotting-pressure-testing/plot-diff.svg new file mode 100644 index 0000000..a4a983e --- /dev/null +++ b/tests/testthat/_snaps/plotting-pressure-testing/plot-diff.svg @@ -0,0 +1,1288 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +routine + + + + + + + + + + +YF + + + + + + + + + + +routine + + + + + + + + + + +HepB_BD + + + + + + + + + + +routine + + + + + + + + + + +MCV1 + + + + + + + + + + +routine + + + + + + + + + + +MCV2 + + + + + + + + + + +campaign + + + + + + + + + + +Measles + + + + + + + + + + +campaign + + + + + + + + + + +YF + + + + + + + + + + +routine + + + + + + + + + + +HepB + + + + + + + +299.8 +299.9 +300.0 +300.1 +300.2 + + + + + +299.8 +299.9 +300.0 +300.1 +300.2 + + + + +299.8 +300.0 +300.2 +300.4 + + + + + +299.8 +299.9 +300.0 +300.1 +300.2 + + + + + + +299.7 +299.8 +299.9 +300.0 +300.1 +300.2 + + + + + +299.8 +299.9 +300.0 +300.1 +300.2 + + + + + + +299.7 +299.8 +299.9 +300.0 +300.1 +300.2 +99.7 +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + + +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + +99.8 +100.0 +100.2 +100.4 + + + + +99.7 +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + + +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + +99.8 +99.9 +100.0 +100.1 +100.2 + + + + + +202310 - deaths_averted +201910 - deaths_averted +deaths_averted: Current vs Previous Report + + diff --git a/tests/testthat/_snaps/plotting-pressure-testing/plot-modelling-group-variation.svg b/tests/testthat/_snaps/plotting-pressure-testing/plot-modelling-group-variation.svg new file mode 100644 index 0000000..09035fa --- /dev/null +++ b/tests/testthat/_snaps/plotting-pressure-testing/plot-modelling-group-variation.svg @@ -0,0 +1,292 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +activity_type + + + + + + + +10 +2.4768 +10 +2.477 +10 +2.4772 +10 +2.4774 +10 +2.4776 +Measles +HepB +MCV2 +MCV1 +HepB_BD +YF + + + + + + +Burden averted (deaths) +Vaccine +plot_modelling_group_variation + + diff --git a/tests/testthat/_snaps/plotting-pressure-testing/plot-sig-diff.svg b/tests/testthat/_snaps/plotting-pressure-testing/plot-sig-diff.svg new file mode 100644 index 0000000..7fe5197 --- /dev/null +++ b/tests/testthat/_snaps/plotting-pressure-testing/plot-sig-diff.svg @@ -0,0 +1,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Ethiopia | HepB | routine | 2003 +Ethiopia | HepB | routine | 2004 +Ethiopia | HepB | routine | 2018 +Ethiopia | HepB | routine | 2019 +Ethiopia | HepB | routine | 2022 +Ethiopia | HepB | routine | 2026 +Ethiopia | HepB | routine | 2000 +Ethiopia | HepB | routine | 2002 +Ethiopia | HepB | routine | 2005 +Ethiopia | HepB | routine | 2006 +Ethiopia | HepB | routine | 2007 +Ethiopia | HepB | routine | 2008 +Ethiopia | HepB | routine | 2009 +Ethiopia | HepB | routine | 2010 +Ethiopia | HepB | routine | 2011 +Ethiopia | HepB | routine | 2012 +Ethiopia | HepB | routine | 2013 +Ethiopia | HepB | routine | 2014 +Ethiopia | HepB | routine | 2015 +Ethiopia | HepB | routine | 2020 +Ethiopia | HepB | routine | 2021 +Ethiopia | HepB | routine | 2023 +Ethiopia | HepB | routine | 2024 +Ethiopia | HepB | routine | 2025 +Ethiopia | HepB | routine | 2027 +Ethiopia | HepB | routine | 2028 +Ethiopia | HepB | routine | 2029 +Ethiopia | HepB | routine | 2030 +Ethiopia | HepB | routine | 2001 +Ethiopia | HepB | routine | 2016 +Ethiopia | HepB | routine | 2017 +Ethiopia | HepB_BD | routine | 2007 +Ethiopia | HepB_BD | routine | 2013 +Ethiopia | HepB_BD | routine | 2016 +Ethiopia | HepB_BD | routine | 2023 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +0 +50 +100 +150 +200 +Difference + +modelling_group + + + +IC-Hallett +Significant Differences in deaths_averted by Country, Vaccine, Activity Type and Year + + diff --git a/tests/testthat/_snaps/plotting-pressure-testing/plot-vaccine-gavi.svg b/tests/testthat/_snaps/plotting-pressure-testing/plot-vaccine-gavi.svg new file mode 100644 index 0000000..e19814f --- /dev/null +++ b/tests/testthat/_snaps/plotting-pressure-testing/plot-vaccine-gavi.svg @@ -0,0 +1,247 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +201910 + + + + + + + + + + +Difference + + + + + + + + + + +202310 + + + + + + +Measles +YF +MenA +HepB + + + + +Measles +YF +MenA +HepB + + + + +Measles +YF +MenA +HepB +0.0e+00 +5.0e+02 +1.0e+03 +1.5e+03 +2.0e+03 +2.5e+03 + + + + + + +0.0e+00 +5.0e+02 +1.0e+03 +1.5e+03 + + + + +0e+00 +2e+02 +4e+02 +6e+02 +8e+02 + + + + + +Disease +Impact - deaths_averted + +Year + + + + + + + + +2021 +2022 +2023 +2024 +plot_vaccine_gavi + + diff --git a/tests/testthat/test-helpers.R b/tests/testthat/test-helpers.R index 4c7f56d..e6b277b 100644 --- a/tests/testthat/test-helpers.R +++ b/tests/testthat/test-helpers.R @@ -11,3 +11,122 @@ test_that("`make_novax_scenario()` works", { file_dict_colnames ) }) + +test_that("Adaptive rounding works", { + expect_number( + adaptive_round(runif(1)) + ) + expect_numeric( + adaptive_round(runif(10)) + ) + + large_num <- 1.346 + expected_num <- 1.3 + expect_identical( + adaptive_round(large_num), + expected_num + ) + + small_num <- 0.345 + expected_num <- 0.34 + expect_identical( + adaptive_round(small_num), + expected_num + ) + + # round numeric + df <- data.frame( + year_but_complex = 2010:2015 + 0.35, + num = 1.346 + ) + expect_data_frame(round_numeric(df)) + df_new <- round_numeric(df) + + # expect year not rounded + expect_identical( + df_new$year_but_complex, + df$year_but_complex + ) + + expect_identical( + df_new$num, + adaptive_round(df_new$num) + ) +}) + +test_that("`validate_ts_year()`: Validating touchstone works", { + ts <- "202010" + expect_number( + validate_ts_year(ts), + lower = 200001, + upper = 210012 + ) + + expect_error( + validate_ts_year(202010), + "should be a string" + ) + + ts <- "2020" + expect_error( + validate_ts_year(ts), + "at least 6 characters" + ) + + ts <- "199910" + expect_error( + validate_ts_year(ts), + "expected an year in the range \\[2000, 2100\\]" + ) + + ts <- "220010" + expect_error( + validate_ts_year(ts), + "expected an year in the range \\[2000, 2100\\]" + ) + + ts <- "202019" + expect_error( + validate_ts_year(ts), + "expected a month in the range \\[1, 12\\]" + ) +}) + +test_that("`add_campaign_id()`: Adding campaign identifier works", { + df <- data.frame( + key1 = c("a", "b", "c", "a", "b", "c"), + key2 = letters[1:6] + ) + + df_new <- add_campaign_id(df, c("key1", "key2")) + expect_data_frame(df_new) + expect_names(names(df_new), must.include = "campaign_id") + expect_identical( + unique(df_new$campaign_id), + 1L + ) + + df_new <- add_campaign_id(df, c("key1")) + expect_identical( + unique(df_new$campaign_id), + c(1L, 2L) + ) + + expect_error( + add_campaign_id("df", "key"), + "Must be of type 'data.frame'" + ) + expect_error( + add_campaign_id(df, letters), + "Must have at least 26 cols" + ) + expect_error( + add_campaign_id(df, c("a", "b")), + "(columns)*(are missing)" + ) + + expect_error( + add_campaign_id(df, 1:2), + "(key_cols)*(Must be of type 'character')" + ) +}) diff --git a/tests/testthat/test-impact_diagnostics_filtering.R b/tests/testthat/test-impact_diagnostics_filtering.R new file mode 100644 index 0000000..db0cc00 --- /dev/null +++ b/tests/testthat/test-impact_diagnostics_filtering.R @@ -0,0 +1,186 @@ +# each test uses a scoped copy of impact data +test_that("`filter_recent_ts()`: Filtering by touchstone works", { + df <- eg_impact + df$touchstone <- DEF_TOUCHSTONE_NEW + test_scenario_types <- rep( + c("default", "dummy"), + each = nrow(df) / 2 + ) + df$scenario_type <- test_scenario_types + + # touchstone is most recent one - expect filtering + expect_data_frame( + filter_recent_ts(df) + ) + # half rows are excluded due to scenario name + expect_equal( + nrow(filter_recent_ts(df)), + nrow(df) / 2L + ) + + # touchstone is older - no filtering + df$touchstone <- DEF_TOUCHSTONE_OLD + + expect_data_frame( + filter_recent_ts(df) + ) + expect_equal( + nrow(filter_recent_ts(df)), + nrow(df) + ) + + # warnings and errors + # bad df input + expect_error( + filter_recent_ts("df"), + "Must be of type 'data.frame'" + ) + expect_error( + filter_recent_ts(data.frame()) + ) + expect_error( + filter_recent_ts(data.frame(year = 2026)) + ) + expect_error( + filter_recent_ts(data.frame(year = 2026, disease = "YF")), + "Names must include the elements \\{'touchstone'\\}" + ) + expect_error( + filter_recent_ts(data.frame(touchstone = 2026, year = 2026)), + "(Touchstone year should be a string with at least 6)*(characters)" + ) + + # bad touchstone + expect_error( + filter_recent_ts(df, as.numeric(DEF_TOUCHSTONE_NEW)), + "(Touchstone year should be a string with at least 6)*(characters)" + ) +}) + +# each test uses a scoped copy of impact data +test_that("`filter_excluded_diseases_ts()`: Filtering by disease works", { + df <- eg_impact + df$touchstone <- DEF_TOUCHSTONE_NEW + + # replace known rows with an exlcuded disease + n_replaced <- 100L + df[1:n_replaced, "disease"] <- EXCLUDED_DISEASES[1L] + + # touchstone at/above threshold - expect no filtering + expect_data_frame( + filter_excluded_diseases_ts(df) + ) + expect_equal( + nrow(filter_excluded_diseases_ts(df)), + nrow(df) + ) + + # touchstone is older than threshold - expect filtering + df$touchstone <- DEF_TOUCHSTONE_OLD + expect_equal( + nrow(filter_excluded_diseases_ts(df, DEF_TOUCHSTONE_NEW)), + nrow(df) - n_replaced + ) + + # warnings and errors + # bad df input + expect_error( + filter_excluded_diseases_ts("df"), + "Must be of type 'data.frame'" + ) + expect_error( + filter_excluded_diseases_ts(data.frame()) + ) + expect_error( + filter_excluded_diseases_ts(data.frame(year = 2026)), + "Names must include the elements \\{'touchstone'\\}" + ) + expect_error( + filter_excluded_diseases_ts(data.frame(touchstone = 2026)), + "(Touchstone year should be a string with at least 6)*(characters)" + ) + + # bad touchstone + expect_error( + filter_excluded_diseases_ts(df, as.numeric(DEF_TOUCHSTONE_NEW)), + "(Touchstone year should be a string with at least 6)*(characters)" + ) +}) + +test_that("`flag_duplicates()`: Flagging duplicates works", { + df <- eg_impact + expect_warning( + flag_duplicates(df), + "duplicates found in data" + ) + expect_data_frame( + suppressWarnings(flag_duplicates(df)) + ) + + expect_equal( + nrow(suppressWarnings(flag_duplicates(df))), + nrow(df) + ) + + expect_true( + "n_key" %in% colnames(suppressWarnings(flag_duplicates(df))) + ) + + # errors + expect_error( + flag_duplicates("df"), + "Must be of type 'data.frame'" + ) + expect_error( + flag_duplicates(df, 1:100), + "Must have at least 100 cols" + ) + expect_error( + flag_duplicates(df, 1:6), + "'key_cols' failed: Must be of type 'character'" + ) + expect_error( + flag_duplicates( + data.frame(year = 2026), + "vaccine" + ), + "(Expected `df` to have columns)*(but columns)*(were missing)" + ) +}) + +# TODO: how should this be tested? +test_that("`filter_invalid_trajectories()`: Filtering impact trends works", { + prev_df <- suppressWarnings(flag_duplicates(eg_impact)) + prev_df <- dplyr::filter(prev_df, n_key == 1) + prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + prev_df$support_type <- "other" # unsure what values this can take + prev_df$coverage <- 0.5 + prev_df$deaths_averted <- 1e3 + prev_df$dalys_averted <- 1e6 + + # assign dummy values + curr_df <- prev_df + curr_df$deaths_averted <- NA_real_ + curr_df$dalys_averted <- NA_real_ + + expect_data_frame( + filter_invalid_trajectories( + curr_df, + prev_df + ) + ) + expect_equal( + nrow( + filter_invalid_trajectories( + curr_df, + prev_df + ) + ), + nrow(curr_df) + ) +}) diff --git a/tests/testthat/test-impact_diffs.R b/tests/testthat/test-impact_diffs.R new file mode 100644 index 0000000..f4da073 --- /dev/null +++ b/tests/testthat/test-impact_diffs.R @@ -0,0 +1,269 @@ +test_that("`gen_national_iqr()`: Generating impact IQR works", { + df <- suppressWarnings(flag_duplicates(eg_impact)) + df <- dplyr::filter(df, n_key == 1) + df <- tidyr::pivot_wider( + df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + + expect_data_frame( + gen_national_iqr(df) + ) + expect_data_frame( + gen_national_iqr(df, value_cols = "deaths_averted") + ) + expect_data_frame( + gen_national_iqr(df, value_cols = "deaths_averted") + ) + + group_cols <- c("country", "vaccine", "activity_type") + value_cols <- c("deaths_averted", "dalys_averted") + prefix <- "xyz_prefix" + df_iqr <- gen_national_iqr( + df, + group_cols, + value_cols, + prefix + ) + + expect_names( + names(df_iqr), + must.include = c(group_cols, sprintf("%s_%s", prefix, value_cols)) + ) + + # check for errors + expect_error( + gen_national_iqr("df"), + "Must be of type 'data.frame'" + ) + expect_error( + gen_national_iqr(data.frame(year = 2026)), + "Must have at least 5 cols" + ) + expect_error( + gen_national_iqr( + as.data.frame(as.list(1:10)) + ), + "Names must include" + ) + expect_error( + gen_national_iqr( + df, + 1:10 + ), + "(group_cols)*(Must be of type 'character')" + ) + expect_error( + gen_national_iqr( + df, + value_cols = "dummy_value_col" + ), + "(value_cols)*(has additional elements)*(dummy_value_col)" + ) + expect_error( + gen_national_iqr(df, prefix = 1L), + "Must be of type 'string'" + ) +}) + +test_that("`generate_diffs()`: Generating differences works", { + prev_df <- suppressWarnings(flag_duplicates(eg_impact)) + prev_df <- dplyr::filter(prev_df, n_key == 1) + prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + prev_df$support_type <- "other" # unsure what values this can take + prev_df$coverage <- 0.5 + + # assign dummy values + curr_df <- prev_df + curr_df$deaths_averted <- 1e3 + curr_df$dalys_averted <- 1e6 + + interest_cols <- c("deaths_averted", "dalys_averted") + difflist <- suppressWarnings( + generate_diffs( + prev_df, + curr_df, + interest_cols + ) + ) + expect_list( + difflist, + names = "unique" + ) + expect_names( + names(difflist), + permutation.of = interest_cols + ) + + # errors and warnings + expect_error( + generate_diffs("df"), + "Must be of type 'data.frame'" + ) + expect_error( + generate_diffs(data.frame(year = 2026)), + "Must have at least 14 cols" + ) + expect_error( + generate_diffs(prev_df, "df"), + "Must be of type 'data.frame'" + ) + expect_error( + generate_diffs(prev_df, data.frame(year = 2026)), + "Must have at least 14 cols" + ) + expect_error( + generate_diffs( + prev_df, + curr_df, + 1:10 + ), + "(interest_cols)*(Must be of type 'character')" + ) + expect_error( + generate_diffs( + prev_df, + curr_df, + key_cols = 1:10 + ), + "(key_cols)*(Must be of type 'character')" + ) + expect_error( + generate_diffs( + as.data.frame(as.list(1:14)), + curr_df + ), + "(colnames\\(prev_df\\))*(Names must include)" + ) + expect_error( + generate_diffs( + prev_df, + as.data.frame(as.list(1:14)) + ), + "(colnames\\(curr_df\\))*(Names must include)" + ) + expect_error( + generate_diffs( + prev_df, + curr_df, + touchstone = "999999" + ) + ) +}) + + +test_that("`flag_large_diffs()`: Flagging large diffs works", { + prev_df <- suppressWarnings(flag_duplicates(eg_impact)) + prev_df <- dplyr::filter(prev_df, n_key == 1) + prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + prev_df$support_type <- "other" # unsure what values this can take + prev_df$coverage <- 0.5 + + # assign dummy values + curr_df <- prev_df + curr_df$deaths_averted <- 1e6 + curr_df$dalys_averted <- 1e9 + + interest_cols <- c("deaths_averted", "dalys_averted") + changes <- suppressWarnings(generate_diffs( + prev_df, + curr_df, + interest_cols + )) + + # national IQR - inset dummy values for tests + national_iqr <- gen_national_iqr(prev_df) + national_iqr$national_iqr_deaths_averted <- seq_len(nrow(national_iqr)) + + expect_data_frame( + flag_large_diffs( + changes, + national_iqr, + "deaths_averted" + ) + ) + expect_data_frame( + flag_large_diffs( + changes, + national_iqr, + "dalys_averted" + ) + ) + + # check touchstones added + tstone_old <- "208801" + tstone_new <- "209901" + diffs <- flag_large_diffs( + changes, + national_iqr, + "deaths_averted", + touchstone_old = tstone_old, + touchstone_new = tstone_new + ) + expect_names( + names(diffs), + must.include = c(tstone_old, tstone_new) + ) +}) + +test_that("`gen_combined_df()`: Generating combined data works", { + prev_df <- suppressWarnings(flag_duplicates(eg_impact)) + prev_df <- dplyr::filter(prev_df, n_key == 1) + prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + prev_df$support_type <- "other" # unsure what values this can take + prev_df$coverage <- 0.5 + prev_df$fvps <- 1e6 + prev_df$target_population <- 2e6 + prev_df$touchstone <- "202010" + + # assign dummy values + curr_df <- prev_df + curr_df$deaths_averted <- 1e6 + curr_df$dalys_averted <- 1e9 + curr_df$touchstone <- "202310" + + expect_data_frame( + gen_combined_df( + prev_df, + curr_df + ) + ) + expect_names( + names( + gen_combined_df( + prev_df, + curr_df + ) + ), + must.include = sprintf( + "%s_%s", + c("deaths_averted", "dalys_averted"), + c("old", "new") + ) + ) + + # check error on touchstone + prev_df$touchstone <- NULL + curr_df$touchstone <- NULL + expect_error( + gen_combined_df(prev_df, curr_df), + "(Names)*(is missing elements)*(touchstone)" + ) +}) diff --git a/tests/testthat/test-natl_subreg_comparison.R b/tests/testthat/test-natl_subreg_comparison.R new file mode 100644 index 0000000..8194391 --- /dev/null +++ b/tests/testthat/test-natl_subreg_comparison.R @@ -0,0 +1,65 @@ +# TODO: unsure how this should be tested with data provided +test_that("`compare_natl_subreg()`: Comparing national-subregional works", { + df <- suppressWarnings(flag_duplicates(eg_impact)) + df <- dplyr::filter(df, n_key == 1) + df <- tidyr::pivot_wider( + df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" + ) + # prev_df$support_type <- "other" # unsure what values this can take + # prev_df$coverage <- 0.5 + + # assign dummy values + df$deaths_averted <- 1e3 + df$dalys_averted <- 1e6 + + df <- dplyr::left_join( + df, + who_subregions, + by = c("country", "country_name") + ) + + expect_data_frame( + compare_natl_subreg(df) + ) + expect_names( + names(compare_natl_subreg(df)), + must.include = c( + "country_name", + "vaccine", + "year", + "modelling_group", + "national_value", + "subregional_mean", + "subregional_iqr", + "difference", + "iqr_score" + ) + ) + + # errors and warnings + expect_error( + compare_natl_subreg("df"), + "Must be of type 'data.frame'" + ) + expect_error( + compare_natl_subreg(data.frame(year = 2026)), + "Must have at least 10 cols" + ) + expect_error( + compare_natl_subreg( + as.data.frame(as.list(1:10)) + ), + "Names must include the elements" + ) + expect_error( + compare_natl_subreg(df, "dummy_outcome"), + "`outcome` must be one of" + ) + expect_error( + compare_natl_subreg(df, activity_filter = "dummy_activity"), + "`activity_filter` must be one of" + ) +}) diff --git a/tests/testthat/test-plotting-pressure-testing.R b/tests/testthat/test-plotting-pressure-testing.R new file mode 100644 index 0000000..a27dead --- /dev/null +++ b/tests/testthat/test-plotting-pressure-testing.R @@ -0,0 +1,95 @@ +# preparatory code +prev_df <- suppressWarnings(flag_duplicates(eg_impact)) +prev_df <- dplyr::filter(prev_df, n_key == 1) +prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" +) +prev_df$support_type <- "other" # unsure what values this can take +prev_df$coverage <- 0.5 +prev_df$fvps <- 1e6 +prev_df$target_population <- 2e6 + +prev_df$deaths_averted <- withr::with_seed( + 1, + rnorm(nrow(prev_df), 100, 0.1) +) +prev_df$dalys_averted <- prev_df$deaths_averted * 100 +prev_df$touchstone <- "202010" + +# assign dummy values +curr_df <- prev_df +curr_df$deaths_averted <- withr::with_seed( + 1, + rnorm(nrow(prev_df), 300, 0.1) +) +curr_df$dalys_averted <- curr_df$deaths_averted * 100 +curr_df$touchstone <- "202110" + +interest_cols <- c("deaths_averted", "dalys_averted") +changes <- suppressWarnings(generate_diffs( + prev_df, + curr_df, + interest_cols +)) + +# national IQR - inset dummy values for tests +national_iqr <- gen_national_iqr(prev_df) +national_iqr$national_iqr_deaths_averted <- seq_len(nrow(national_iqr)) + +test_that("plot_sig_diff() works", { + df_plot <- flag_large_diffs(changes, national_iqr) + + p <- plot_sig_diff(df_plot) + + expect_class(p, "ggplot") + vdiffr::expect_doppelganger("plot_sig_diff", p) +}) + +test_that("plot_diff() works", { + df_plot <- gen_combined_df(prev_df, curr_df) + + p <- plot_diff(df_plot) + + expect_class(p, "ggplot") + vdiffr::expect_doppelganger("plot_diff", p) +}) + +test_that("plot_modelling_group_variation() works", { + prev_df <- dplyr::select(curr_df, vaccine, modelling_group) %>% + dplyr::distinct() %>% + dplyr::group_by(vaccine) %>% + dplyr::mutate(mod_num = dplyr::row_number()) + + df_plot <- prep_plot_mod_grp_varn(curr_df, prev_df) + + p <- plot_modelling_group_variation(df_plot) + + expect_class(p, "ggplot") + vdiffr::expect_doppelganger("plot_modelling_group_variation", p) +}) + +test_that("plot_vaccine_gavi() works", { + df_plot <- prep_plot_vax_gavi(curr_df, prev_df, "deaths_averted") + + p <- plot_vaccine_gavi(df_plot) + + expect_class(p, "ggplot") + vdiffr::expect_doppelganger("plot_vaccine_gavi", p) +}) + +test_that("plot_cumul() works", { + combined_df <- gen_combined_df(prev_df, curr_df) + + # NOTE: warnings probably generated due to use of dummy data + df_plot <- suppressWarnings( + prep_plot_cumul(combined_df, "deaths_averted", "Measles") + ) + + p <- plot_cumul(df_plot) + + expect_class(p, "ggplot") + vdiffr::expect_doppelganger("plot_cumul", p) +}) diff --git a/vignettes/design_decisions.Rmd b/vignettes/design_decisions.Rmd new file mode 100644 index 0000000..638f786 --- /dev/null +++ b/vignettes/design_decisions.Rmd @@ -0,0 +1,104 @@ +--- +title: "Package design decisions" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Package design decisions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This page documents _vimcheck_ package design decisions as a guide to users, and to potential contributors seeking to extend this package or revise some of those decisions. + +Some design decisions that live in the background include: + +- _vimcheck_ is set up as a modern R package built with _devtools_, _usethis_, and _testthat_ (testing); +- _vimcheck_ is documented using _roxygen2_ and _pkgdown_ for the package website; +- _vimcheck_ is developed on GitHub and uses continuous integration workflows provided by GitHub Actions; +- Any other decisions not covered here most likely follow advice in [Wickham and Bryan's _R Packages_](https://r-pkgs.org/). + +## Big picture + +_vimcheck_ is intended to collect, house, and be the single source for functionality used by VIMC to check submitted modelling outputs for discrepancies. +This solves the immediate problem that this functionality is currently spread and repeated over multiple reports, increasing the potential for discrepancies and functionality drift in the tools. +The overall goal is to improve the quality of VIMC's work by improving the reliability of VIMC outputs. +The main users are currently intended to be members of the VIMC Secretariat, but may include VIMC consortium members in future. + +## Function organisation + +_vimcheck_ is currently developed in bursts, with each burst so far adding a set of data wrangling and plotting functions taken from a specific VIMC report. + +The package has two axes of organisation for its functionality: the theme or goal of the report from which the function comes, and what the function does. + +The current reports from which functions have been taken relate to: + +- Diagnostics on burden estimates provided by VIMC modelling groups, and +- Pressure testing diagnostics intended to check for outliers in vaccine impact estimates. + +Functions are split into four categories, and the general idea is to have functionality be modular. +As an example, _vimcheck_ favours functions that produce intermediate products that can be reused by multiple downstream functions (following the [DRY principle](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)). + +- Functions that work on data: This is the main functionality of the package and includes functions that work with raw or semi-processed data, such as burden or impact estimates; +- Functions that prepare data for plotting: These are functions that sit between the data wrangling functions and plotting functions; +- Plotting functions that are typically related to one upstream data-wrangling or plotting-preparation function; +- Helper functions that provide useful but miscellaneous functionality. + +The general idea is to be able to set up small R pipelines within reports of the following form. + +```r +# some data read in from a local source +data |> + fn_wrangle_data() |> + fn_prep_data_for_plotting() |> + fn_plot_data() +``` + +The [function reference in the documentation](https://vimc.github.io/vimcheck/reference/index.html) is organised similarly. + +The R source code files in `./R/` are also organised in this way; for example `R/fn_burden_diagnostics.R` holds data-wrangling functions related to burden estimates, `R/fn_plotting_prep_bur_diag.R` holds functions to prepare wrangled data for plotting, and `R/fn_plotting_burden_diagnostics.R` holds plotting functions for the prepared data. + +## Package data + +_vimcheck_ includes some package data which is used to demonstrate and test its functionality. +Some data is purely dummy data that follows the structure of data seen in VIMC reports. +However, some data such as [eg_impact] is real VIMC data that has been released publicly as part of other packages. + +There is a number of package constants, which are single values or small vectors that are provided with and exported from the package. + +## Package dependencies + +We only list notable dependencies here. + +- [Tidyverse packages](https://tidyverse.org/) over base R or _data.table_; this is to keep functions within the dependency framework used in the reports from which they come --- we assume the report writers are also the package user-base and _vimcheck_ aims to be used, and friendly to use, for these people. + +- _cli_ and _glue_ for string interpolation and printing error messages to screen. + +- _diffdf_ to provide differences between data.frames. + +- _ggplot2_ for plotting; functions are not explicitly namespaced in many cases, but imported from the package to reduce code clutter in plotting function files. + +- _checkmate_ for input checking and to extend _testthat_. + +- [_vdiffr_](https://cran.r-project.org/package=vdiffr) for snapshot tests of plotting functions. + +### Data frames and tibbles + +Data-wrangling functions are agnostic to the type of tabular input, but always return a [tibble](https://tibble.tidyverse.org/) rather than a plain data.frame (if they return tabular data). +This is because internal manipulation using Tidyverse functions often results in tibbles being produced (e.g. using `tidyr::pivot_*()`, or `dplyr::group_by()` followed by ungrouping), but inexplicably some Tidyverse functions preserve data.frames. +We think it is preferable for users and developers to have a uniform function output type rather than have to guess whether it will be a tibble or a data.frame. +A second reason is that tibbles are much easier to read when printed to screen. + +**Note that** all downstream functions --- plotting preparation and plotting functions --- that expect tabular data expect a tibble, and **will error** if they are not passed a tibble! +This is partially to create some friction so that users check what they are passing: data processed with _vimcheck_ will always return a tibble, downstream functions only work on processed data, and errors might indicate that the wrong data are being passed. + +## Testing + +_vimcheck_ function are tested using package data (see above). +As a result, tests focus on input checking and the form of outputs. +There are comparatively few tests on correctness (e.g. are output numbers within a range), and this is a clear avenue for further development. diff --git a/vignettes/pressure_testing.Rmd b/vignettes/pressure_testing.Rmd new file mode 100644 index 0000000..e19271a --- /dev/null +++ b/vignettes/pressure_testing.Rmd @@ -0,0 +1,332 @@ +--- +title: "Pressure testing functions" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Pressure testing functions} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +This vignette shows how to use functions added from the pressure testing report. + +```{r setup} +library(vimcheck) +library(dplyr) +library(tidyr) +``` + +## Example data + +Example impact data are taken from [_vimpact_](https://github.com/vimc/vimpact) and included in the package as `eg_impact`. +This dataset holds projections for four countries, four diseases, and three modelling groups; combinations are shown below. + +```{r impact_data} +eg_impact + +# check combinations +distinct(eg_impact, country_name, disease, modelling_group) +``` + +Data on WHO regions is provided as `who_subregions` to enable comparing countries with their regions. + +```{r who_regions} +who_subregions +``` + +## Filtering impact data + +Impact data can be filtered, or flagged for filtering, in different ways. + +### Filtering on touchstone + +Data can be filtered on touchstone using `filter_recent_ts()`; rows with `scenario_type` matching "default" are retained. + +Some useful default touchstone values are `DEF_TOUCHSTONE_NEW` (`r DEF_TOUCHSTONE_NEW`), `DEF_TOUCHSTONE_OLD` (`r DEF_TOUCHSTONE_OLD`), and `DEF_TOUCHSTONE_OLD_OLD` (`r DEF_TOUCHSTONE_OLD_OLD`) + +```{r filter_ts} +# assign dummy touchstones and scenario type for demo +df <- eg_impact +df$touchstone <- "202401" + +test_scenario_types <- rep( + c("default", "dummy"), + each = nrow(df) / 2 +) +df$scenario_type <- test_scenario_types + +# use a package default touchstone +DEF_TOUCHSTONE_NEW + +# touchstone filtering is applied to all non-default scenario rows +filter_recent_ts(df, DEF_TOUCHSTONE_NEW) +``` + +### Filtering on diseases + +Data can be filtered to exclude a fixed set of diseases, if the touchstone is older than a threshold value, using `filter_excluded_diseases_ts()`. + +The excluded diseases are stored as the package constant `EXCLUDED_DISEASES` (`r cli::cli_text("{.str {EXCLUDED_DISEASES}}")`). + +```{r filter_diseases} +# make a copy and add dummy disease values +df_copy <- df +df_copy$disease <- rep( + EXCLUDED_DISEASES, each = nrow(df_copy) / length(EXCLUDED_DISEASES) +) + +# pass dummy touchstone to filter out all rows +filter_excluded_diseases_ts(df_copy, "202501") +``` + +### Flagging duplicates + +Duplicated rows in the data can be identifier by adding a flag variable column `n_key`, using `flag_duplicates()`. + +Duplicates are identified across columns specified by the argument `key_cols`, which defaults to `r cli::cli_text("{.str {COLNAMES_KEY_PRESSURE_TEST}}")`. + +```{r flag_dups} +# view n_keys column +flag_duplicates(eg_impact) %>% + select( + modelling_group, country, disease, + vaccine, activity_type, year, n_key + ) +``` + +### Filtering invalid trajectories + +The function `filter_invalid_trajectories()` can be used to remove rows where outcome values are missing in one of two paired datasets (each ideally from a different touchstone). + +This function should be applied to data which has the impact outcomes as columns (i.e., in wide format), with duplicates removed. + +The outcome to check is specified by the argument `"outcome"`. + +```{r filter_traj} +# create some dummy data from exampled data +prev_df <- flag_duplicates(eg_impact) +prev_df <- dplyr::filter(prev_df, n_key == 1) +prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" +) +prev_df$support_type <- "other" # unsure what values this can take +rows <- nrow(prev_df) +prev_df$coverage <- 0.5 + +prev_df$deaths_averted <- withr::with_seed( + 1, sample(c(5e2, NA_real_), rows, TRUE) +) +prev_df$dalys_averted <- withr::with_seed( + 1, sample(c(5e5, NA_real_), rows, TRUE) +) + +# assign dummy values +curr_df <- prev_df +curr_df$deaths_averted <- 1e3 +curr_df$dalys_averted <- 1e6 + +# View data with invalid trajectories removed +filter_invalid_trajectories(prev_df, curr_df, "deaths_averted") +``` + +## Identifying differences between datasets + +This section provides a general demonstration of tools that help to identify differences between two paired datasets. + +### Generating differences + +The function `generate_diffs()` uses the [_diffdf_ package](https://cran.r-project.org/package=diffdf) to identify differences between two data frames. +The output is a list of tibbles with the added column `VARIABLE` for the column examined for differences, with the baseline and comparator as `BASE` and `COMPARE`. + +```{r gen_diffs} +# use prev_df from section above +prev_df$deaths_averted <- withr::with_seed( + 1, rnorm(rows, 1e3, 100) +) +prev_df$dalys_averted <- withr::with_seed( + 1, rnorm(rows, 1e6, 100) +) + +# assign dummy values +curr_df <- prev_df +curr_df$deaths_averted <- prev_df$deaths_averted * 2 +curr_df$dalys_averted <- prev_df$dalys_averted * 2 + +interest_cols <- c("deaths_averted", "dalys_averted") +difflist <- generate_diffs( + prev_df, + curr_df, + interest_cols +) + +# all rows are different - view the output types +names(difflist) + +difflist +``` + +### Generate national IQRs + +The function `generate_national_iqr()` generates the inter-quartile range of the impact outcome for a dataset. + +```{r gen_iqr} +# assign dummy values to check functionality +df <- prev_df +df$deaths_averted <- withr::with_seed( + 1, rnorm(rows, 1e3, 100) +) + +iqr_df <- gen_national_iqr(df, value_cols = "deaths_averted") + +iqr_df +``` + +### Flag large differences + +The function `flag_large_diffs()` can be used with the outputs of `generate_diffs()` and `gen_national_iqr()` to find rows where the impact estimate is outside the range expected, given the IQR. + +```{r flag_diffs} +# assign some dummy values that will trigger flagging +difflist2 <- difflist +difflist2$deaths_averted$COMPARE <- 1e9 # typical values for BASE are ~1000 + +# all rows are flagged as having differences > IQR +flag_large_diffs(difflist2, iqr_df, "deaths_averted") +``` + +### Generate a combined dataset + +`gen_combined_df()` can be used to generate a combined dataset across two different touchstones. + +```{r gen_combined} +# regenerate data +prev_df <- flag_duplicates(eg_impact) +prev_df <- dplyr::filter(prev_df, n_key == 1) +prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" +) +prev_df$support_type <- "other" # unsure what values this can take +prev_df$coverage <- 0.5 +prev_df$fvps <- 1e6 +prev_df$target_population <- 2e6 +prev_df$touchstone <- "202010" + +# assign dummy values +curr_df <- prev_df +curr_df$deaths_averted <- 1e6 +curr_df$dalys_averted <- 1e9 +curr_df$touchstone <- "202110" + +gen_combined_df(prev_df, curr_df) +``` + +## Comparing national values to regional values + +`compare_natl_subreg()` allows comparing national impact rates with regional rates, where regions are the WHO regions. + +There is no example for this functionality as yet. + +## Plotting functions + +This section covers plotting functions. + +First we prepare some dummy data for plotting. + +```{r plot_prep} +# preparatory code with dummy data +prev_df <- flag_duplicates(eg_impact) +prev_df <- dplyr::filter(prev_df, n_key == 1) +prev_df <- tidyr::pivot_wider( + prev_df, + id_cols = {{ COLNAMES_KEY_PRESSURE_TEST }}, + names_from = "burden_outcome", + values_from = "impact" +) +prev_df$support_type <- "other" # unsure what values this can take +prev_df$coverage <- 0.5 +prev_df$fvps <- 1e6 +prev_df$target_population <- 2e6 + +prev_df$deaths_averted <- withr::with_seed( + 1, + rnorm(nrow(prev_df), 100, 0.1) +) +prev_df$dalys_averted <- prev_df$deaths_averted * 100 +prev_df$touchstone <- "202010" + +# assign dummy values +curr_df <- prev_df +curr_df$deaths_averted <- withr::with_seed( + 1, + rnorm(nrow(prev_df), 300, 0.1) +) +curr_df$dalys_averted <- curr_df$deaths_averted * 100 +curr_df$touchstone <- "202110" + +interest_cols <- c("deaths_averted", "dalys_averted") +changes <- generate_diffs( + prev_df, + curr_df, + interest_cols +) + +# national IQR - inset dummy values for tests +national_iqr <- gen_national_iqr(prev_df) +national_iqr$national_iqr_deaths_averted <- seq_len(nrow(national_iqr)) +``` + +### Plotting significant differences + +Find and flag large diffs using `flag_large_diffs()` and visualise the output using `plot_sig_diff()`. + +```{r plot_sig_diff} +flag_large_diffs(changes, national_iqr) |> + plot_sig_diff() +``` + +### Plotting modelling group variation + +Visualise variation in impact by modelling group using `plot_modelling_group_variation()`. + +Data should be prepared using `prep_plot_mod_grp_varn()` first. + +```{r plot_mod_grp_var} +prev_df_copy <- dplyr::select(curr_df, vaccine, modelling_group) %>% + dplyr::distinct() %>% + dplyr::group_by(vaccine) %>% + dplyr::mutate(mod_num = dplyr::row_number()) + +prep_plot_mod_grp_varn(curr_df, prev_df_copy) |> + plot_modelling_group_variation() +``` + +### Plotting GAVI vaccination + +Use `plot_vaccine_gavi()` on data that has been prepared using `prep_plot_vax_gavi()`. + +```{r plot_gavi} +prep_plot_vax_gavi(curr_df, prev_df, "deaths_averted") |> + plot_vaccine_gavi() +``` + +### Plot cumulative values + +Use `plot_cumul()` on data prepared using `prep_plot_cumul()` and `gen_combined_df()`. + +```{r plot_cumul} +gen_combined_df(prev_df, curr_df) |> + prep_plot_cumul("deaths_averted", "Measles") |> + plot_cumul() +``` \ No newline at end of file