diff --git a/.github/instructions/config-method.instructions.md b/.github/instructions/config-method.instructions.md index e78c5b04..8477d699 100644 --- a/.github/instructions/config-method.instructions.md +++ b/.github/instructions/config-method.instructions.md @@ -71,7 +71,7 @@ runners: ## Arguments -- Do **not** set `default` on any argument — defaults belong to the library, not the config. Use `example` to document a typical value. +- Do **not** set `default` on tuning/hyperparameter arguments — defaults belong to the library, not the config. Use `example` to document a typical value. Exception: if an argument is a variant-defining parameter that must always have a value, use `default` to ensure it is always set. - Use `info.test_default` to override a parameter value **only during `viash test`** (not in benchmarks). This is useful to reduce epoch counts, disable slow quality checks, etc., so tests run quickly without affecting real benchmark results. - Argument names use `--snake_case`. Viash exposes them in the script as `par['snake_case']` (Python) or `par$snake_case` (R). - After adding, removing, or renaming any argument, regenerate the `## VIASH START` block in the script so the `par` dict stays in sync: diff --git a/.github/instructions/method-scripts-r.instructions.md b/.github/instructions/method-scripts-r.instructions.md index 0605889e..512ecaee 100644 --- a/.github/instructions/method-scripts-r.instructions.md +++ b/.github/instructions/method-scripts-r.instructions.md @@ -40,12 +40,12 @@ This block is **auto-generated** by viash from the component's `config.vsh.yaml` **Method: reading input:** ```r -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) ``` **Method: writing simulated dataset output:** ```r -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = simulated_counts # integer matrix, cells x genes ), @@ -63,14 +63,14 @@ output$write_h5ad(par$output, compression = "gzip") **Metric: reading inputs:** ```r -input_spatial_dataset <- anndata::read_h5ad(par[["input_spatial_dataset"]]) -input_singlecell_dataset <- anndata::read_h5ad(par[["input_singlecell_dataset"]]) -input_simulated_dataset <- anndata::read_h5ad(par[["input_simulated_dataset"]]) +input_spatial_dataset <- anndataR::read_h5ad(par[["input_spatial_dataset"]]) +input_singlecell_dataset <- anndataR::read_h5ad(par[["input_singlecell_dataset"]]) +input_simulated_dataset <- anndataR::read_h5ad(par[["input_simulated_dataset"]]) ``` **Metric: writing score output:** ```r -output <- anndata::AnnData( +output <- anndataR::AnnData( uns = list( dataset_id = input_simulated_dataset$uns[["dataset_id"]], method_id = input_simulated_dataset$uns[["method_id"]], diff --git a/src/control_methods/negative_normal/config.vsh.yaml b/src/control_methods/negative_normal/config.vsh.yaml index 73c2559f..72118cec 100644 --- a/src/control_methods/negative_normal/config.vsh.yaml +++ b/src/control_methods/negative_normal/config.vsh.yaml @@ -4,8 +4,8 @@ name: negative_normal label: negative_normal summary: A negative control which generates normal distributed data. description: | - This control method generates normal distributed data as a negative control. - The mean and the sd are defined by the mean and sd of the input data. + This control method generates normally distributed data as a negative control, + using a fixed mean of 3 and standard deviation of 1. resources: - type: r_script diff --git a/src/control_methods/negative_normal/script.R b/src/control_methods/negative_normal/script.R index a58caad7..b113141d 100644 --- a/src/control_methods/negative_normal/script.R +++ b/src/control_methods/negative_normal/script.R @@ -9,7 +9,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) # generate random values n_rows <- nrow(input) @@ -21,7 +21,7 @@ values <- rnorm(n = n_rows * n_cols, mean = 3, sd = 1) values[values < 0] <- abs(values[values < 0]) cat("Generate outoput file\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = matrix(values, nrow = n_rows, ncol = n_cols) ), diff --git a/src/control_methods/negative_shuffle/script.R b/src/control_methods/negative_shuffle/script.R index e3b8e14f..8181f29d 100644 --- a/src/control_methods/negative_shuffle/script.R +++ b/src/control_methods/negative_shuffle/script.R @@ -9,7 +9,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) count_matrix <- as.matrix(input$layers[['counts']]) @@ -19,7 +19,7 @@ shuffled_matrix <- matrix(shuffled_values, nrow = nrow(count_matrix), ncol = nco cat("Generate outoput file\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = shuffled_matrix ), diff --git a/src/control_methods/positive/config.vsh.yaml b/src/control_methods/positive/config.vsh.yaml index b93ccc7c..5a8c3724 100644 --- a/src/control_methods/positive/config.vsh.yaml +++ b/src/control_methods/positive/config.vsh.yaml @@ -19,9 +19,6 @@ resources: engines: - type: docker image: openproblems/base_r:1 - setup: - - type: r - cran: SRTsim runners: - type: executable diff --git a/src/control_methods/positive/script.R b/src/control_methods/positive/script.R index b9854f75..d49cca3f 100644 --- a/src/control_methods/positive/script.R +++ b/src/control_methods/positive/script.R @@ -9,7 +9,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) input$uns$method_id <- meta$name diff --git a/src/methods/scdesign2/script.R b/src/methods/scdesign2/script.R index 59d103fc..aac648d2 100644 --- a/src/methods/scdesign2/script.R +++ b/src/methods/scdesign2/script.R @@ -11,7 +11,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) if (par$base != "domain") { stop("ONLY domain base") @@ -41,7 +41,7 @@ colnames(sim_out) <- input$obs_names cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(sim_out) ), diff --git a/src/methods/scdesign3_nb/script.R b/src/methods/scdesign3_nb/script.R index d1a3cc40..957ccb7d 100644 --- a/src/methods/scdesign3_nb/script.R +++ b/src/methods/scdesign3_nb/script.R @@ -15,7 +15,7 @@ meta <- list( ## VIASH END reticulate::py_config() cat("Read input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) sce <- SingleCellExperiment( list(counts = Matrix::t(input$layers[["counts"]])), @@ -56,7 +56,7 @@ sce_simu <- scdesign3( cat("Generating output file\n") new_obs <- sce_simu$new_covariate[c("row", "col")] -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(sce_simu$new_count) ), diff --git a/src/methods/scdesign3_poisson/script.R b/src/methods/scdesign3_poisson/script.R index e42bbf9f..90d56973 100644 --- a/src/methods/scdesign3_poisson/script.R +++ b/src/methods/scdesign3_poisson/script.R @@ -15,7 +15,7 @@ meta <- list( ## VIASH END reticulate::py_config() cat("Read input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) sce <- SingleCellExperiment( list(counts = Matrix::t(input$layers[["counts"]])), @@ -56,7 +56,7 @@ sce_simu <- scdesign3( cat("Generating output file\n") new_obs <- sce_simu$new_covariate[c("row", "col")] -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(sce_simu$new_count) ), diff --git a/src/methods/sparsim/script.R b/src/methods/sparsim/script.R index 4ca76a45..aed4c46f 100644 --- a/src/methods/sparsim/script.R +++ b/src/methods/sparsim/script.R @@ -23,7 +23,7 @@ find_cluster_indices <- function(cluster_column) { } cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) cat("SPARSim simulation start\n") @@ -68,7 +68,7 @@ simulated_result_ordered <- sim_result$count_matrix[ ] cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list(counts = t(simulated_result_ordered)), obs = input_ordered$obs[c("row", "col")], var = input_ordered$var, diff --git a/src/methods/splatter/script.R b/src/methods/splatter/script.R index 001492f3..b63735f9 100644 --- a/src/methods/splatter/script.R +++ b/src/methods/splatter/script.R @@ -12,7 +12,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) cat("Splatter simulation start\n") @@ -51,7 +51,7 @@ simulated_result_ordered <- counts(simulated_result)[ ] cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list(counts = t(simulated_result_ordered)), obs = input_ordered$obs[c("row", "col")], var = input_ordered$var, diff --git a/src/methods/srtsim/script.R b/src/methods/srtsim/script.R index 95768862..a679d5f7 100644 --- a/src/methods/srtsim/script.R +++ b/src/methods/srtsim/script.R @@ -12,12 +12,12 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) cat("SRTsim simulation start\n") -real_count <- Matrix::t(input$layers["counts"]) -real_loc <- data.frame(x = input$obs["row"], y = input$obs["col"], region = input$obs["spatial_cluster"]) +real_count <- Matrix::t(input$layers[["counts"]]) +real_loc <- data.frame(x = input$obs[["row"]], y = input$obs[["col"]], region = input$obs[["spatial_cluster"]]) rownames(real_loc) <- rownames(input$obs) simSRT <- createSRT(count_in = real_count, loc_in = real_loc) @@ -41,7 +41,7 @@ col_data <- data.frame( cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(counts_single) ), diff --git a/src/methods/symsim/script.R b/src/methods/symsim/script.R index 793e9be9..e921b5a2 100644 --- a/src/methods/symsim/script.R +++ b/src/methods/symsim/script.R @@ -12,7 +12,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) +input <- anndataR::read_h5ad(par$input) cat("SymSim simulation start\n") @@ -89,7 +89,7 @@ simulated_result_ordered <- counts(simulated_result)[ ] cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(simulated_result_ordered) ), diff --git a/src/methods/zinbwave/script.R b/src/methods/zinbwave/script.R index dbd439e8..759b2c24 100644 --- a/src/methods/zinbwave/script.R +++ b/src/methods/zinbwave/script.R @@ -13,15 +13,7 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input <- anndata::read_h5ad(par$input) - -# sce <- SingleCellExperiment( -# list(counts = Matrix::t(input$layers[["counts"]])), -# colData = input$obs -# ) - -# ordered_indices <- order(colData(sce)$spatial_cluster) -# sce_ordered <- sce[, ordered_indices] +input <- anndataR::read_h5ad(par$input) ordered_indices <- order(input$obs$spatial_cluster) input_ordered <- input[ordered_indices] @@ -36,30 +28,21 @@ cpus <- if (is.null(meta$cpus)) 2L else meta$cpus multicoreParam <- MulticoreParam(workers = cpus) -# X <- model.matrix(~spatial_cluster, data=colData(sce_ordered)) X <- model.matrix(~spatial_cluster, data = input_ordered$obs) -# params <- splatter::zinbEstimate(as.matrix(counts(sce_ordered)), design.samples = X, BPPARAM = multicoreParam) params <- splatter::zinbEstimate(as.matrix(t(input_ordered$layers[["counts"]])), design.samples = X, BPPARAM = multicoreParam) simulated_result <- splatter::zinbSimulate(params) colnames(simulated_result) <- rownames(input_ordered$obs) rownames(simulated_result) <- rownames(input_ordered$var) -# simulated_result_order <- sce_ordered -# counts(simulated_result_order) <- counts(simulated_result) - -# simulated_result_order <- simulated_result_order[,match(colnames(sce), colnames(simulated_result_order))] -# simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)),] -# new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")]) - simulated_result_ordered <- counts(simulated_result)[ match(rownames(counts(simulated_result)), rownames(input_ordered$var)), match(colnames(counts(simulated_result)), rownames(input_ordered$obs)) ] cat("Generating output\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( layers = list( counts = Matrix::t(simulated_result_ordered) ), diff --git a/src/metrics/downstream/script.R b/src/metrics/downstream/script.R index dacffe3b..d9d7adff 100644 --- a/src/metrics/downstream/script.R +++ b/src/metrics/downstream/script.R @@ -15,9 +15,9 @@ meta <- list( ## VIASH END source(paste0(meta[["resources_dir"]], "/utils.R")) -input_real_sp <- anndata::read_h5ad(par$input_spatial_dataset) -input_sc <- anndata::read_h5ad(par$input_singlecell_dataset) -input_simulated_sp <- anndata::read_h5ad(par$input_simulated_dataset) +input_real_sp <- anndataR::read_h5ad(par$input_spatial_dataset) +input_sc <- anndataR::read_h5ad(par$input_singlecell_dataset) +input_simulated_sp <- anndataR::read_h5ad(par$input_simulated_dataset) cat("spatial variable gene evaluation\n") real_svg <- generate_svg_sparkx(input_real_sp) @@ -78,7 +78,7 @@ uns_metric_values <- c( ) cat("Writing output AnnData to file\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( uns = list( dataset_id = input_simulated_sp$uns[["dataset_id"]], method_id = input_simulated_sp$uns[["method_id"]], diff --git a/src/metrics/ks_statistic_gene_cell/script.R b/src/metrics/ks_statistic_gene_cell/script.R index 08d14809..c680f09d 100644 --- a/src/metrics/ks_statistic_gene_cell/script.R +++ b/src/metrics/ks_statistic_gene_cell/script.R @@ -29,11 +29,11 @@ meta <- list( ## VIASH END cat("Reading input files\n") -input_spatial_dataset <- anndata::read_h5ad(par[["input_spatial_dataset"]]) -input_singlecell_dataset <- anndata::read_h5ad(par[[ +input_spatial_dataset <- anndataR::read_h5ad(par[["input_spatial_dataset"]]) +input_singlecell_dataset <- anndataR::read_h5ad(par[[ "input_singlecell_dataset" ]]) -input_simulated_dataset <- anndata::read_h5ad(par[["input_simulated_dataset"]]) +input_simulated_dataset <- anndataR::read_h5ad(par[["input_simulated_dataset"]]) real_counts <- input_spatial_dataset$layers[["counts"]] sim_counts <- input_simulated_dataset$layers[["counts"]] @@ -267,7 +267,7 @@ uns_metric_values <- c( ) cat("Writing output AnnData to file\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( uns = list( dataset_id = input_simulated_dataset$uns[["dataset_id"]], method_id = input_simulated_dataset$uns[["method_id"]], diff --git a/src/metrics/ks_statistic_sc_features/config.vsh.yaml b/src/metrics/ks_statistic_sc_features/config.vsh.yaml index c25e2354..30c6c138 100644 --- a/src/metrics/ks_statistic_sc_features/config.vsh.yaml +++ b/src/metrics/ks_statistic_sc_features/config.vsh.yaml @@ -19,20 +19,20 @@ info: references: doi: 10.1201/9780429485572 - name: ks_statistic_nn_correlation - label: Library size - summary: KS statistic of the library size. + label: Nearest-neighbour correlation + summary: KS statistic of the nearest-neighbour correlation. description: | - The Kolmogorov-Smirnov statistic comparing the nn correlation in the real datasets versus the nn correlation in the simmulated datasets. + The Kolmogorov-Smirnov statistic comparing the nn correlation in the real datasets versus the nn correlation in the simulated datasets. min: -Inf max: +Inf maximize: false references: doi: 10.1201/9780429485572 - name: ks_statistic_morans_I - label: Effective library size - summary: KS statistic of the effective library size. + label: Moran's I + summary: KS statistic of Moran's I. description: | - The Kolmogorov-Smirnov statistic comparing the morans I of the real datasets versus the morans I of the simulated datasets. + The Kolmogorov-Smirnov statistic comparing the Moran's I of the real datasets versus the Moran's I of the simulated datasets. min: -Inf max: +Inf maximize: false diff --git a/src/metrics/ks_statistic_sc_features/script.R b/src/metrics/ks_statistic_sc_features/script.R index d5d87f1d..5d6e9277 100644 --- a/src/metrics/ks_statistic_sc_features/script.R +++ b/src/metrics/ks_statistic_sc_features/script.R @@ -55,9 +55,9 @@ meta <- list( source(paste0(meta[["resources_dir"]], "/utils.R")) -input_real_sp <- anndata::read_h5ad(par$input_spatial_dataset) -input_sc <- anndata::read_h5ad(par$input_singlecell_dataset) -input_simulated_sp <- anndata::read_h5ad(par$input_simulated_dataset) +input_real_sp <- anndataR::read_h5ad(par$input_spatial_dataset) +input_sc <- anndataR::read_h5ad(par$input_singlecell_dataset) +input_simulated_sp <- anndataR::read_h5ad(par$input_simulated_dataset) real_log_count <- t(input_real_sp$layers[["logcounts"]]) real_prob_matrix <- input_real_sp$obsm[["celltype_proportions"]] @@ -157,7 +157,7 @@ uns_metric_values <- c( ) cat("Writing output AnnData to file\n") -output <- anndata::AnnData( +output <- anndataR::AnnData( uns = list( dataset_id = input_simulated_sp$uns[["dataset_id"]], method_id = input_simulated_sp$uns[["method_id"]], diff --git a/src/process_datasets/convert/script.R b/src/process_datasets/convert/script.R index 947579ba..b03d5349 100644 --- a/src/process_datasets/convert/script.R +++ b/src/process_datasets/convert/script.R @@ -55,7 +55,7 @@ uns <- list( ) cat("Transforming single cell into AnnData\n") -output_sc <- anndata::AnnData( +output_sc <- anndataR::AnnData( layers = list( counts = process_matrix(input_sc, "counts") ), @@ -84,7 +84,7 @@ output_sc <- anndata::AnnData( cat("Transforming spatial into AnnData\n") celltype_proportions <- as.data.frame(metadata(input_sp)[["celltype_prop"]]) -output_sp <- anndata::AnnData( +output_sp <- anndataR::AnnData( layers = list( counts = process_matrix(input_sp, "counts"), logcounts = process_matrix(input_sp, "logcounts") diff --git a/src/process_datasets/generate_sim_spatialcluster/script.R b/src/process_datasets/generate_sim_spatialcluster/script.R index 6632ed71..f3f8e3a8 100644 --- a/src/process_datasets/generate_sim_spatialcluster/script.R +++ b/src/process_datasets/generate_sim_spatialcluster/script.R @@ -16,8 +16,8 @@ meta <- list( source(file.path(meta$resources_dir, "utils.R")) cat("Read input files\n") -input_real_sp <- anndata::read_h5ad(par$input_sp) -input_simulated_sp <- anndata::read_h5ad(par$input_sp_sim) +input_real_sp <- anndataR::read_h5ad(par$input_sp) +input_simulated_sp <- anndataR::read_h5ad(par$input_sp_sim) cat("add spatial cluster in simulated dataset:\n") sim_cluster <- generate_sim_spatialCluster(input_real_sp, input_simulated_sp) diff --git a/src/process_datasets/precompute_downstream/script.R b/src/process_datasets/precompute_downstream/script.R index e698be13..2f707e57 100644 --- a/src/process_datasets/precompute_downstream/script.R +++ b/src/process_datasets/precompute_downstream/script.R @@ -18,7 +18,7 @@ meta <- list( source(file.path(meta$resources_dir, "utils.R")) cat("Read input files\n") -input_sp <- anndata::read_h5ad(par$input_sp) +input_sp <- anndataR::read_h5ad(par$input_sp) cat("Spatial dataset:\n") print(input_sp) diff --git a/src/process_datasets/sc_features/script.R b/src/process_datasets/sc_features/script.R index 5541ab35..888e2b4f 100644 --- a/src/process_datasets/sc_features/script.R +++ b/src/process_datasets/sc_features/script.R @@ -1,4 +1,4 @@ -requireNamespace("anndata", quietly = TRUE) +requireNamespace("anndataR", quietly = TRUE) requireNamespace("scFeatures", quietly = TRUE) ## VIASH START @@ -11,7 +11,7 @@ par <- list( ## VIASH END cat("Read input files\n") -input_sp <- anndata::read_h5ad(par$input_sp) +input_sp <- anndataR::read_h5ad(par$input_sp) cat("Spatial dataset:\n") print(input_sp) @@ -27,7 +27,7 @@ species <- organism_mapping[[input_sp$uns[["dataset_organism"]]]] cat("Run scFeatures\n") scfeatures_result <- scFeatures::scFeatures( data = Matrix::t(input_sp$layers[["logcounts"]]), - sample = rep("sample1", input_sp$n_obs), + sample = rep("sample1", input_sp$n_obs()), spatialCoords = input_sp$obs[, c("row", "col")], feature_types = c("L_stats", "celltype_interaction", "nn_correlation", "morans_I"), type = "spatial_t",