Fix dataset info components (#316)
* simplify get_dataset_info component

* fix script

* change default into example

* fix script

* fix script
rcannood authored Dec 19, 2023
1 parent 17cc7cf commit 4b7c085
Showing 5 changed files with 28 additions and 16 deletions.
@@ -14,7 +14,7 @@ platforms:
     image: ghcr.io/openproblems-bio/base_r:1.0.2
     setup:
       - type: r
-        cran: [ purrr, dplyr, yaml, rlang, processx ]
+        cran: [ yaml, jsonlite ]
   - type: nextflow
     directives:
       label: [lowmem, lowtime, lowcpu]
23 changes: 10 additions & 13 deletions src/common/process_task_results/get_dataset_info/script.R
@@ -1,27 +1,24 @@
-library(purrr, warn.conflicts = FALSE)
-library(dplyr, warn.conflicts = FALSE)
-library(rlang, warn.conflicts = FALSE)
+requireNamespace("jsonlite", quietly = TRUE)
+requireNamespace("yaml", quietly = TRUE)
 
 ## VIASH START
 par <- list(
   input = "resources_test/common/task_metadata/dataset_info.yaml",
-  output = "output/metric_info.json"
+  output = "output/dataset_info.json"
 )
 ## VIASH END
 
 datasets <- yaml::yaml.load_file(par$input)
 
-df <- map_df(datasets, function(dataset) {
-  info <- as_tibble(map(dataset, as.data.frame))
-}) %>%
-  rename(
-    data_url = dataset_url,
-    data_reference = dataset_reference
-  )
+# transform into format expected by website
+datasets_formatted <- lapply(datasets, function(dataset) {
+  dataset$data_url <- dataset$dataset_url
+  dataset$data_reference <- dataset$dataset_reference
+  dataset
+})
 
 jsonlite::write_json(
-  purrr::transpose(df),
+  datasets_formatted,
   par$output,
   auto_unbox = TRUE,
   pretty = TRUE
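For context, here is a minimal, self-contained R sketch of the transformation the simplified script.R now performs. The input list and its values are invented for illustration (they stand in for the parsed dataset_info.yaml); only the field renaming and the jsonlite::write_json call mirror the diff above.

requireNamespace("jsonlite", quietly = TRUE)

# hypothetical stand-in for yaml::yaml.load_file(par$input)
datasets <- list(
  list(
    dataset_id = "example_dataset",
    dataset_url = "https://example.org/data",
    dataset_reference = "doe2023example"
  )
)

# expose dataset_url / dataset_reference under the names the website expects
datasets_formatted <- lapply(datasets, function(dataset) {
  dataset$data_url <- dataset$dataset_url
  dataset$data_reference <- dataset$dataset_reference
  dataset
})

jsonlite::write_json(
  datasets_formatted,
  "output/dataset_info.json",
  auto_unbox = TRUE,
  pretty = TRUE
)

Compared with the previous purrr/dplyr pipeline, each dataset entry stays a plain named list, so no tibble round-trip is needed before serialising to JSON.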
16 changes: 15 additions & 1 deletion src/datasets/resource_scripts/dataset_info.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+DATASETS_DIR="s3://openproblems-data/resources/datasets"
+
 cat > "/tmp/params.yaml" << HERE
 param_list:
   - id: openproblems_v1
@@ -37,4 +39,16 @@ tw launch https://github.com/openproblems-bio/openproblems-v2.git \
   --workspace 53907369739130 \
   --compute-env 1pK56PjjzeraOOC2LDZvN2 \
   --params-file "/tmp/params.yaml" \
-  --config /tmp/nextflow.config
+  --config /tmp/nextflow.config
+
+
+# # run locally after the above has finished
+# nextflow run . \
+#   -main-script target/nextflow/common/process_task_results/get_dataset_info/main.nf \
+#   -profile docker \
+#   -resume \
+#   --input "$DATASETS_DIR/dataset_info.yaml" \
+#   --task_id "common" \
+#   --output "dataset_info.json" \
+#   --output_state state.yaml \
+#   --publish_dir "../website/documentation/reference/datasets/data/"
@@ -16,7 +16,7 @@ functionality:
         direction: input
         description: If defined, only the normalization with this ID will be included in the output.
         multiple: true
-        default: [ log_cp10k ]
+        example: [ log_cp10k ]
   - name: Outputs
     arguments:
       - name: "--output"
1 change: 1 addition & 0 deletions src/datasets/workflows/extract_dataset_info/main.nf
@@ -32,6 +32,7 @@ workflow run_wf {

     | joinStates { ids, states ->
       // remove normalization id
+      // TODO: make this optional through a parameter?
       def dataset_uns = states.collect{state ->
         def uns = state.dataset_uns.clone()
         uns.remove("normalization_id")
