Merge pull request nf-core#11 from nf-core/linting_and_testing

Merge branch 'dev' of github.com:nf-core/drugresponseeval into dev
picciama · Nov 25, 2024 · 3d00d07 · 3d00d07
2 parents d3a8864 + 004555c
commit 3d00d07
Show file tree

Hide file tree

Showing 57 changed files with 584 additions and 1,273 deletions.
diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml
@@ -2,17 +2,20 @@ name: nf-core template version comment
 # This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version.
 # It posts a comment to the PR, even if it comes from a fork.
 
-on: pull_request_target
+on:
+  pull_request:
 
 jobs:
   template_version:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
 
       - name: Read template version from .nf-core.yml
-        uses: pietrobolcato/action-read-yaml@1.0.0
+        uses: nichmor/minimal-read-yaml@v0.0.2
         id: read_yml
         with:
           config: ${{ github.workspace }}/.nf-core.yml
@@ -24,20 +27,21 @@ jobs:
 
       - name: Check nf-core outdated
         id: nf_core_outdated
-        run: pip list --outdated | grep nf-core
+        run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV}
 
       - name: Post nf-core template version comment
         uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
         if: |
-          ${{ steps.nf_core_outdated.outputs.stdout }} =~ 'nf-core'
+          contains(env.OUTPUT, 'nf-core')
         with:
           repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }}
           allow-repeats: false
           message: |
-            ## :warning: Newer version of the nf-core template is available.
-
-            Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}.
-            Please update your pipeline to the latest version.
-
-            For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync).
+            > [!WARNING]
+            > Newer version of the nf-core template is available.
+            >
+            > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}.
+            > Please update your pipeline to the latest version.
+            >
+            > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync).
           #
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ results/
 testing/
 testing*
 *.pyc
+null/
+.idea/
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -1,20 +1,29 @@
 bump_version: null
-lint: null
-nf_core_version: 3.0.1
+lint:
+  files_exist:
+    - conf/igenomes.config
+    - conf/igenomes_ignored.config
+    - assets/multiqc_config.yml
+  files_unchanged:
+    - .github/CONTRIBUTING.md
+    - assets/sendmail_template.txt
+  multiqc_config: false
+  nextflow_config:
+    - params.input
+nf_core_version: 3.0.2
 org_path: null
 repository_type: pipeline
 template:
   author: Judith Bernett
-  description: "This pipeline takes a drug response prediction model architecture
-    as input and evaluates its performance by using custom training, validation, and
-    test sets. It compares it against existing deep learning models and baseline models.
-    Additionally, it tests the model for overfitting, consistency, robustness, and
-    generalization. "
+  description: This pipeline evaluates drug response models in various settings on a variety of datasets.
   force: false
   is_nfcore: true
   name: drugresponseeval
   org: nf-core
   outdir: .
-  skip_features: null
+  skip_features:
+    - igenomes
+    - multiqc
+    - fastqc
   version: 1.0dev
 update: null
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,8 +9,16 @@ Initial release of nf-core/drugresponseeval, created with the [nf-core](https://
 
 ### `Added`
 
+- Updated to the new template
+- Added tests that run with docker, singularity, apptainer, and conda
+- Added the Docker container and the conda env.yml to `nextflow.config`. We just need one container for all
+  processes as this pipeline automates the PyPI package drevalpy.
+- Added usage and output documentation.
+
 ### `Fixed`
 
+- Fixed linting issues
+
 ### `Dependencies`
 
 ### `Deprecated`
diff --git a/CITATIONS.md b/CITATIONS.md
@@ -1,5 +1,9 @@
 # nf-core/drugresponseeval: Citations
 
+## [DrugResponseEval](https://github.com/nf-core/drugresponseeval/)
+
+> Bernett, J, Iversen, P, Picciani, M, Wilhelm, M, Baum, K, List, M. Will be published soon.
+
 ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/)
 
 > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
@@ -10,13 +14,25 @@
 
 ## Pipeline tools
 
-- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+- [DrEvalPy](https://github.com/daisybio/drevalpy): The pipeline mostly automates the individual steps of the DrEvalPy PyPI package.
+
+  > Bernett, J, Iversen, P, Picciani, M, Wilhelm, M, Baum, K, List, M. Will be published soon.
+
+- [DIPK](https://doi.org/10.1093/bib/bbae153): Implemented model in the pipeline.
+
+  > Li P, Jiang Z, Liu T, Liu X, Qiao H, Yao X. Improving drug response prediction via integrating gene relationships with deep learning. Briefings in Bioinformatics. 2024 May;25(3):bbae153.
+
+- [MOLI](https://doi.org/10.1093/bioinformatics/btz318): Implemented model in the pipeline.
+
+  > Sharifi-Noghabi H, Zolotareva O, Collins CC, Ester M. MOLI: multi-omics late integration with deep neural networks for drug response prediction. Bioinformatics. 2019 Jul;35(14):i501-9.
+
+- [SRMF](https://doi.org/10.1186/s12885-017-3500-5): Implemented model in the pipeline.
 
-> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
+  > Wang L, Li X, Zhang L, Gao Q. Improved anticancer drug response prediction in cell lines using matrix factorization with similarity regularization. BMC Cancer. 2017 Dec;17:1-2.
 
-- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+- [SuperFELT](https://doi.org/10.1186/s12859-021-04146-z): Implemented model in the pipeline.
 
-> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+  > Park S, Soh J, Lee H. Super.FELT: supervised feature extraction learning using triplet loss for drug response prediction with multi-omics data. BMC Bioinformatics. 2021 May 25;22(1):269.
 
 ## Software packaging/containerisation tools
 

diff --git a/README.md b/README.md
@@ -34,17 +34,7 @@ tuning is fair and consistent. With its flexible model interface, DrEval support
 ranging from statistical models to complex neural networks. By contributing your model to the
 DrEval catalog, you can increase your work's exposure, reusability, and transferability.
 
-# ![DrEval_pipeline](assets/DrEval_pipeline_simplified.png)
-
-<!-- TODO nf-core:
-   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the
-   major pipeline sections and the types of output it produces. You're giving an overview to someone new
-   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction
--->
-
-<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
+# ![Pipeline diagram showing the major steps of nf-core/drugresponseeval](assets/drugresponseeval_pipeline_simplified.png)
 
 1. The response data is loaded
 2. All models are trained and evaluated in a cross-validation setting
@@ -66,8 +56,6 @@ For baseline models, no randomization or robustness tests are performed.
 
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
 nextflow run nf-core/drugresponseeval \
    -profile <docker/singularity/.../institute> \
@@ -95,10 +83,14 @@ Berlin).
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
-<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
-
 ## Contributions and Support
 
+Contributors to nf-core/drugresponseeval and the drevalpy PyPI package:
+
+- [Judith Bernett](https://github.com/JudithBernett) (TUM)
+- [Pascal Iversen](https://github.com/PascalIversen) (FU Berlin)
+- [Mario Picciani](https://github.com/picciama) (TUM)
+
 If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
 
 For further information or help, don't hesitate to get in touch on the [Slack `#drugresponseeval` channel](https://nfcore.slack.com/channels/drugresponseeval) (you can join with [this invite](https://nf-co.re/join/slack)).

diff --git a/assets/DrEval_pipeline_simplified.png → .../drugresponseeval_pipeline_simplified.png b/assets/DrEval_pipeline_simplified.png → .../drugresponseeval_pipeline_simplified.png
diff --git a/conf/base.config b/conf/base.config
@@ -10,7 +10,6 @@
 
 process {
 
-    // TODO nf-core: Check the defaults for all processes
     cpus   = { 1      * task.attempt }
     memory = { 6.GB   * task.attempt }
     time   = { 4.h    * task.attempt }
@@ -24,7 +23,6 @@ process {
     //        These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     //        If possible, it would be nice to keep the same label naming convention when
     //        adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
     withLabel:process_single {
         cpus   = { 1                   }
@@ -42,9 +40,9 @@ process {
         time   = { 8.h   * task.attempt }
     }
     withLabel:process_high {
-        cpus   = { check_max( 18    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+        cpus   = { 12    * task.attempt }
+        memory = { 72.GB * task.attempt }
+        time   = { 16.h  * task.attempt }
     }
     withLabel:process_long {
         time   = { 20.h  * task.attempt }

diff --git a/conf/modules.config b/conf/modules.config
@@ -20,15 +20,15 @@ process {
 
     withName: 'LOAD_RESPONSE' {
         publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/datasets" : params.outdir },
+            path: { params.save_datasets ? "${params.path_data}/${params.run_id}/datasets" : params.path_data },
             mode: params.publish_dir_mode,
             saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
         ]
     }
 
     withName: 'CV_SPLIT' {
         publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/datasets" : params.outdir },
+            path: { params.save_datasets ? "${params.path_data}/${params.run_id}/datasets" : params.path_data },
             mode: params.publish_dir_mode,
             saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
         ]
@@ -52,29 +52,38 @@ process {
 
     withName: 'HPAM_SPLIT' {
         publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/hpams" : params.outdir },
+            path: params.outdir,
             mode: params.publish_dir_mode,
-            saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
+            saveAs: { filename -> null }
         ]
     }
 
 
     withName: 'TRAIN_AND_PREDICT_CV' {
         publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/datasets" : params.outdir },
+            path: params.outdir,
             mode: params.publish_dir_mode,
-            saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
+            saveAs: { filename -> null }
         ]
 
         ext.use_gpu = { [
             'SimpleNeuralNetwork',
             'MultiOmicsNeuralNetwork',
             'MOLIR',
-            'SuperFELTR'
+            'SuperFELTR',
+            'DIPK'
             ].contains( model_name.split( '\\.' )[0] )
         }
     }
 
+    withName: 'EVALUATE_FIND_MAX' {
+        publishDir = [
+            path: params.outdir,
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> null }
+        ]
+    }
+
     withName: 'PREDICT_FULL' {
         ext.use_gpu = { [
             'SimpleNeuralNetwork',
@@ -85,12 +94,21 @@ process {
         }
     }
 
+    withName: 'RANDOMIZATION_SPLIT' {
+        publishDir = [
+            path: params.outdir,
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> null }
+        ]
+    }
+
     withName: 'RANDOMIZATION_TEST' {
         ext.use_gpu = { [
             'SimpleNeuralNetwork',
             'MultiOmicsNeuralNetwork',
             'MOLIR',
-            'SuperFELTR'
+            'SuperFELTR',
+            'DIPK'
             ].contains( model_name.split( '\\.' )[0] )
         }
     }
@@ -100,27 +118,12 @@ process {
             'SimpleNeuralNetwork',
             'MultiOmicsNeuralNetwork',
             'MOLIR',
-            'SuperFELTR'
+            'SuperFELTR',
+            'DIPK'
             ].contains( model_name.split( '\\.' )[0] )
         }
     }
 
-    withName: 'EVALUATE_FIND_MAX' {
-        publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/datasets" : params.outdir },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
-        ]
-    }
-
-    withName: 'RANDOMIZATION_SPLIT' {
-        publishDir = [
-            path: { params.save_datasets ? "${params.outdir}/${params.run_id}/datasets" : params.outdir },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> (filename != 'versions.yml' && params.save_datasets) ? filename : null }
-        ]
-    }
-
     withName: 'EVALUATE_FINAL' {
         publishDir = [
             path: params.outdir,

diff --git a/conf/test.config b/conf/test.config
@@ -13,7 +13,7 @@
 process {
     resourceLimits = [
         cpus: 4,
-        memory: '15.GB',
+        memory: '3.GB',
         time: '1.h'
     ]
 }
@@ -22,12 +22,13 @@ params {
     config_profile_name        = 'Test profile'
     config_profile_description = 'Minimal test dataset to check pipeline function'
 
-    //TODO do this for the proper pipeline
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
     //input  = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
 
-    // Genome references
-    //genome = 'R64-1-1'
+    run_id = 'test_run'
+    dataset_name = 'Toy_Data'
+    n_cv_splits = 2
+
 }
diff --git a/conf/test_full.config b/conf/test_full.config
@@ -22,4 +22,7 @@ params {
 
     // Genome references
     //genome = 'R64-1-1'
+    run_id = 'test_run'
+    dataset_name = 'Toy_Data'
+    n_cv_splits = 5
 }
diff --git a/docs/images/nf-core-drugresponseeval_logo_light.png b/docs/images/nf-core-drugresponseeval_logo_light.png
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,5 @@ results/ @@
     testing/
     testing*
     *.pyc
+    null/
+    .idea/