Skip to content

Commit ac77676

Browse files
authored Mar 17, 2022
Merge branch 'dev' into nf-core-template-merge-2.3
2 parents 699d26b + e39c6a8 commit ac77676

21 files changed

+1056
-258
lines changed
 

‎README.md

+18-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
## Introduction
1818

1919
<!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->
20-
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for Taxonomic profiling of shotgun metagenomic data.
20+
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling against multiple profiling tools and databases and produces standardised output tables.
2121

2222
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
2323

@@ -29,7 +29,23 @@ On release, automated continuous integration tests run the pipeline on a full-si
2929
<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
3030

3131
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
32-
2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
32+
2. Performs optional read pre-processing
33+
- Adapter clipping and merging
34+
- Low complexity filtering
35+
- Host read removal
36+
- Run merging
37+
3. Performs taxonomic profiling with a choice of:
38+
- Kraken2
39+
- MetaPhlAn3
40+
- MALT
41+
- DIAMOND
42+
- Centrifuge
43+
- Kaiju
44+
- mOTUs
45+
4. Performs optional post-processing with:
46+
- bracken
47+
5. Standardises output tables
48+
6. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
3349

3450
## Quick Start
3551

‎bin/check_samplesheet.py

+207-229
Large diffs are not rendered by default.

‎conf/modules.config

+59
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,65 @@ process {
2828

2929
withName: FASTQC {
3030
ext.args = '--quiet'
31+
ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
32+
publishDir = [
33+
path: { "${params.outdir}/fastqc/raw" },
34+
mode: 'copy',
35+
pattern: '*.html'
36+
]
37+
}
38+
39+
withName: FASTP {
40+
ext.prefix = { "${meta.id}_${meta.run_accession}" }
41+
// TODO also include option to NOT merge
42+
ext.args = [
43+
{ ${meta.single_end} } == 0 ? "-m" : '',
44+
params.fastp_exclude_unmerged ? '' : "--include_unmerged"
45+
].join(' ').trim()
46+
publishDir = [
47+
path: { "${params.outdir}/fastp" },
48+
mode: 'copy',
49+
pattern: '*.fastq.gz'
50+
]
51+
}
52+
53+
withName: FASTQC_POST {
54+
ext.args = '--quiet'
55+
ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
56+
publishDir = [
57+
path: { "${params.outdir}/fastqc/processed" },
58+
mode: 'copy',
59+
pattern: '*.html'
60+
]
61+
}
62+
63+
withName: CAT_FASTQ {
64+
publishDir = [
65+
path: { "${params.outdir}/prepared_sequences" },
66+
mode: 'copy',
67+
pattern: '*.fastq.gz'
68+
]
69+
}
70+
71+
withName: MALT_RUN {
72+
publishDir = [
73+
path: { "${params.outdir}/malt/${meta.db_name}" },
74+
mode: 'copy',
75+
pattern: '*.{rma6,tab,text,sam,log}'
76+
]
77+
ext.args = { "${meta.db_params}" }
78+
ext.when = params.run_malt
79+
}
80+
81+
withName: KRAKEN2_KRAKEN2 {
82+
publishDir = [
83+
path: { "${params.outdir}/kraken2/${meta.db_name}" },
84+
mode: 'copy',
85+
pattern: '.{fastq.gz,txt}'
86+
]
87+
ext.args = { "${meta.db_params}" }
88+
ext.when = params.run_kraken2
89+
ext.prefix = { "${meta.id}-${meta.db_name}" }
3190
}
3291

3392
withName: CUSTOM_DUMPSOFTWAREVERSIONS {

‎conf/test.config

+1-3
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ params {
2222
// Input data
2323
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
2424
// TODO nf-core: Give any required params for the test so that command line flags are not needed
25-
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
25+
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
2626

27-
// Genome references
28-
genome = 'R64-1-1'
2927
}

‎lib/WorkflowTaxprofiler.groovy

+5-4
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ class WorkflowTaxprofiler {
1010
public static void initialise(params, log) {
1111
genomeExistsError(params, log)
1212

13-
if (!params.fasta) {
14-
log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
15-
System.exit(1)
16-
}
13+
// TODO update as necessary
14+
//if (!params.fasta) {
15+
// log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
16+
// System.exit(1)
17+
//}
1718
}
1819

1920
//

‎modules.json

+12
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,24 @@
33
"homePage": "https://github.com/nf-core/taxprofiler",
44
"repos": {
55
"nf-core/modules": {
6+
"cat/fastq": {
7+
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
8+
},
69
"custom/dumpsoftwareversions": {
710
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
811
},
12+
"fastp": {
13+
"git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
14+
},
915
"fastqc": {
1016
"git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961"
1117
},
18+
"kraken2/kraken2": {
19+
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
20+
},
21+
"malt/run": {
22+
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
23+
},
1224
"multiqc": {
1325
"git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41"
1426
}

‎modules/local/database_check.nf

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
process DATABASE_CHECK {
2+
tag "$databasesheet"
3+
4+
conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
5+
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
6+
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
7+
'quay.io/biocontainers/python:3.8.3' }"
8+
9+
input:
10+
path databasesheet
11+
12+
output:
13+
path '*.csv' , emit: csv
14+
path "versions.yml", emit: versions
15+
16+
script: // This script is bundled with the pipeline, in nf-core/taxprofiler/bin/
17+
"""
18+
cat $databasesheet >> database_sheet.valid.csv
19+
20+
cat <<-END_VERSIONS > versions.yml
21+
"${task.process}":
22+
python: \$(python --version | sed 's/Python //g')
23+
END_VERSIONS
24+
"""
25+
}
+51
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+39
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎modules/nf-core/modules/fastp/main.nf

+75
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+68
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+50
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+58
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎nextflow.config

+14-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ params {
3434
help = false
3535
validate_params = true
3636
show_hidden_params = false
37-
schema_ignore_params = 'genomes'
37+
schema_ignore_params = 'genomes,fasta'
3838
enable_conda = false
3939

4040
// Config options
@@ -51,6 +51,19 @@ params {
5151
max_cpus = 16
5252
max_time = '240.h'
5353

54+
// Databases
55+
databases = null
56+
57+
// FASTQ preprocessing
58+
fastp_clip_merge = false
59+
fastp_exclude_unmerged = true
60+
61+
// MALT
62+
run_malt = false
63+
malt_mode = 'BlastN'
64+
65+
// kraken2
66+
run_kraken2 = false
5467
}
5568

5669
// Load base.config by default for all pipelines

‎nextflow_schema.json

-9
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,6 @@
5757
"fa_icon": "fas fa-book",
5858
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
5959
},
60-
"fasta": {
61-
"type": "string",
62-
"format": "file-path",
63-
"mimetype": "text/plain",
64-
"pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
65-
"description": "Path to FASTA genome file.",
66-
"help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
67-
"fa_icon": "far fa-file-code"
68-
},
6960
"igenomes_base": {
7061
"type": "string",
7162
"format": "directory-path",

‎subworkflows/local/db_check.nf

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//
2+
// Check input samplesheet and get read channels
3+
//
4+
5+
include { DATABASE_CHECK } from '../../modules/local/database_check'
6+
7+
workflow DB_CHECK {
8+
take:
9+
dbsheet // file: /path/to/dbsheet.csv
10+
11+
main:
12+
13+
// TODO: make database sheet check
14+
parsed_samplesheet = DATABASE_CHECK ( dbsheet )
15+
.csv
16+
.splitCsv ( header:true, sep:',' )
17+
.dump(tag: "db_split_csv_out")
18+
.map { create_db_channels(it) }
19+
.dump(tag: "db_channel_prepped")
20+
.set{ dbs }
21+
22+
emit:
23+
dbs // channel: [ val(meta), [ db ] ]
24+
versions = DATABASE_CHECK.out.versions // channel: [ versions.yml ]
25+
}
26+
27+
def create_db_channels(LinkedHashMap row) {
28+
def meta = [:]
29+
meta.tool = row.tool
30+
meta.db_name = row.db_name
31+
meta.db_params = row.db_params
32+
33+
def array = []
34+
if (!file(row.db_path, type: 'dir').exists()) {
35+
exit 1, "ERROR: Please check input samplesheet -> database could not be found!\n${row.db_path}"
36+
}
37+
array = [ meta, file(row.db_path) ]
38+
39+
return array
40+
}

‎subworkflows/local/input_check.nf

+39-6
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,39 @@ workflow INPUT_CHECK {
99
samplesheet // file: /path/to/samplesheet.csv
1010

1111
main:
12-
SAMPLESHEET_CHECK ( samplesheet )
12+
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
1313
.csv
1414
.splitCsv ( header:true, sep:',' )
15-
.map { create_fastq_channel(it) }
16-
.set { reads }
15+
.dump(tag: "input_split_csv_out")
16+
.branch {
17+
fasta: it['fasta'] != ''
18+
fastq: true
19+
}
20+
21+
parsed_samplesheet.fastq
22+
.map { create_fastq_channels(it) }
23+
.dump(tag: "fastq_channel_init")
24+
.set { fastq }
25+
26+
parsed_samplesheet.fasta
27+
.map { create_fasta_channels(it) }
28+
.dump(tag: "fasta_channel_init")
29+
.set { fasta }
1730

1831
emit:
19-
reads // channel: [ val(meta), [ reads ] ]
32+
fastq // channel: [ val(meta), [ reads ] ]
33+
fasta // channel: [ val(meta), fasta ]
2034
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
2135
}
2236

2337
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
2438
def create_fastq_channel(LinkedHashMap row) {
2539
// create meta map
2640
def meta = [:]
27-
meta.id = row.sample
28-
meta.single_end = row.single_end.toBoolean()
41+
meta.id = row.sample
42+
meta.run_accession = row.run_accession
43+
meta.instrument_platform = row.instrument_platform
44+
meta.single_end = row.single_end.toBoolean()
2945

3046
// add path(s) of the fastq file(s) to the meta map
3147
def fastq_meta = []
@@ -42,3 +58,20 @@ def create_fastq_channel(LinkedHashMap row) {
4258
}
4359
return fastq_meta
4460
}
61+
62+
// Function to get list of [ meta, fasta ]
63+
def create_fasta_channels(LinkedHashMap row) {
64+
def meta = [:]
65+
meta.id = row.sample
66+
meta.run_accession = row.run_accession
67+
meta.instrument_platform = row.instrument_platform
68+
meta.single_end = true
69+
70+
def array = []
71+
if (!file(row.fasta).exists()) {
72+
exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
73+
}
74+
array = [ meta, [ file(row.fasta) ] ]
75+
76+
return array
77+
}

‎subworkflows/local/preprocessing.nf

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
//
2+
// Check input samplesheet and get read channels
3+
//
4+
5+
6+
include { FASTP as FASTP_SINGLE } from '../../modules/nf-core/modules/fastp/main'
7+
include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fastp/main'
8+
include { FASTQC as FASTQC_POST } from '../../modules/nf-core/modules/fastqc/main'
9+
10+
workflow FASTQ_PREPROCESSING {
11+
take:
12+
reads // file: /path/to/samplesheet.csv
13+
14+
main:
15+
ch_versions = Channel.empty()
16+
ch_multiqc_files = Channel.empty()
17+
18+
//
19+
// STEP: Read clipping and merging
20+
//
21+
// TODO give option to clip only and retain pairs
22+
// TODO give option to retain singletons (probably fastp option likely)
23+
// TODO move to subworkflow
24+
25+
26+
if ( params.fastp_clip_merge ) {
27+
28+
ch_input_for_fastp = reads
29+
.dump(tag: "pre-fastp_branch")
30+
.branch{
31+
single: it[0]['single_end'] == true
32+
paired: it[0]['single_end'] == false
33+
}
34+
35+
ch_input_for_fastp.single.dump(tag: "input_fastp_single")
36+
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
37+
38+
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
39+
FASTP_PAIRED ( ch_input_for_fastp.paired, false, true )
40+
41+
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
42+
.mix( FASTP_SINGLE.out.reads )
43+
.map {
44+
meta, reads ->
45+
def meta_new = meta.clone()
46+
meta_new['single_end'] = 1
47+
[ meta_new, reads ]
48+
}
49+
50+
FASTQC_POST ( ch_fastp_reads_prepped )
51+
52+
ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
53+
ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
54+
55+
ch_processed_reads = ch_fastp_reads_prepped
56+
57+
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
58+
ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
59+
ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
60+
61+
ch_multiqc_files.dump(tag: "preprocessing_mqc_final")
62+
63+
} else {
64+
ch_processed_reads = reads
65+
}
66+
67+
68+
emit:
69+
reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
70+
versions = ch_versions // channel: [ versions.yml ]
71+
mqc = ch_multiqc_files
72+
}
73+

‎workflows/taxprofiler.nf

+113-4
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@ WorkflowTaxprofiler.initialise(params, log)
1111

1212
// TODO nf-core: Add all file path parameters for the pipeline to the list below
1313
// Check input path parameters to see if they exist
14-
def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ]
14+
def checkPathParamList = [ params.input, params.databases, params.multiqc_config ]
1515
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
1616

1717
// Check mandatory parameters
18-
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
18+
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
19+
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
1920

2021
/*
2122
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -35,7 +36,11 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
3536
//
3637
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
3738
//
38-
include { INPUT_CHECK } from '../subworkflows/local/input_check'
39+
include { INPUT_CHECK } from '../subworkflows/local/input_check'
40+
41+
include { DB_CHECK } from '../subworkflows/local/db_check'
42+
include { FASTQ_PREPROCESSING } from '../subworkflows/local/preprocessing'
43+
3944

4045
/*
4146
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -50,6 +55,11 @@ include { FASTQC } from '../modules/nf-core/modules/fastqc/
5055
include { MULTIQC } from '../modules/nf-core/modules/multiqc/main'
5156
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
5257

58+
include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main'
59+
include { MALT_RUN } from '../modules/nf-core/modules/malt/run/main'
60+
include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main'
61+
62+
5363
/*
5464
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5565
RUN MAIN WORKFLOW
@@ -71,18 +81,104 @@ workflow TAXPROFILER {
7181
)
7282
ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
7383

84+
DB_CHECK (
85+
ch_databases
86+
)
87+
7488
//
7589
// MODULE: Run FastQC
7690
//
7791
FASTQC (
78-
INPUT_CHECK.out.reads
92+
INPUT_CHECK.out.fastq
7993
)
8094
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
8195

8296
CUSTOM_DUMPSOFTWAREVERSIONS (
8397
ch_versions.unique().collectFile(name: 'collated_versions.yml')
8498
)
8599

100+
//
101+
// PERFORM PREPROCESSING
102+
//
103+
if ( params.fastp_clip_merge ) {
104+
FASTQ_PREPROCESSING ( INPUT_CHECK.out.fastq )
105+
}
106+
107+
//
108+
// PERFORM RUN MERGING
109+
//
110+
ch_processed_for_combine = FASTQ_PREPROCESSING.out.reads
111+
.dump(tag: "prep_for_combine_grouping")
112+
.map {
113+
meta, reads ->
114+
def meta_new = meta.clone()
115+
meta_new['run_accession'] = 'combined'
116+
[ meta_new, reads ]
117+
}
118+
.groupTuple ( by: 0 )
119+
.branch{
120+
combine: it[1].size() >= 2
121+
skip: it[1].size() < 2
122+
}
123+
124+
CAT_FASTQ ( ch_processed_for_combine.combine )
125+
126+
ch_reads_for_profiling = ch_processed_for_combine.skip
127+
.dump(tag: "skip_combine")
128+
.mix( CAT_FASTQ.out.reads )
129+
.dump(tag: "files_for_profiling")
130+
131+
//
132+
// COMBINE READS WITH POSSIBLE DATABASES
133+
//
134+
135+
// output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
136+
ch_input_for_profiling = ch_reads_for_profiling
137+
.combine(DB_CHECK.out.dbs)
138+
.dump(tag: "reads_plus_db")
139+
.branch {
140+
malt: it[2]['tool'] == 'malt'
141+
kraken2: it[2]['tool'] == 'kraken2'
142+
unknown: true
143+
}
144+
145+
//
146+
// PREP PROFILER INPUT CHANNELS ON PER TOOL BASIS
147+
//
148+
149+
// We groupTuple to have all samples in one channel for MALT as database
150+
// loading takes a long time, so we only want to run it once per database
151+
ch_input_for_malt = ch_input_for_profiling.malt
152+
.map {
153+
it ->
154+
def temp_meta = [ id: it[2]['db_name']] + it[2]
155+
def db = it[3]
156+
[ temp_meta, it[1], db ]
157+
}
158+
.groupTuple(by: [0,2])
159+
.dump(tag: "input for malt")
160+
.multiMap {
161+
it ->
162+
reads: [ it[0], it[1].flatten() ]
163+
db: it[2]
164+
}
165+
166+
// We can run Kraken2 one-by-one sample-wise
167+
ch_input_for_kraken2 = ch_input_for_profiling.kraken2
168+
.dump(tag: "input for kraken")
169+
.multiMap {
170+
it ->
171+
reads: [ it[0] + it[2], it[1] ]
172+
db: it[3]
173+
}
174+
175+
//
176+
// RUN PROFILING
177+
//
178+
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
179+
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
180+
181+
86182
//
87183
// MODULE: MultiQC
88184
//
@@ -95,7 +191,20 @@ workflow TAXPROFILER {
95191
ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
96192
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
97193
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
194+
if (params.fastp_clip_merge) {
195+
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_PREPROCESSING.out.mqc)
196+
}
197+
if (params.run_kraken2) {
198+
ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
199+
ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())
200+
}
201+
if (params.run_malt) {
202+
ch_multiqc_files = ch_multiqc_files.mix(MALT_RUN.out.log.collect{it[1]}.ifEmpty([]))
203+
ch_versions = ch_versions.mix(MALT_RUN.out.versions.first())
204+
}
98205

206+
// TODO MALT results overwriting per database?
207+
// TODO Versions for Kraken/MALT not reported?
99208
MULTIQC (
100209
ch_multiqc_files.collect()
101210
)

0 commit comments

Comments
 (0)
Please sign in to comment.