From ccf2f6cd21c854a4153e57453e153980fc20910a Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Fri, 6 Dec 2024 13:42:58 +1100 Subject: [PATCH 01/10] minor change --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8558c86e..1ccf2a99 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -97,7 +97,7 @@ "properties": { "demultiplexer": { "type": "string", - "enum": ["bases2fastq", "bcl2fastq", "bclconvert", "fqtk", "sgdemux", "mkfastq"], + "enum": ["bases2fastq", "bcl2fastq", "bclconvert", "fqtk", "sgdemux", "mkfastq", "mgikit"], "description": "Demultiplexer to use.", "fa_icon": "fas fa-microscope", "default": "bclconvert" From 4fa5f0b88034dc2ecc0bf1cd4d027bd29b520917 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Fri, 6 Dec 2024 20:14:11 +1100 Subject: [PATCH 02/10] add mgikit module --- modules.json | 5 + .../mgikit/demultiplex/environment.yml | 5 + modules/nf-core/mgikit/demultiplex/main.nf | 81 ++++++++ modules/nf-core/mgikit/demultiplex/meta.yml | 152 +++++++++++++++ .../mgikit/demultiplex/tests/main.nf.test | 105 ++++++++++ .../demultiplex/tests/main.nf.test.snap | 181 ++++++++++++++++++ 6 files changed, 529 insertions(+) create mode 100644 modules/nf-core/mgikit/demultiplex/environment.yml create mode 100644 modules/nf-core/mgikit/demultiplex/main.nf create mode 100644 modules/nf-core/mgikit/demultiplex/meta.yml create mode 100644 modules/nf-core/mgikit/demultiplex/tests/main.nf.test create mode 100644 modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index c73432ce..8fef246e 100644 --- a/modules.json +++ b/modules.json @@ -60,6 +60,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "mgikit/demultiplex": { + "branch": "master", + "git_sha": "0bf42a3bdf105ddc58f6cc5523c86b4617c4ed04", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", diff --git a/modules/nf-core/mgikit/demultiplex/environment.yml b/modules/nf-core/mgikit/demultiplex/environment.yml new file mode 100644 index 00000000..9ae21494 --- /dev/null +++ b/modules/nf-core/mgikit/demultiplex/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mgikit=0.1.6 diff --git a/modules/nf-core/mgikit/demultiplex/main.nf b/modules/nf-core/mgikit/demultiplex/main.nf new file mode 100644 index 00000000..df64578d --- /dev/null +++ b/modules/nf-core/mgikit/demultiplex/main.nf @@ -0,0 +1,81 @@ +process MGIKIT_DEMULTIPLEX { + tag {"$run_id"} + label 'process_high' + + conda "${moduleDir}/environment.yml" + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mgikit:0.1.6--h4c94732_0' : + 'biocontainers/mgikit:0.1.6--h4c94732_0' }" + + input: + tuple val(meta), path(samplesheet), path(run_dir) + + output: + tuple val(meta), path("${prefix}/*.fastq.gz") , emit: fastq + tuple val(meta), path("${prefix}_undetermined/*.fastq.gz") , optional:true, emit: undetermined + tuple val(meta), path("${prefix}_ambiguous/*.fastq.gz") , optional:true, emit: ambiguous + tuple val(meta), path("${prefix}/*mgikit.undetermined_barcode*") , emit: undetermined_reports, optional:true + tuple val(meta), path("${prefix}/*mgikit.ambiguous_barcode*") , emit: ambiguous_reports, optional:true + tuple val(meta), path("${prefix}/*mgikit.general") , emit: general_info_reports + tuple val(meta), path("${prefix}/*mgikit.info") , emit: index_reports + tuple val(meta), path("${prefix}/*mgikit.sample_stats") , emit: sample_stat_reports + tuple val(meta), path("${prefix}/*mgikit.{info,general,ambiguous_barcode,undetermined_barcode}") , emit: qc_reports + path("versions.yml") , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + run_id = meta.lane ? "${meta.id}-${meta.lane}" : "${meta.id}" + prefix = task.ext.prefix ?: "out-${run_id}" + + """ + mgikit demultiplex \\ + -i "${run_dir}" \\ + -s "${samplesheet}" \\ + -o "${prefix}" \\ + ${args} + + if find ${prefix} -name 'Undetermined*.fastq.gz' -print -quit | grep -q .; then + mkdir -p "${prefix}_undetermined" + mv ${prefix}/Undetermined*.fastq.gz ${prefix}_undetermined/ + fi + + if find ${prefix} -name 'Ambiguous*.fastq.gz' -print -quit | grep -q .; then + mkdir -p "${prefix}_ambiguous" + mv ${prefix}/Ambiguous*.fastq.gz ${prefix}_ambiguous/ + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mgikit: \$(mgikit --version 2>&1 | grep 'MGIKIT - MGI data demultipexing kit' | sed -e 's/MGIKIT - MGI data demultipexing kit. //g') + END_VERSIONS + """ + + stub: + run_id = meta.lane ? "${meta.id}-${meta.lane}" : "${meta.id}" + prefix = task.ext.prefix ?: "out-${run_id}" + """ + mkdir "${prefix}" + mkdir -p "${prefix}_undetermined" + + touch "${prefix}/FC1.L01.mgikit.general" + touch "${prefix}/FC1.L01.mgikit.info" + touch "${prefix}/FC1.L01.mgikit.undetermined_barcode" + touch "${prefix}/FC1.L01.mgikit.sample_stats" + + echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-001_S1_L01_R1_001.fastq.gz" + echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-001_S1_L01_R2_001.fastq.gz" + echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-002_S2_L01_R1_001.fastq.gz" + echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-002_S2_L01_R2_001.fastq.gz" + + echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}_undetermined/Undetermined_L01_R1_001.fastq.gz" + echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}_undetermined/Undetermined_L01_R2_001.fastq.gz" + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mgikit: \$(mgikit --version 2>&1 | grep 'MGIKIT - MGI data demultipexing kit' | sed -e 's/MGIKIT - MGI data demultipexing kit. //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mgikit/demultiplex/meta.yml b/modules/nf-core/mgikit/demultiplex/meta.yml new file mode 100644 index 00000000..e4282538 --- /dev/null +++ b/modules/nf-core/mgikit/demultiplex/meta.yml @@ -0,0 +1,152 @@ +name: "mgikit_demultiplex" +description: Demultiplex MGI fastq files +keywords: + - demultiplex + - mgi + - fastq +tools: + - "mgikit demultiplex": + description: "Demultiplex MGI fastq files" + homepage: "https://sagc-bioinformatics.github.io/mgikit/" + documentation: "https://sagc-bioinformatics.github.io/mgikit/" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - samplesheet: + type: file + description: "Input samplesheet" + pattern: "*.{csv}" + - run_dir: + type: file + description: | + Input run directory containing BioInfo.csv and fastq data. + fastq files should be in MGI format and can be either single or paired end. +output: + - fastq: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fastq.gz" + - ${prefix}/*.fastq.gz: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.fastq.gz" + - undetermined: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "Undetermined*.fastq.gz" + - ${prefix}_undetermined/*.fastq.gz: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "Undetermined*.fastq.gz" + - ambiguous: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "Ambiguous*.fastq.gz" + - ${prefix}_ambiguous/*.fastq.gz: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "Ambiguous*.fastq.gz" + - undetermined_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.undetermined_barcode*" + - ${prefix}/*mgikit.undetermined_barcode*: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.undetermined_barcode*" + - ambiguous_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.ambiguous_barcode*" + - ${prefix}/*mgikit.ambiguous_barcode*: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.ambiguous_barcode*" + - general_info_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.general" + - ${prefix}/*mgikit.general: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.general" + - index_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.info" + - ${prefix}/*mgikit.info: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.info" + - sample_stat_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.sample_stats" + - ${prefix}/*mgikit.sample_stats: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*mgikit.sample_stats" + - qc_reports: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "mgikit.{info,general,ambiguous_barcode,undetermined_barcode}" + - ${prefix}/*mgikit.{info,general,ambiguous_barcode,undetermined_barcode}: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "mgikit.{info,general,ambiguous_barcode,undetermined_barcode}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ziadbkh" diff --git a/modules/nf-core/mgikit/demultiplex/tests/main.nf.test b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test new file mode 100644 index 00000000..1228a30c --- /dev/null +++ b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process MGIKIT_DEMULTIPLEX" + script "modules/nf-core/mgikit/demultiplex/main.nf" + process "MGIKIT_DEMULTIPLEX" + tag "mgikit" + tag "mgikit/demultiplex" + tag "modules" + tag "modules_nfcore" + tag "unzip" + + setup { + run("UNZIP") { + script "modules/nf-core/unzip/main.nf" + process { + """ + input[0] = [ + [ id: 'fc01', lane:1 ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01.zip', checkIfExists: true) + ] + """ + } + } + } + + test("run mgikit demultiplex without errors") { + + when { + process { + """ + input[0] = UNZIP.out.unzipped_archive.map{[ + [ id: 'test', lane:1 ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01_sample_sheet.csv', checkIfExists: true), + it[1] + ]} + """ + } + } + + then { + assert process.success + with(process.out) { + assert fastq.size() == 1 + assert fastq[0][1].any { file(it).name.endsWith(".fastq.gz") } + + assert undetermined.size() == 1 + assert undetermined[0][1].any { file(it).name.startsWith("Undetermined") && file(it).name.endsWith(".fastq.gz") } + + assert ambiguous.size() == 0 + + assert undetermined_reports.size() == 1 + assert undetermined_reports[0][1].any { file(it).name.contains("mgikit.undetermined_barcode") } + + assert ambiguous_reports.size() == 0 + + assert general_info_reports.size() == 1 + assert file(general_info_reports[0][1]).name.contains("mgikit.general") + + assert index_reports.size() == 1 + assert file(index_reports[0][1]).name.contains("mgikit.info") + + assert sample_stat_reports.size() == 1 + assert file(sample_stat_reports[0][1]).name.contains("mgikit.sample_stats") + + assert qc_reports.size() == 1 + assert qc_reports[0][1].size() == 3 + + assert file(versions[0]).text.contains("mgikit") + } + } + + } + + test("run stub mode of mgikit demultiplex without errors") { + + options "-stub" + + when { + process { + """ + input[0] = UNZIP.out.unzipped_archive.map{[ + [ id: 'test', lane:1 ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01_sample_sheet.csv', checkIfExists: true), + it[1] + ]} + """ + } + } + + then { + assert process.success + with(process.out) { + assert fastq.size() == 1 + assert fastq[0][1].size() == 4 + assert undetermined.size() == 1 + assert undetermined[0][1].size() == 2 + assert general_info_reports.size() == 1 + assert index_reports.size() == 1 + assert sample_stat_reports.size() == 1 + assert file(versions[0]).text.contains("mgikit") + } + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap new file mode 100644 index 00000000..a300c21c --- /dev/null +++ b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap @@ -0,0 +1,181 @@ +{ + "Should work with stub run": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "23-001_S1_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "23-001_S1_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28", + "23-002_S2_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "23-002_S2_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "Undetermined_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "Undetermined_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28" + ] + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.sample_stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e", + "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "9": [ + "versions.yml:md5,085c8140a4b50230ec3a0974e358562d" + ], + "ambiguous": [ + + ], + "ambiguous_reports": [ + + ], + "fastq": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "23-001_S1_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "23-001_S1_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28", + "23-002_S2_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "23-002_S2_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28" + ] + ] + ], + "general_info_reports": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index_reports": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "qc_reports": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e", + "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e", + "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "sample_stat_reports": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.sample_stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "undetermined": [ + [ + { + "id": "test", + "lane": "1" + }, + [ + "Undetermined_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339", + "Undetermined_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28" + ] + ] + ], + "undetermined_reports": [ + [ + { + "id": "test", + "lane": "1" + }, + "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,085c8140a4b50230ec3a0974e358562d" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "23.04.1" + }, + "timestamp": "2024-11-21T21:23:03.175331921" + } +} \ No newline at end of file From 1486d1166edcde3fe68c0eaf1cc624a0150dbfb8 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Fri, 6 Dec 2024 20:28:23 +1100 Subject: [PATCH 03/10] adding workflows --- subworkflows/local/mgikit_demultiplex/main.nf | 98 +++++++++++++++++++ workflows/demultiplex.nf | 10 ++ 2 files changed, 108 insertions(+) create mode 100644 subworkflows/local/mgikit_demultiplex/main.nf diff --git a/subworkflows/local/mgikit_demultiplex/main.nf b/subworkflows/local/mgikit_demultiplex/main.nf new file mode 100644 index 00000000..1a702f9a --- /dev/null +++ b/subworkflows/local/mgikit_demultiplex/main.nf @@ -0,0 +1,98 @@ +#!/usr/bin/env nextflow + +// +// Demultiplex Element Biosciences bases data using bases2fastq +// + +include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main" + +workflow MGIKIT_DEMULTIPLEX { + take: + ch_flowcell // [[id:"", lane:""],samplesheet.csv, path/to/bases/files] + + main: + DEMULTIPLEX( ch_flowcell ) + + // Generate meta for each fastq + ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq) + + emit: + fastq = ch_fastq_with_meta + unassigned = DEMULTIPLEX.out.undetermined + ambiguous = DEMULTIPLEX.out.ambiguous + qc_reports = DEMULTIPLEX.out.qc_reports; + versions = DEMULTIPLEX.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Add meta values to fastq channel +def generate_fastq_meta(ch_reads) { + // Create a tuple with the meta.id and the fastq + ch_reads.transpose().map{ + fc_meta, fastq -> + def meta = [ + "id": fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, ""), + "samplename": fastq.getSimpleName().toString() - ~/_S\d+_L0\d+_R\d+.*$/, + "readgroup": [:], + "fcid": fc_meta.id, + "lane": fc_meta.lane + ] + meta.readgroup = readgroup_from_fastq(fastq) + meta.readgroup.SM = meta.samplename + + return [ meta , fastq ] + } + // Group by meta.id for PE samples + .groupTuple(by: [0]) + // Add meta.single_end + .map { + meta, fastq -> + if (fastq.size() == 1){ + meta.single_end = true + } else { + meta.single_end = false + } + return [ meta, fastq.flatten() ] + } +} + +// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040 +def readgroup_from_fastq(path) { + // expected format: + // xx:yy:FLOWCELLID:LANE:... (seven fields) + + def line + + path.withInputStream { + InputStream gzipStream = new java.util.zip.GZIPInputStream(it) + Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + BufferedReader buffered = new BufferedReader(decoder) + line = buffered.readLine() + } + assert line.startsWith('@') + line = line.substring(1) + def fields = line.split(':') + //println(line); + //println(fields); + def rg = [:] + + // https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq + // "@:::::::UMI :N:0:" + sequencer_serial = fields[0] + run_nubmer = fields[1] + fcid = fields[2] + lane = fields[3] + index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : "" + + rg.ID = [fcid,lane].join(".") + rg.PU = [fcid, lane, index].findAll().join(".") + // TODO: @edmundmiller verify if this is correct + rg.PL = "ELEMENT" + + return rg +} diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index 9597883e..0bf5387c 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -14,6 +14,7 @@ include { BASES_DEMULTIPLEX } from ' include { FQTK_DEMULTIPLEX } from '../subworkflows/local/fqtk_demultiplex/main' include { MKFASTQ_DEMULTIPLEX } from '../subworkflows/local/mkfastq_demultiplex/main' include { SINGULAR_DEMULTIPLEX } from '../subworkflows/local/singular_demultiplex/main' +include { MGIKIT_DEMULTIPLEX } from '../subworkflows/local/mgikit_demultiplex/main' include { RUNDIR_CHECKQC } from '../subworkflows/local/rundir_checkqc/main' include { FASTQ_TO_SAMPLESHEET as FASTQ_TO_SAMPLESHEET_RNASEQ } from '../modules/local/fastq_to_samplesheet/main' include { FASTQ_TO_SAMPLESHEET as FASTQ_TO_SAMPLESHEET_ATACSEQ } from '../modules/local/fastq_to_samplesheet/main' @@ -142,6 +143,7 @@ workflow DEMULTIPLEX { // Re-join the metadata and the untarred run directory with the samplesheet if (demultiplexer in ['bclconvert', 'bcl2fastq']) ch_flowcells_tar_merged = ch_flowcells_tar.samplesheets.join(ch_flowcells_tar.run_dirs, failOnMismatch:true, failOnDuplicate:true) + else if (demultiplexer == 'mgikit'){ ch_flowcells_tar_merged = Channel.empty() } else { ch_flowcells_tar_merged = ch_flowcells_tar.samplesheets.join( UNTAR_FLOWCELL ( ch_flowcells_tar.run_dirs ).untar, failOnMismatch:true, failOnDuplicate:true ) ch_versions = ch_versions.mix(UNTAR_FLOWCELL.out.versions) @@ -217,6 +219,14 @@ workflow DEMULTIPLEX { ch_raw_fastq = ch_raw_fastq.mix(MKFASTQ_DEMULTIPLEX.out.fastq) ch_versions = ch_versions.mix(MKFASTQ_DEMULTIPLEX.out.versions) break + case 'mgikit': + // MODULE: mgikit + // Runs when "demultiplexer" is set to "mgikit" + MGIKIT_DEMULTIPLEX ( ch_flowcells ) + ch_raw_fastq = ch_raw_fastq.mix(MGIKIT_DEMULTIPLEX.out.fastq) + ch_multiqc_files = ch_multiqc_files.mix(MGIKIT_DEMULTIPLEX.out.qc_reports.map { meta, metrics -> return metrics} ) + ch_versions = ch_versions.mix(MGIKIT_DEMULTIPLEX.out.versions) + break default: error "Unknown demultiplexer: ${demultiplexer}" } From 180f7cad12279dd50265ab30b4ee4138cf603563 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Fri, 6 Dec 2024 20:31:52 +1100 Subject: [PATCH 04/10] make lint happy --- subworkflows/local/mgikit_demultiplex/main.nf | 196 +++++++++--------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/subworkflows/local/mgikit_demultiplex/main.nf b/subworkflows/local/mgikit_demultiplex/main.nf index 1a702f9a..89fc6fa6 100644 --- a/subworkflows/local/mgikit_demultiplex/main.nf +++ b/subworkflows/local/mgikit_demultiplex/main.nf @@ -1,98 +1,98 @@ -#!/usr/bin/env nextflow - -// -// Demultiplex Element Biosciences bases data using bases2fastq -// - -include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main" - -workflow MGIKIT_DEMULTIPLEX { - take: - ch_flowcell // [[id:"", lane:""],samplesheet.csv, path/to/bases/files] - - main: - DEMULTIPLEX( ch_flowcell ) - - // Generate meta for each fastq - ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq) - - emit: - fastq = ch_fastq_with_meta - unassigned = DEMULTIPLEX.out.undetermined - ambiguous = DEMULTIPLEX.out.ambiguous - qc_reports = DEMULTIPLEX.out.qc_reports; - versions = DEMULTIPLEX.out.versions -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Add meta values to fastq channel -def generate_fastq_meta(ch_reads) { - // Create a tuple with the meta.id and the fastq - ch_reads.transpose().map{ - fc_meta, fastq -> - def meta = [ - "id": fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, ""), - "samplename": fastq.getSimpleName().toString() - ~/_S\d+_L0\d+_R\d+.*$/, - "readgroup": [:], - "fcid": fc_meta.id, - "lane": fc_meta.lane - ] - meta.readgroup = readgroup_from_fastq(fastq) - meta.readgroup.SM = meta.samplename - - return [ meta , fastq ] - } - // Group by meta.id for PE samples - .groupTuple(by: [0]) - // Add meta.single_end - .map { - meta, fastq -> - if (fastq.size() == 1){ - meta.single_end = true - } else { - meta.single_end = false - } - return [ meta, fastq.flatten() ] - } -} - -// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040 -def readgroup_from_fastq(path) { - // expected format: - // xx:yy:FLOWCELLID:LANE:... (seven fields) - - def line - - path.withInputStream { - InputStream gzipStream = new java.util.zip.GZIPInputStream(it) - Reader decoder = new InputStreamReader(gzipStream, 'ASCII') - BufferedReader buffered = new BufferedReader(decoder) - line = buffered.readLine() - } - assert line.startsWith('@') - line = line.substring(1) - def fields = line.split(':') - //println(line); - //println(fields); - def rg = [:] - - // https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq - // "@:::::::UMI :N:0:" - sequencer_serial = fields[0] - run_nubmer = fields[1] - fcid = fields[2] - lane = fields[3] - index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : "" - - rg.ID = [fcid,lane].join(".") - rg.PU = [fcid, lane, index].findAll().join(".") - // TODO: @edmundmiller verify if this is correct - rg.PL = "ELEMENT" - - return rg -} +#!/usr/bin/env nextflow + +// +// Demultiplex Element Biosciences bases data using bases2fastq +// + +include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main" + +workflow MGIKIT_DEMULTIPLEX { + take: + ch_flowcell // [[id:"", lane:""],samplesheet.csv, path/to/bases/files] + + main: + DEMULTIPLEX( ch_flowcell ) + + // Generate meta for each fastq + ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq) + + emit: + fastq = ch_fastq_with_meta + unassigned = DEMULTIPLEX.out.undetermined + ambiguous = DEMULTIPLEX.out.ambiguous + qc_reports = DEMULTIPLEX.out.qc_reports; + versions = DEMULTIPLEX.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Add meta values to fastq channel +def generate_fastq_meta(ch_reads) { + // Create a tuple with the meta.id and the fastq + ch_reads.transpose().map{ + fc_meta, fastq -> + def meta = [ + "id": fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, ""), + "samplename": fastq.getSimpleName().toString() - ~/_S\d+_L0\d+_R\d+.*$/, + "readgroup": [:], + "fcid": fc_meta.id, + "lane": fc_meta.lane + ] + meta.readgroup = readgroup_from_fastq(fastq) + meta.readgroup.SM = meta.samplename + + return [ meta , fastq ] + } + // Group by meta.id for PE samples + .groupTuple(by: [0]) + // Add meta.single_end + .map { + meta, fastq -> + if (fastq.size() == 1){ + meta.single_end = true + } else { + meta.single_end = false + } + return [ meta, fastq.flatten() ] + } +} + +// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040 +def readgroup_from_fastq(path) { + // expected format: + // xx:yy:FLOWCELLID:LANE:... (seven fields) + + def line + + path.withInputStream { + InputStream gzipStream = new java.util.zip.GZIPInputStream(it) + Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + BufferedReader buffered = new BufferedReader(decoder) + line = buffered.readLine() + } + assert line.startsWith('@') + line = line.substring(1) + def fields = line.split(':') + //println(line); + //println(fields); + def rg = [:] + + // https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq + // "@:::::::UMI :N:0:" + sequencer_serial = fields[0] + run_nubmer = fields[1] + fcid = fields[2] + lane = fields[3] + index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : "" + + rg.ID = [fcid,lane].join(".") + rg.PU = [fcid, lane, index].findAll().join(".") + // TODO: @edmundmiller verify if this is correct + rg.PL = "ELEMENT" + + return rg +} From 6329da061632262ec7f7cca00787f39393e6fed7 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Mon, 16 Dec 2024 22:49:40 +1100 Subject: [PATCH 05/10] adding mgikit tests --- conf/test_mgikit.config | 32 ++++ nextflow.config | 1 + nf-test.config | 1 - tests/mgikit.nf.test | 36 +++++ tests/mgikit.nf.test.snap | 305 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 conf/test_mgikit.config create mode 100644 tests/mgikit.nf.test create mode 100644 tests/mgikit.nf.test.snap diff --git a/conf/test_mgikit.config b/conf/test_mgikit.config new file mode 100644 index 00000000..07968209 --- /dev/null +++ b/conf/test_mgikit.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/demultiplex -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +// Limit resources so that this can run on GitHub Actions +process { + resourceLimits = [ + cpus: 2, + memory: '12.GB', + time: '4.h' + ] +} + +params { + config_profile_name = 'Test mgikit profile' + config_profile_description = 'Minimal test dataset to check pipeline function with mgikit' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/demultiplex/testdata/mgi/mgikit_input.csv' + demultiplexer = 'mgikit' + skip_tools = "checkqc,samshee" +} + + diff --git a/nextflow.config b/nextflow.config index 0d82db47..f825159f 100755 --- a/nextflow.config +++ b/nextflow.config @@ -190,6 +190,7 @@ profiles { test_checkqc { includeConfig 'conf/test_checkqc.config' } test_kraken { includeConfig 'conf/test_kraken.config' } test_two_lanes { includeConfig 'conf/test_two_lanes.config' } + test_mgikit { includeConfig 'conf/test_mgikit.config' } } diff --git a/nf-test.config b/nf-test.config index d441a08a..bed04032 100644 --- a/nf-test.config +++ b/nf-test.config @@ -18,4 +18,3 @@ config { load "nft-utils@0.0.3" } } - diff --git a/tests/mgikit.nf.test b/tests/mgikit.nf.test new file mode 100644 index 00000000..4158e1dd --- /dev/null +++ b/tests/mgikit.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + name "Test Workflow main.nf - MGIKIT" + script "../main.nf" + profile "test_mgikit,docker" + tag "mgikit" + tag "pipeline" + + test("mgikit") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap new file mode 100644 index 00000000..245f4798 --- /dev/null +++ b/tests/mgikit.nf.test.snap @@ -0,0 +1,305 @@ +{ + "mgikit": { + "content": [ + 21, + { + "DEMULTIPLEX": { + "mgikit": "0.1.6" + }, + "FALCO": { + "falco": "1.2.1" + }, + "FASTP": { + "fastp": "0.23.4" + }, + "MD5SUM": { + "md5sum": 8.3 + }, + "Workflow": { + "nf-core/demultiplex": "v1.5.4" + } + }, + [ + "demultiplex", + "demultiplex/out-test-fc01-1", + "demultiplex/out-test-fc01-1/FC01.L01.mgikit.general", + "demultiplex/out-test-fc01-1/FC01.L01.mgikit.info", + "demultiplex/out-test-fc01-1/FC01.L01.mgikit.sample_stats", + "demultiplex/out-test-fc01-1/FC01.L01.mgikit.undetermined_barcode", + "demultiplex/out-test-fc01-1/FC01.L01.mgikit.undetermined_barcode.complete", + "demultiplex/out-test-fc01-1/Sample01_S1_L01_R1_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample01_S1_L01_R2_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample02_S2_L01_R1_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample02_S2_L01_R2_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample03_S3_L01_R1_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample03_S3_L01_R2_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample04_S4_L01_R1_001.fastq.gz", + "demultiplex/out-test-fc01-1/Sample04_S4_L01_R2_001.fastq.gz", + "demultiplex/out-test-fc01-1_undetermined", + "demultiplex/out-test-fc01-1_undetermined/Undetermined_L01_R1_001.fastq.gz", + "demultiplex/out-test-fc01-1_undetermined/Undetermined_L01_R2_001.fastq.gz", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_plots/pdf/general_stats_table.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png", + "multiqc/multiqc_plots/png/general_stats_table.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_plots/svg/general_stats_table.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "samplesheet", + "samplesheet/atacseq_samplesheet.csv", + "samplesheet/rnaseq_samplesheet.csv", + "samplesheet/taxprofiler_samplesheet.csv", + "test-fc01", + "test-fc01.csv", + "test-fc01/L001", + "test-fc01/L001/Sample01.fastp.html", + "test-fc01/L001/Sample01.fastp.json", + "test-fc01/L001/Sample01_1.fastp.fastq.gz", + "test-fc01/L001/Sample01_1.fastp.fastq.gz.md5", + "test-fc01/L001/Sample01_1.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample01_1.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample01_1.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample01_2.fastp.fastq.gz", + "test-fc01/L001/Sample01_2.fastp.fastq.gz.md5", + "test-fc01/L001/Sample01_2.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample01_2.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample01_2.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample02.fastp.html", + "test-fc01/L001/Sample02.fastp.json", + "test-fc01/L001/Sample02_1.fastp.fastq.gz", + "test-fc01/L001/Sample02_1.fastp.fastq.gz.md5", + "test-fc01/L001/Sample02_1.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample02_1.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample02_1.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample02_2.fastp.fastq.gz", + "test-fc01/L001/Sample02_2.fastp.fastq.gz.md5", + "test-fc01/L001/Sample02_2.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample02_2.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample02_2.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample03.fastp.html", + "test-fc01/L001/Sample03.fastp.json", + "test-fc01/L001/Sample03_1.fastp.fastq.gz", + "test-fc01/L001/Sample03_1.fastp.fastq.gz.md5", + "test-fc01/L001/Sample03_1.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample03_1.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample03_1.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample03_2.fastp.fastq.gz", + "test-fc01/L001/Sample03_2.fastp.fastq.gz.md5", + "test-fc01/L001/Sample03_2.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample03_2.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample03_2.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample04.fastp.html", + "test-fc01/L001/Sample04.fastp.json", + "test-fc01/L001/Sample04_1.fastp.fastq.gz", + "test-fc01/L001/Sample04_1.fastp.fastq.gz.md5", + "test-fc01/L001/Sample04_1.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample04_1.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample04_1.fastp.fastq.gz_summary.txt", + "test-fc01/L001/Sample04_2.fastp.fastq.gz", + "test-fc01/L001/Sample04_2.fastp.fastq.gz.md5", + "test-fc01/L001/Sample04_2.fastp.fastq.gz_fastqc_data.txt", + "test-fc01/L001/Sample04_2.fastp.fastq.gz_fastqc_report.html", + "test-fc01/L001/Sample04_2.fastp.fastq.gz_summary.txt" + ], + [ + "FC01.L01.mgikit.general:md5,624b0d43c3995fde2c122c447e780191", + "FC01.L01.mgikit.info:md5,69a72a1c43d47032828d8b97c6cf8807", + "FC01.L01.mgikit.sample_stats:md5,20d5b9f4a430fd34579e0f70b42f73f0", + "FC01.L01.mgikit.undetermined_barcode:md5,c1960b64d4cc5c141d742aa1b6f57a31", + "FC01.L01.mgikit.undetermined_barcode.complete:md5,c1960b64d4cc5c141d742aa1b6f57a31", + "Sample01_S1_L01_R1_001.fastq.gz:md5,19752026de713f514637fab29e4ffac4", + "Sample01_S1_L01_R2_001.fastq.gz:md5,f9015fa4a73977dd093b8b2e4a67a316", + "Sample02_S2_L01_R1_001.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144", + "Sample02_S2_L01_R2_001.fastq.gz:md5,7348f1bf066a69ee48ed3ee6afa2f91b", + "Sample03_S3_L01_R1_001.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f", + "Sample03_S3_L01_R2_001.fastq.gz:md5,38ba38d0d71343e6125fc15b63483ad4", + "Sample04_S4_L01_R1_001.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4", + "Sample04_S4_L01_R2_001.fastq.gz:md5,cd88081007614f5bed7975b04e25b7e3", + "Undetermined_L01_R1_001.fastq.gz:md5,63ce3d442f4f8c42e0d9addea9273315", + "Undetermined_L01_R2_001.fastq.gz:md5,0af27de6fc08cce579e160c31fbe61b9", + "fastp-insert-size-plot.txt:md5,c3c048605296e5adfd1761bc1871de3b", + + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fff", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64ff", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90", + + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,0ac96d6e7126238688e36613c525b309", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,0ac96d6e7126238688e36613c525b309", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,0eb066261c3786efd7735fcb261a02c4", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,0eb066261c3786efd7735fcb261a02c4", + "fastp_filtered_reads_plot.txt:md5,4b41e2709debc2e3359f2382c31670c6", + "fastqc_overrepresented_sequences_plot.txt:md5,22b07e7ba524ece1c795a18197ef9342", + "fastqc_per_base_n_content_plot.txt:md5,4b4849f2db8f29bd4d6dcd5f190a40d4", + "fastqc_per_base_sequence_quality_plot.txt:md5,f9103469a4225c35800d1b9eca0c15ea", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,ef8bf33d0d608d9899d03db587773a5f", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f", + "fastqc_per_sequence_quality_scores_plot.txt:md5,5f2772bb49befe838597266ebc7c772b", + "fastqc_sequence_counts_plot.txt:md5,7f22bfc8a332ead3f4629f44b8967b69", + "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015", + "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2", + "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700", + "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2", + "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4", + "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4", + "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2", + "Sample01_1.fastp.fastq.gz_summary.txt:md5,8dd26c87ee66aaa4e7bb5f02ea05b8df", + "Sample01_2.fastp.fastq.gz:md5,f9015fa4a73977dd093b8b2e4a67a316", + "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a", + "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73", + "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83", + "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0", + "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144", + "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9", + "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21", + "Sample02_1.fastp.fastq.gz_summary.txt:md5,5cc98027a0a13d930b52f42c18d5dbd2", + "Sample02_2.fastp.fastq.gz:md5,7348f1bf066a69ee48ed3ee6afa2f91b", + "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30", + "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e", + "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa", + "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d", + "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f", + "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423", + "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa", + "Sample03_1.fastp.fastq.gz_summary.txt:md5,8cd319cb78678224290dc56d9d46b7fd", + "Sample03_2.fastp.fastq.gz:md5,38ba38d0d71343e6125fc15b63483ad4", + "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549", + "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa", + "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09", + "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6", + "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4", + "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132", + "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217", + "Sample04_1.fastp.fastq.gz_summary.txt:md5,5fea6afad96654dbd2c100943d45d6e8", + "Sample04_2.fastp.fastq.gz:md5,cd88081007614f5bed7975b04e25b7e3", + "Sample04_2.fastp.fastq.gz.md5:md5,ae878e68b8e0861b290a20472cb71bf9", + "Sample04_2.fastp.fastq.gz_fastqc_data.txt:md5,d64d9b1a348735f42f939307c04228bf", + "Sample04_2.fastp.fastq.gz_summary.txt:md5,20d5e8b942681419b885dca77ab9d250" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.1" + }, + "timestamp": "2024-12-08T18:24:13.756472" + } +} \ No newline at end of file From eac2342d559fda6ae9c22e8162b8260e6a887136 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Mon, 16 Dec 2024 23:05:50 +1100 Subject: [PATCH 06/10] fix tests --- tests/mgikit.nf.test.snap | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap index 245f4798..74fc2b29 100644 --- a/tests/mgikit.nf.test.snap +++ b/tests/mgikit.nf.test.snap @@ -16,7 +16,7 @@ "md5sum": 8.3 }, "Workflow": { - "nf-core/demultiplex": "v1.5.4" + "nf-core/demultiplex": "v1.5.4dev" } }, [ @@ -234,14 +234,14 @@ "Undetermined_L01_R2_001.fastq.gz:md5,0af27de6fc08cce579e160c31fbe61b9", "fastp-insert-size-plot.txt:md5,c3c048605296e5adfd1761bc1871de3b", - "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fff", - "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64ff", - "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0", - "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb", - "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904", - "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90", - "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904", - "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90", + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fffac", + "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fffac", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0e5", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb0e5", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d", "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,0ac96d6e7126238688e36613c525b309", "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,0ac96d6e7126238688e36613c525b309", @@ -252,13 +252,13 @@ "fastqc_per_base_n_content_plot.txt:md5,4b4849f2db8f29bd4d6dcd5f190a40d4", "fastqc_per_base_sequence_quality_plot.txt:md5,f9103469a4225c35800d1b9eca0c15ea", "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,ef8bf33d0d608d9899d03db587773a5f", - "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f4", "fastqc_per_sequence_quality_scores_plot.txt:md5,5f2772bb49befe838597266ebc7c772b", "fastqc_sequence_counts_plot.txt:md5,7f22bfc8a332ead3f4629f44b8967b69", "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015", "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2", "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700", - "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2", + "Sample01.fastp.json:md5,39affa02f97357ff249ab6557af88cd8", "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4", "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4", "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2", @@ -267,7 +267,7 @@ "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a", "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73", "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83", - "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0", + "Sample02.fastp.json:md5,d1bf54e9c8723c30995cd7ce6cdc0a31", "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144", "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9", "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21", @@ -276,7 +276,7 @@ "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30", "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e", "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa", - "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d", + "Sample03.fastp.json:md5,adaa33288edef3d1a8e6de4ef15c52a8", "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f", "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423", "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa", @@ -285,7 +285,7 @@ "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549", "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa", "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09", - "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6", + "Sample04.fastp.json:md5,f9c94ad8d70305fd217479151cf5bf11", "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4", "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132", "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217", From f6a0743d65694370d3a1f5c2c93e0cdafeb5899b Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Tue, 17 Dec 2024 20:29:53 +1100 Subject: [PATCH 07/10] update docs --- README.md | 1 + docs/output.md | 5 +++++ docs/usage.md | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 44ead372..8d3138dc 100755 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - [sgdemux](#sgdemux) - demultiplexing bgzipped fastq files produced by Singular Genomics (CONDITIONAL) - [fqtk](#fqtk) - a toolkit for working with FASTQ files, written in Rust (CONDITIONAL) - [mkfastq](#mkfastq) - converting bcl files to fastq, and demultiplexing for single-cell sequencing data (CONDITIONAL) +- [mgikit](#mgikit) - Demultiplex fastq files generated by MGI sequencers using [mgikit](https://github.com/sagc-bioinformatics/mgikit) (CONDITIONAL). 3. [checkqc](#checkqc) - (optional) Check quality criteria after demultiplexing (bcl2fastq only) 4. [fastp](#fastp) - Adapter and quality trimming diff --git a/docs/output.md b/docs/output.md index 49dbc39a..b47aba89 100755 --- a/docs/output.md +++ b/docs/output.md @@ -16,6 +16,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [sgdemux](#sgdemux) - demultiplexing bgzipped fastq files produced by Singular Genomics (CONDITIONAL) - [fqtk](#fqtk) - demultiplexing fastq files (CONDITIONAL) - [mkfastq](#mkfastq) - converting bcl files to fastq, and demultiplexing for single-cell sequencing data (CONDITIONAL) +- [mgikit](#mgikit) - Demultiplex fastq files generated by MGI sequencers using [mgikit](https://github.com/sagc-bioinformatics/mgikit) (CONDITIONAL). - [checkqc](#checkqc) - (optional) Check quality criteria after demultiplexing (bcl2fastq only) - [fastp](#fastp) - Adapter and quality trimming - [Falco](#falco) - Raw read QC @@ -136,6 +137,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d +### mgikit + +[mgikit](https://github.com/sagc-bioinformatics/mgikit) demultiplexes fastq files generated by MGI sequencers (CONDITIONAL). + ### fastp
diff --git a/docs/usage.md b/docs/usage.md index 1fd50c51..06652cc0 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,7 +37,7 @@ DDMMYY_SERIAL_NUMBER_FC3,/path/to/SampleSheet3.csv,3,/path/to/sequencer/output3 | `id` | Flowcell id | | `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes | | `lane` | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed | -| `flowcell` | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory | +| `flowcell` | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. mgikit demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. | An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/flowcell_input.csv) has been provided with the pipeline. @@ -70,6 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo | **sgdemux** | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv) | | **fqtk** | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv) | | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) | +| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) | ## Running the pipeline From 7760b0ce59bf9d97848b08b6c40fb51c7413c3d9 Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Tue, 17 Dec 2024 20:35:59 +1100 Subject: [PATCH 08/10] update docs --- docs/usage.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 06652cc0..860ce678 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -22,7 +22,7 @@ When using the demultiplexer fqtk, the _pipeline_ samplesheet must contain an ad --input '[path to pipeline samplesheet file]' ``` -#### Example: Pipeline samplesheet +### Example: Pipeline samplesheet ```csv title="samplesheet.csv" id,samplesheet,lane,flowcell @@ -43,7 +43,7 @@ An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/te Note that the run directory in the `flowcell` column must lead to a `tar.gz` for compatibility with the demultiplexers sgdemux and fqtk. -#### Example: Pipeline samplesheet for fqtk +### Example: Pipeline samplesheet for fqtk ```csv title="samplesheet.csv" id,samplesheet,lane,flowcell,per_flowcell_manifest @@ -70,7 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo | **sgdemux** | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv) | | **fqtk** | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv) | | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) | -| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) | +| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) | ## Running the pipeline @@ -199,7 +199,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` - - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. From ea53277adda280b08a9ad7f2cc32b9718a278a5f Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Tue, 17 Dec 2024 20:42:37 +1100 Subject: [PATCH 09/10] update docs --- docs/usage.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 860ce678..15e1f773 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,12 +32,12 @@ DDMMYY_SERIAL_NUMBER_FC2,/path/to/SampleSheet2.csv,1,/path/to/sequencer/output2 DDMMYY_SERIAL_NUMBER_FC3,/path/to/SampleSheet3.csv,3,/path/to/sequencer/output3 ``` -| Column | Description | -| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | -| `id` | Flowcell id | -| `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes | -| `lane` | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed | -| `flowcell` | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. mgikit demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. | +| Column | Description | +| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | Flowcell id | +| `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes | +| `lane` | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed | +| `flowcell` | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. `mgikit` demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. | An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/flowcell_input.csv) has been provided with the pipeline. @@ -70,7 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo | **sgdemux** | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv) | | **fqtk** | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv) | | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) | -| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) | +| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) | ## Running the pipeline From 10c22e4685914637b8a15fe68d60b2a1147ad1eb Mon Sep 17 00:00:00 2001 From: ziadbkh Date: Tue, 17 Dec 2024 21:46:32 +1100 Subject: [PATCH 10/10] make lint happy --- conf/test_mgikit.config | 4 ++-- tests/mgikit.nf.test.snap | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test_mgikit.config b/conf/test_mgikit.config index 07968209..cbd5c3cd 100644 --- a/conf/test_mgikit.config +++ b/conf/test_mgikit.config @@ -13,8 +13,8 @@ // Limit resources so that this can run on GitHub Actions process { resourceLimits = [ - cpus: 2, - memory: '12.GB', + cpus: 1, + memory: '7.GB', time: '4.h' ] } diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap index 74fc2b29..b308750c 100644 --- a/tests/mgikit.nf.test.snap +++ b/tests/mgikit.nf.test.snap @@ -258,7 +258,7 @@ "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015", "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2", "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700", - "Sample01.fastp.json:md5,39affa02f97357ff249ab6557af88cd8", + "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2", "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4", "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4", "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2", @@ -267,7 +267,7 @@ "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a", "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73", "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83", - "Sample02.fastp.json:md5,d1bf54e9c8723c30995cd7ce6cdc0a31", + "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0", "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144", "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9", "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21", @@ -276,7 +276,7 @@ "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30", "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e", "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa", - "Sample03.fastp.json:md5,adaa33288edef3d1a8e6de4ef15c52a8", + "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d", "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f", "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423", "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa", @@ -285,7 +285,7 @@ "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549", "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa", "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09", - "Sample04.fastp.json:md5,f9c94ad8d70305fd217479151cf5bf11", + "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6", "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4", "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132", "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217",