From ccf2f6cd21c854a4153e57453e153980fc20910a Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Fri, 6 Dec 2024 13:42:58 +1100
Subject: [PATCH 01/10] add mgikit to the demultiplexer enum in nextflow_schema.json

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8558c86e..1ccf2a99 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -97,7 +97,7 @@
             "properties": {
                 "demultiplexer": {
                     "type": "string",
-                    "enum": ["bases2fastq", "bcl2fastq", "bclconvert", "fqtk", "sgdemux", "mkfastq"],
+                    "enum": ["bases2fastq", "bcl2fastq", "bclconvert", "fqtk", "sgdemux", "mkfastq", "mgikit"],
                     "description": "Demultiplexer to use.",
                     "fa_icon": "fas fa-microscope",
                     "default": "bclconvert"

From 4fa5f0b88034dc2ecc0bf1cd4d027bd29b520917 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Fri, 6 Dec 2024 20:14:11 +1100
Subject: [PATCH 02/10] add mgikit module

---
 modules.json                                  |   5 +
 .../mgikit/demultiplex/environment.yml        |   5 +
 modules/nf-core/mgikit/demultiplex/main.nf    |  81 ++++++++
 modules/nf-core/mgikit/demultiplex/meta.yml   | 152 +++++++++++++++
 .../mgikit/demultiplex/tests/main.nf.test     | 105 ++++++++++
 .../demultiplex/tests/main.nf.test.snap       | 181 ++++++++++++++++++
 6 files changed, 529 insertions(+)
 create mode 100644 modules/nf-core/mgikit/demultiplex/environment.yml
 create mode 100644 modules/nf-core/mgikit/demultiplex/main.nf
 create mode 100644 modules/nf-core/mgikit/demultiplex/meta.yml
 create mode 100644 modules/nf-core/mgikit/demultiplex/tests/main.nf.test
 create mode 100644 modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap

diff --git a/modules.json b/modules.json
index c73432ce..8fef246e 100644
--- a/modules.json
+++ b/modules.json
@@ -60,6 +60,11 @@
                         "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
                         "installed_by": ["modules"]
                     },
+                    "mgikit/demultiplex": {
+                        "branch": "master",
+                        "git_sha": "0bf42a3bdf105ddc58f6cc5523c86b4617c4ed04",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
diff --git a/modules/nf-core/mgikit/demultiplex/environment.yml b/modules/nf-core/mgikit/demultiplex/environment.yml
new file mode 100644
index 00000000..9ae21494
--- /dev/null
+++ b/modules/nf-core/mgikit/demultiplex/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::mgikit=0.1.6
diff --git a/modules/nf-core/mgikit/demultiplex/main.nf b/modules/nf-core/mgikit/demultiplex/main.nf
new file mode 100644
index 00000000..df64578d
--- /dev/null
+++ b/modules/nf-core/mgikit/demultiplex/main.nf
@@ -0,0 +1,81 @@
+process MGIKIT_DEMULTIPLEX {
+    tag {"$run_id"}
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mgikit:0.1.6--h4c94732_0' :
+        'biocontainers/mgikit:0.1.6--h4c94732_0' }"
+
+    input:
+    tuple val(meta), path(samplesheet), path(run_dir)
+
+    output:
+    tuple val(meta), path("${prefix}/*.fastq.gz")                                                    , emit: fastq
+    tuple val(meta), path("${prefix}_undetermined/*.fastq.gz")                                       , optional:true, emit: undetermined
+    tuple val(meta), path("${prefix}_ambiguous/*.fastq.gz")                                          , optional:true, emit: ambiguous
+    tuple val(meta), path("${prefix}/*mgikit.undetermined_barcode*")                                 , emit: undetermined_reports, optional:true
+    tuple val(meta), path("${prefix}/*mgikit.ambiguous_barcode*")                                    , emit: ambiguous_reports, optional:true
+    tuple val(meta), path("${prefix}/*mgikit.general")                                               , emit: general_info_reports
+    tuple val(meta), path("${prefix}/*mgikit.info")                                                  , emit: index_reports
+    tuple val(meta), path("${prefix}/*mgikit.sample_stats")                                          , emit: sample_stat_reports
+    tuple val(meta), path("${prefix}/*mgikit.{info,general,ambiguous_barcode,undetermined_barcode}") , emit: qc_reports
+    path("versions.yml")                                                                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    run_id = meta.lane ? "${meta.id}-${meta.lane}" : "${meta.id}"
+    prefix = task.ext.prefix ?: "out-${run_id}"
+
+    """
+    mgikit demultiplex \\
+        -i "${run_dir}" \\
+        -s "${samplesheet}" \\
+        -o "${prefix}" \\
+        ${args}
+
+    if find ${prefix} -name 'Undetermined*.fastq.gz' -print -quit | grep -q .; then
+        mkdir -p "${prefix}_undetermined"
+        mv ${prefix}/Undetermined*.fastq.gz ${prefix}_undetermined/
+    fi
+
+    if find ${prefix} -name 'Ambiguous*.fastq.gz' -print -quit | grep -q .; then
+        mkdir -p "${prefix}_ambiguous"
+        mv ${prefix}/Ambiguous*.fastq.gz ${prefix}_ambiguous/
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mgikit: \$(mgikit --version 2>&1 | grep 'MGIKIT - MGI data demultipexing kit' | sed -e 's/MGIKIT - MGI data demultipexing kit. //g')
+    END_VERSIONS
+    """
+
+    stub:
+    run_id = meta.lane ? "${meta.id}-${meta.lane}" : "${meta.id}"
+    prefix = task.ext.prefix ?: "out-${run_id}"
+    """
+    mkdir "${prefix}"
+    mkdir -p "${prefix}_undetermined"
+
+    touch "${prefix}/FC1.L01.mgikit.general"
+    touch "${prefix}/FC1.L01.mgikit.info"
+    touch "${prefix}/FC1.L01.mgikit.undetermined_barcode"
+    touch "${prefix}/FC1.L01.mgikit.sample_stats"
+
+    echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-001_S1_L01_R1_001.fastq.gz"
+    echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-001_S1_L01_R2_001.fastq.gz"
+    echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-002_S2_L01_R1_001.fastq.gz"
+    echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}/23-002_S2_L01_R2_001.fastq.gz"
+
+    echo "@R001:0001:FC1:1:60:1:3 1:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}_undetermined/Undetermined_L01_R1_001.fastq.gz"
+    echo "@R001:0001:FC1:1:60:1:3 2:N:0:GACGAATG\\nNNNNNNNN\\n+\\nDDDDDDDD" | gzip > "${prefix}_undetermined/Undetermined_L01_R2_001.fastq.gz"
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mgikit: \$(mgikit --version 2>&1 | grep 'MGIKIT - MGI data demultipexing kit' | sed -e 's/MGIKIT - MGI data demultipexing kit. //g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mgikit/demultiplex/meta.yml b/modules/nf-core/mgikit/demultiplex/meta.yml
new file mode 100644
index 00000000..e4282538
--- /dev/null
+++ b/modules/nf-core/mgikit/demultiplex/meta.yml
@@ -0,0 +1,152 @@
+name: "mgikit_demultiplex"
+description: Demultiplex MGI fastq files
+keywords:
+  - demultiplex
+  - mgi
+  - fastq
+tools:
+  - "mgikit demultiplex":
+      description: "Demultiplex MGI fastq files"
+      homepage: "https://sagc-bioinformatics.github.io/mgikit/"
+      documentation: "https://sagc-bioinformatics.github.io/mgikit/"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - samplesheet:
+        type: file
+        description: "Input samplesheet"
+        pattern: "*.{csv}"
+    - run_dir:
+        type: directory
+        description: |
+          Input run directory containing BioInfo.csv and fastq data.
+          fastq files should be in MGI format and can be either single or paired end.
+output:
+  - fastq:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*.fastq.gz"
+      - ${prefix}/*.fastq.gz:
+          type: file
+          description: |
+            Gzip-compressed fastq files that mgikit wrote to the output directory
+            and that were not moved to the undetermined/ambiguous directories.
+          pattern: "*.fastq.gz"
+  - undetermined:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "Undetermined*.fastq.gz"
+      - ${prefix}_undetermined/*.fastq.gz:
+          type: file
+          description: |
+            Undetermined*.fastq.gz files, moved by the module from the output
+            directory into a separate ${prefix}_undetermined directory (optional).
+          pattern: "Undetermined*.fastq.gz"
+  - ambiguous:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "Ambiguous*.fastq.gz"
+      - ${prefix}_ambiguous/*.fastq.gz:
+          type: file
+          description: |
+            Ambiguous*.fastq.gz files, moved by the module from the output
+            directory into a separate ${prefix}_ambiguous directory (optional).
+          pattern: "Ambiguous*.fastq.gz"
+  - undetermined_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*mgikit.undetermined_barcode*"
+      - ${prefix}/*mgikit.undetermined_barcode*:
+          type: file
+          description: |
+            Report file(s) in the output directory whose name matches
+            *mgikit.undetermined_barcode* (optional output).
+          pattern: "*mgikit.undetermined_barcode*"
+  - ambiguous_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*mgikit.ambiguous_barcode*"
+      - ${prefix}/*mgikit.ambiguous_barcode*:
+          type: file
+          description: |
+            Report file(s) in the output directory whose name matches
+            *mgikit.ambiguous_barcode* (optional output).
+          pattern: "*mgikit.ambiguous_barcode*"
+  - general_info_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*mgikit.general"
+      - ${prefix}/*mgikit.general:
+          type: file
+          description: |
+            Report file in the output directory whose name ends with
+            mgikit.general.
+          pattern: "*mgikit.general"
+  - index_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*mgikit.info"
+      - ${prefix}/*mgikit.info:
+          type: file
+          description: |
+            Report file in the output directory whose name ends with
+            mgikit.info.
+          pattern: "*mgikit.info"
+  - sample_stat_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "*mgikit.sample_stats"
+      - ${prefix}/*mgikit.sample_stats:
+          type: file
+          description: |
+            Report file in the output directory whose name ends with
+            mgikit.sample_stats.
+          pattern: "*mgikit.sample_stats"
+  - qc_reports:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+          pattern: "mgikit.{info,general,ambiguous_barcode,undetermined_barcode}"
+      - ${prefix}/*mgikit.{info,general,ambiguous_barcode,undetermined_barcode}:
+          type: file
+          description: |
+            Collection of the mgikit.info, mgikit.general, mgikit.ambiguous_barcode
+            and mgikit.undetermined_barcode report files found in the output directory.
+          pattern: "*mgikit.{info,general,ambiguous_barcode,undetermined_barcode}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@ziadbkh"
diff --git a/modules/nf-core/mgikit/demultiplex/tests/main.nf.test b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test
new file mode 100644
index 00000000..1228a30c
--- /dev/null
+++ b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test
@@ -0,0 +1,105 @@
+nextflow_process {
+
+    name "Test Process MGIKIT_DEMULTIPLEX"
+    script "modules/nf-core/mgikit/demultiplex/main.nf"
+    process "MGIKIT_DEMULTIPLEX"
+    tag "mgikit"
+    tag "mgikit/demultiplex"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "unzip"
+
+    setup {
+        run("UNZIP") {
+            script "modules/nf-core/unzip/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id: 'fc01', lane:1 ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01.zip', checkIfExists: true)
+                ]
+                """
+            }
+        }
+    }
+
+    test("run mgikit demultiplex without errors") {
+
+        when {
+            process {
+                """
+                input[0] = UNZIP.out.unzipped_archive.map{[
+                    [ id: 'test', lane:1 ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01_sample_sheet.csv', checkIfExists: true),
+                    it[1]
+                ]}
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            with(process.out) {
+                assert fastq.size() == 1
+                assert fastq[0][1].any { file(it).name.endsWith(".fastq.gz") }
+                
+                assert undetermined.size() == 1
+                assert undetermined[0][1].any { file(it).name.startsWith("Undetermined") && file(it).name.endsWith(".fastq.gz") }
+                
+                assert ambiguous.size() == 0
+                
+                assert undetermined_reports.size() == 1
+                assert undetermined_reports[0][1].any { file(it).name.contains("mgikit.undetermined_barcode") }
+                
+                assert ambiguous_reports.size() == 0
+                
+                assert general_info_reports.size() == 1
+                assert file(general_info_reports[0][1]).name.contains("mgikit.general")
+                
+                assert index_reports.size() == 1
+                assert file(index_reports[0][1]).name.contains("mgikit.info")
+                
+                assert sample_stat_reports.size() == 1
+                assert file(sample_stat_reports[0][1]).name.contains("mgikit.sample_stats")
+                
+                assert qc_reports.size() == 1
+                assert qc_reports[0][1].size() == 3
+                
+                assert file(versions[0]).text.contains("mgikit")
+            }
+        }
+
+    }
+
+    test("run stub mode of mgikit demultiplex without errors") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = UNZIP.out.unzipped_archive.map{[
+                    [ id: 'test', lane:1 ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/mgi/fc01_sample_sheet.csv', checkIfExists: true),
+                    it[1]
+                ]}
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            with(process.out) {
+                assert fastq.size() == 1
+                assert fastq[0][1].size() == 4
+                assert undetermined.size() == 1
+                assert undetermined[0][1].size() == 2
+                assert general_info_reports.size() == 1
+                assert index_reports.size() == 1
+                assert sample_stat_reports.size() == 1
+                assert file(versions[0]).text.contains("mgikit")
+            }
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap
new file mode 100644
index 00000000..a300c21c
--- /dev/null
+++ b/modules/nf-core/mgikit/demultiplex/tests/main.nf.test.snap
@@ -0,0 +1,181 @@
+{
+    "Should work with stub run": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "23-001_S1_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "23-001_S1_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28",
+                            "23-002_S2_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "23-002_S2_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28"
+                        ]
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "Undetermined_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "Undetermined_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28"
+                        ]
+                    ]
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    
+                ],
+                "5": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "6": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "7": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.sample_stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "8": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "9": [
+                    "versions.yml:md5,085c8140a4b50230ec3a0974e358562d"
+                ],
+                "ambiguous": [
+                    
+                ],
+                "ambiguous_reports": [
+                    
+                ],
+                "fastq": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "23-001_S1_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "23-001_S1_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28",
+                            "23-002_S2_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "23-002_S2_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28"
+                        ]
+                    ]
+                ],
+                "general_info_reports": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "index_reports": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "qc_reports": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "FC1.L01.mgikit.general:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "FC1.L01.mgikit.info:md5,d41d8cd98f00b204e9800998ecf8427e",
+                            "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        ]
+                    ]
+                ],
+                "sample_stat_reports": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.sample_stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "undetermined": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        [
+                            "Undetermined_L01_R1_001.fastq.gz:md5,c8c3d4e857944bce7f5bbebf2a8dd339",
+                            "Undetermined_L01_R2_001.fastq.gz:md5,a2ed84ae93b89206153041e8d9f4aa28"
+                        ]
+                    ]
+                ],
+                "undetermined_reports": [
+                    [
+                        {
+                            "id": "test",
+                            "lane": "1"
+                        },
+                        "FC1.L01.mgikit.undetermined_barcode:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,085c8140a4b50230ec3a0974e358562d"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "23.04.1"
+        },
+        "timestamp": "2024-11-21T21:23:03.175331921"
+    }
+}
\ No newline at end of file

From 1486d1166edcde3fe68c0eaf1cc624a0150dbfb8 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Fri, 6 Dec 2024 20:28:23 +1100
Subject: [PATCH 03/10] add mgikit subworkflow and call it from the demultiplex workflow

---
 subworkflows/local/mgikit_demultiplex/main.nf | 98 +++++++++++++++++++
 workflows/demultiplex.nf                      | 10 ++
 2 files changed, 108 insertions(+)
 create mode 100644 subworkflows/local/mgikit_demultiplex/main.nf

diff --git a/subworkflows/local/mgikit_demultiplex/main.nf b/subworkflows/local/mgikit_demultiplex/main.nf
new file mode 100644
index 00000000..1a702f9a
--- /dev/null
+++ b/subworkflows/local/mgikit_demultiplex/main.nf
@@ -0,0 +1,98 @@
+#!/usr/bin/env nextflow
+
+//
+// Demultiplex Element Biosciences bases data using bases2fastq
+//
+
+include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main"
+
+workflow MGIKIT_DEMULTIPLEX {
+    take:
+        ch_flowcell     // [[id:"", lane:""],samplesheet.csv, path/to/bases/files]
+
+    main:
+        DEMULTIPLEX( ch_flowcell )
+
+        // Generate meta for each fastq
+        ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq)
+
+    emit:
+        fastq                   = ch_fastq_with_meta
+        unassigned              = DEMULTIPLEX.out.undetermined
+        ambiguous               = DEMULTIPLEX.out.ambiguous
+        qc_reports              = DEMULTIPLEX.out.qc_reports;
+        versions                = DEMULTIPLEX.out.versions
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Add meta values to fastq channel
+def generate_fastq_meta(ch_reads) {
+    // Create a tuple with the meta.id and the fastq
+    ch_reads.transpose().map{
+        fc_meta, fastq ->
+        def meta = [
+            "id": fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, ""),
+            "samplename": fastq.getSimpleName().toString() - ~/_S\d+_L0\d+_R\d+.*$/,
+            "readgroup": [:],
+            "fcid": fc_meta.id,
+            "lane": fc_meta.lane
+        ]
+        meta.readgroup = readgroup_from_fastq(fastq)
+        meta.readgroup.SM = meta.samplename
+
+        return [ meta , fastq ]
+    }
+    // Group by meta.id for PE samples
+    .groupTuple(by: [0])
+    // Add meta.single_end
+    .map {
+        meta, fastq ->
+        if (fastq.size() == 1){
+            meta.single_end = true
+        } else {
+            meta.single_end = false
+        }
+        return [ meta, fastq.flatten() ]
+    }
+}
+
+// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040
+def readgroup_from_fastq(path) {
+    // expected format:
+    // xx:yy:FLOWCELLID:LANE:... (seven fields)
+
+    def line
+
+    path.withInputStream {
+        InputStream gzipStream = new java.util.zip.GZIPInputStream(it)
+        Reader decoder = new InputStreamReader(gzipStream, 'ASCII')
+        BufferedReader buffered = new BufferedReader(decoder)
+        line = buffered.readLine()
+    }
+    assert line.startsWith('@')
+    line = line.substring(1)
+    def fields = line.split(':')
+    //println(line);
+    //println(fields);
+    def rg = [:]
+
+    // https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq
+    // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:UMI <read>:N:0:<index sequence>"
+    sequencer_serial = fields[0]
+    run_nubmer       = fields[1]
+    fcid             = fields[2]
+    lane             = fields[3]
+    index            = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""
+
+    rg.ID = [fcid,lane].join(".")
+    rg.PU = [fcid, lane, index].findAll().join(".")
+    // TODO: @edmundmiller verify if this is correct
+    rg.PL = "ELEMENT"
+
+    return rg
+}
diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf
index 9597883e..0bf5387c 100644
--- a/workflows/demultiplex.nf
+++ b/workflows/demultiplex.nf
@@ -14,6 +14,7 @@ include { BASES_DEMULTIPLEX                                             } from '
 include { FQTK_DEMULTIPLEX                                              } from '../subworkflows/local/fqtk_demultiplex/main'
 include { MKFASTQ_DEMULTIPLEX                                           } from '../subworkflows/local/mkfastq_demultiplex/main'
 include { SINGULAR_DEMULTIPLEX                                          } from '../subworkflows/local/singular_demultiplex/main'
+include { MGIKIT_DEMULTIPLEX                                            } from '../subworkflows/local/mgikit_demultiplex/main'
 include { RUNDIR_CHECKQC                                                } from '../subworkflows/local/rundir_checkqc/main'
 include { FASTQ_TO_SAMPLESHEET as FASTQ_TO_SAMPLESHEET_RNASEQ           } from '../modules/local/fastq_to_samplesheet/main'
 include { FASTQ_TO_SAMPLESHEET as FASTQ_TO_SAMPLESHEET_ATACSEQ          } from '../modules/local/fastq_to_samplesheet/main'
@@ -142,6 +143,7 @@ workflow DEMULTIPLEX {
     // Re-join the metadata and the untarred run directory with the samplesheet
 
     if (demultiplexer in ['bclconvert', 'bcl2fastq']) ch_flowcells_tar_merged = ch_flowcells_tar.samplesheets.join(ch_flowcells_tar.run_dirs, failOnMismatch:true, failOnDuplicate:true)
+    else if (demultiplexer == 'mgikit'){ ch_flowcells_tar_merged = Channel.empty() }
     else {
         ch_flowcells_tar_merged = ch_flowcells_tar.samplesheets.join( UNTAR_FLOWCELL ( ch_flowcells_tar.run_dirs ).untar, failOnMismatch:true, failOnDuplicate:true )
         ch_versions = ch_versions.mix(UNTAR_FLOWCELL.out.versions)
@@ -217,6 +219,14 @@ workflow DEMULTIPLEX {
             ch_raw_fastq = ch_raw_fastq.mix(MKFASTQ_DEMULTIPLEX.out.fastq)
             ch_versions = ch_versions.mix(MKFASTQ_DEMULTIPLEX.out.versions)
             break
+        case 'mgikit':
+            // MODULE: mgikit
+            // Runs when "demultiplexer" is set to "mgikit"
+            MGIKIT_DEMULTIPLEX ( ch_flowcells )
+            ch_raw_fastq = ch_raw_fastq.mix(MGIKIT_DEMULTIPLEX.out.fastq)
+            ch_multiqc_files = ch_multiqc_files.mix(MGIKIT_DEMULTIPLEX.out.qc_reports.map { meta, metrics -> return metrics} )
+            ch_versions = ch_versions.mix(MGIKIT_DEMULTIPLEX.out.versions)
+            break
         default:
             error "Unknown demultiplexer: ${demultiplexer}"
     }

From 180f7cad12279dd50265ab30b4ee4138cf603563 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Fri, 6 Dec 2024 20:31:52 +1100
Subject: [PATCH 04/10] make lint happy

---
 subworkflows/local/mgikit_demultiplex/main.nf | 196 +++++++++---------
 1 file changed, 98 insertions(+), 98 deletions(-)

diff --git a/subworkflows/local/mgikit_demultiplex/main.nf b/subworkflows/local/mgikit_demultiplex/main.nf
index 1a702f9a..89fc6fa6 100644
--- a/subworkflows/local/mgikit_demultiplex/main.nf
+++ b/subworkflows/local/mgikit_demultiplex/main.nf
@@ -1,98 +1,98 @@
-#!/usr/bin/env nextflow
-
-//
-// Demultiplex Element Biosciences bases data using bases2fastq
-//
-
-include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main"
-
-workflow MGIKIT_DEMULTIPLEX {
-    take:
-        ch_flowcell     // [[id:"", lane:""],samplesheet.csv, path/to/bases/files]
-
-    main:
-        DEMULTIPLEX( ch_flowcell )
-
-        // Generate meta for each fastq
-        ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq)
-
-    emit:
-        fastq                   = ch_fastq_with_meta
-        unassigned              = DEMULTIPLEX.out.undetermined
-        ambiguous               = DEMULTIPLEX.out.ambiguous
-        qc_reports              = DEMULTIPLEX.out.qc_reports;
-        versions                = DEMULTIPLEX.out.versions
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    FUNCTIONS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// Add meta values to fastq channel
-def generate_fastq_meta(ch_reads) {
-    // Create a tuple with the meta.id and the fastq
-    ch_reads.transpose().map{
-        fc_meta, fastq ->
-        def meta = [
-            "id": fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, ""),
-            "samplename": fastq.getSimpleName().toString() - ~/_S\d+_L0\d+_R\d+.*$/,
-            "readgroup": [:],
-            "fcid": fc_meta.id,
-            "lane": fc_meta.lane
-        ]
-        meta.readgroup = readgroup_from_fastq(fastq)
-        meta.readgroup.SM = meta.samplename
-
-        return [ meta , fastq ]
-    }
-    // Group by meta.id for PE samples
-    .groupTuple(by: [0])
-    // Add meta.single_end
-    .map {
-        meta, fastq ->
-        if (fastq.size() == 1){
-            meta.single_end = true
-        } else {
-            meta.single_end = false
-        }
-        return [ meta, fastq.flatten() ]
-    }
-}
-
-// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040
-def readgroup_from_fastq(path) {
-    // expected format:
-    // xx:yy:FLOWCELLID:LANE:... (seven fields)
-
-    def line
-
-    path.withInputStream {
-        InputStream gzipStream = new java.util.zip.GZIPInputStream(it)
-        Reader decoder = new InputStreamReader(gzipStream, 'ASCII')
-        BufferedReader buffered = new BufferedReader(decoder)
-        line = buffered.readLine()
-    }
-    assert line.startsWith('@')
-    line = line.substring(1)
-    def fields = line.split(':')
-    //println(line);
-    //println(fields);
-    def rg = [:]
-
-    // https://www.elementbiosciences.com/resources/user-guides/workflow/bases2fastq
-    // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:UMI <read>:N:0:<index sequence>"
-    sequencer_serial = fields[0]
-    run_nubmer       = fields[1]
-    fcid             = fields[2]
-    lane             = fields[3]
-    index            = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""
-
-    rg.ID = [fcid,lane].join(".")
-    rg.PU = [fcid, lane, index].findAll().join(".")
-    // TODO: @edmundmiller verify if this is correct
-    rg.PL = "ELEMENT"
-
-    return rg
-}
+#!/usr/bin/env nextflow
+
+//
+// Demultiplex MGI sequencing data using mgikit
+//
+
+include { MGIKIT_DEMULTIPLEX as DEMULTIPLEX } from "../../../modules/nf-core/mgikit/demultiplex/main"
+
+workflow MGIKIT_DEMULTIPLEX {
+    take:
+        ch_flowcell     // [[id:"", lane:""],samplesheet.csv, path/to/bases/files] -- NOTE(review): passed unchanged to the mgikit module; confirm this layout matches the module's input
+
+    main:
+        DEMULTIPLEX( ch_flowcell )
+
+        // Derive a per-sample meta map for the demultiplexed fastq files
+        ch_fastq_with_meta = generate_fastq_meta(DEMULTIPLEX.out.fastq)
+
+    emit:
+        fastq                   = ch_fastq_with_meta
+        unassigned              = DEMULTIPLEX.out.undetermined
+        ambiguous               = DEMULTIPLEX.out.ambiguous
+        qc_reports              = DEMULTIPLEX.out.qc_reports;
+        versions                = DEMULTIPLEX.out.versions
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    FUNCTIONS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Add meta values to fastq channel
+def generate_fastq_meta(ch_reads) {
+    // Turn [flowcell_meta, [fastq, ...]] into one [sample_meta, [fastq, ...]] per sample.
+    // sample_meta carries id, samplename, readgroup, fcid, lane and single_end.
+    return ch_reads.transpose().map { fc_meta, fastq ->
+        // Sample name = file name up to the "_S<n>_L0<n>_R<n>" suffix
+        def sample_name = fastq.getSimpleName().toString().replaceAll(/_S\d+_L0\d+_R\d+.*$/, "")
+        // Read group is parsed from the first header line of the FASTQ itself
+        def read_group = readgroup_from_fastq(fastq)
+        read_group.SM = sample_name
+
+        def meta = [
+            "id": sample_name,
+            "samplename": sample_name,
+            "readgroup": read_group,
+            "fcid": fc_meta.id,
+            "lane": fc_meta.lane
+        ]
+        return [ meta, fastq ]
+    }
+    // Group on the whole meta map (tuple element 0); files of one sample are
+    // only merged when their meta maps are equal
+    .groupTuple(by: [0])
+    .map { meta, fastq ->
+        // Extend a copy of meta instead of mutating the map that was used as
+        // the grouping key
+        def sample_meta = meta + [ single_end: fastq.size() == 1 ]
+        return [ sample_meta, fastq.flatten() ]
+    }
+}
+
+// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040
+def readgroup_from_fastq(path) {
+    // Build a SAM read-group map (ID, PU, PL) from the first header line of a
+    // gzipped FASTQ. The header is assumed to be colon-separated in the form
+    // "@<instrument>:<run number>:<flowcell ID>:<lane>:...:<index sequence>".
+    // NOTE(review): confirm that mgikit writes headers in this layout.
+    def line = null
+
+    // withInputStream closes the underlying file stream once the closure returns
+    path.withInputStream { stream ->
+        def gzipStream = new java.util.zip.GZIPInputStream(stream)
+        def buffered = new BufferedReader(new InputStreamReader(gzipStream, 'ASCII'))
+        line = buffered.readLine()
+    }
+    // Fail with a readable message instead of a NullPointerException on an empty file
+    assert line != null && line.startsWith('@') : "Cannot parse FASTQ header of ${path}"
+
+    def fields = line.substring(1).split(':')
+    assert fields.size() >= 4 : "Unexpected FASTQ header format in ${path}: ${line}"
+
+    // Declared with 'def' so they stay local to this call rather than leaking
+    // into the script-wide binding shared by every call.
+    def fcid  = fields[2]
+    def lane  = fields[3]
+    // The last field is only used as the index when it looks like a barcode
+    def index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""
+
+    def rg = [:]
+    rg.ID = [fcid, lane].join(".")
+    rg.PU = [fcid, lane, index].findAll().join(".")
+    // MGI instruments are reported as DNBSEQ in the SAM specification's @RG PL values
+    rg.PL = "DNBSEQ"
+
+    return rg
+}

From 6329da061632262ec7f7cca00787f39393e6fed7 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Mon, 16 Dec 2024 22:49:40 +1100
Subject: [PATCH 05/10] adding mgikit tests

---
 conf/test_mgikit.config   |  32 ++++
 nextflow.config           |   1 +
 nf-test.config            |   1 -
 tests/mgikit.nf.test      |  36 +++++
 tests/mgikit.nf.test.snap | 305 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 374 insertions(+), 1 deletion(-)
 create mode 100644 conf/test_mgikit.config
 create mode 100644 tests/mgikit.nf.test
 create mode 100644 tests/mgikit.nf.test.snap

diff --git a/conf/test_mgikit.config b/conf/test_mgikit.config
new file mode 100644
index 00000000..07968209
--- /dev/null
+++ b/conf/test_mgikit.config
@@ -0,0 +1,32 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/demultiplex -profile test_mgikit,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+// Limit resources so that this can run on GitHub Actions
+process {
+    resourceLimits = [
+        cpus: 2,
+        memory: '12.GB',
+        time: '4.h'
+    ]
+}
+
+params {
+    config_profile_name        = 'Test mgikit profile'
+    config_profile_description = 'Minimal test dataset to check pipeline function with mgikit'
+
+    // Input data
+    input         = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/demultiplex/testdata/mgi/mgikit_input.csv'
+    demultiplexer = 'mgikit'
+    skip_tools    = "checkqc,samshee"
+}
+
+
diff --git a/nextflow.config b/nextflow.config
index 0d82db47..f825159f 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -190,6 +190,7 @@ profiles {
     test_checkqc            { includeConfig 'conf/test_checkqc.config'     }
     test_kraken             { includeConfig 'conf/test_kraken.config'      }
     test_two_lanes          { includeConfig 'conf/test_two_lanes.config'   }
+    test_mgikit             { includeConfig 'conf/test_mgikit.config'   }
 
 }
 
diff --git a/nf-test.config b/nf-test.config
index d441a08a..bed04032 100644
--- a/nf-test.config
+++ b/nf-test.config
@@ -18,4 +18,3 @@ config {
         load "nft-utils@0.0.3"
     }
 }
-
diff --git a/tests/mgikit.nf.test b/tests/mgikit.nf.test
new file mode 100644
index 00000000..4158e1dd
--- /dev/null
+++ b/tests/mgikit.nf.test
@@ -0,0 +1,36 @@
+nextflow_pipeline {
+    name "Test Workflow main.nf - MGIKIT"
+    script "../main.nf"
+    profile "test_mgikit,docker"
+    tag "mgikit"
+    tag "pipeline"
+
+    test("mgikit") {
+
+        when {
+            params {
+                outdir        = "$outputDir"
+            }
+        }
+
+        then {
+            // stable_name: All files + folders in ${params.outdir}/ with a stable name
+            def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}'])
+            // stable_path: All files in ${params.outdir}/ with stable content
+            def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(
+                    // Number of successful tasks
+                    workflow.trace.succeeded().size(),
+                    // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
+                    removeNextflowVersion("$outputDir/pipeline_info/nf_core_pipeline_software_mqc_versions.yml"),
+                    // All stable path name, with a relative path
+                    stable_name,
+                    // All files with stable contents
+                    stable_path
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap
new file mode 100644
index 00000000..245f4798
--- /dev/null
+++ b/tests/mgikit.nf.test.snap
@@ -0,0 +1,305 @@
+{
+    "mgikit": {
+        "content": [
+            21,
+            {
+                "DEMULTIPLEX": {
+                    "mgikit": "0.1.6"
+                },
+                "FALCO": {
+                    "falco": "1.2.1"
+                },
+                "FASTP": {
+                    "fastp": "0.23.4"
+                },
+                "MD5SUM": {
+                    "md5sum": 8.3
+                },
+                "Workflow": {
+                    "nf-core/demultiplex": "v1.5.4"
+                }
+            },
+            [
+                "demultiplex",
+                "demultiplex/out-test-fc01-1",
+                "demultiplex/out-test-fc01-1/FC01.L01.mgikit.general",
+                "demultiplex/out-test-fc01-1/FC01.L01.mgikit.info",
+                "demultiplex/out-test-fc01-1/FC01.L01.mgikit.sample_stats",
+                "demultiplex/out-test-fc01-1/FC01.L01.mgikit.undetermined_barcode",
+                "demultiplex/out-test-fc01-1/FC01.L01.mgikit.undetermined_barcode.complete",
+                "demultiplex/out-test-fc01-1/Sample01_S1_L01_R1_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample01_S1_L01_R2_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample02_S2_L01_R1_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample02_S2_L01_R2_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample03_S3_L01_R1_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample03_S3_L01_R2_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample04_S4_L01_R1_001.fastq.gz",
+                "demultiplex/out-test-fc01-1/Sample04_S4_L01_R2_001.fastq.gz",
+                "demultiplex/out-test-fc01-1_undetermined",
+                "demultiplex/out-test-fc01-1_undetermined/Undetermined_L01_R1_001.fastq.gz",
+                "demultiplex/out-test-fc01-1_undetermined/Undetermined_L01_R2_001.fastq.gz",
+                "multiqc",
+                "multiqc/multiqc_data",
+                "multiqc/multiqc_data/fastp-insert-size-plot.txt",
+                "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt",
+                "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt",
+                "multiqc/multiqc_data/fastp_filtered_reads_plot.txt",
+                "multiqc/multiqc_data/fastqc-status-check-heatmap.txt",
+                "multiqc/multiqc_data/fastqc_overrepresented_sequences_plot.txt",
+                "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt",
+                "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt",
+                "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt",
+                "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt",
+                "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt",
+                "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt",
+                "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt",
+                "multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt",
+                "multiqc/multiqc_data/multiqc.log",
+                "multiqc/multiqc_data/multiqc_citations.txt",
+                "multiqc/multiqc_data/multiqc_data.json",
+                "multiqc/multiqc_data/multiqc_fastp.txt",
+                "multiqc/multiqc_data/multiqc_fastqc.txt",
+                "multiqc/multiqc_data/multiqc_general_stats.txt",
+                "multiqc/multiqc_data/multiqc_software_versions.txt",
+                "multiqc/multiqc_data/multiqc_sources.txt",
+                "multiqc/multiqc_plots",
+                "multiqc/multiqc_plots/pdf",
+                "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf",
+                "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf",
+                "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_overrepresented_sequences_plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf",
+                "multiqc/multiqc_plots/pdf/fastqc_top_overrepresented_sequences_table.pdf",
+                "multiqc/multiqc_plots/pdf/general_stats_table.pdf",
+                "multiqc/multiqc_plots/png",
+                "multiqc/multiqc_plots/png/fastp-insert-size-plot.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png",
+                "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png",
+                "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png",
+                "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png",
+                "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png",
+                "multiqc/multiqc_plots/png/fastqc_overrepresented_sequences_plot.png",
+                "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png",
+                "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png",
+                "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png",
+                "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png",
+                "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png",
+                "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png",
+                "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png",
+                "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png",
+                "multiqc/multiqc_plots/png/fastqc_top_overrepresented_sequences_table.png",
+                "multiqc/multiqc_plots/png/general_stats_table.png",
+                "multiqc/multiqc_plots/svg",
+                "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg",
+                "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg",
+                "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg",
+                "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg",
+                "multiqc/multiqc_plots/svg/fastqc_overrepresented_sequences_plot.svg",
+                "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg",
+                "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg",
+                "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg",
+                "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg",
+                "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg",
+                "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg",
+                "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg",
+                "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg",
+                "multiqc/multiqc_plots/svg/fastqc_top_overrepresented_sequences_table.svg",
+                "multiqc/multiqc_plots/svg/general_stats_table.svg",
+                "multiqc/multiqc_report.html",
+                "pipeline_info",
+                "pipeline_info/nf_core_pipeline_software_mqc_versions.yml",
+                "samplesheet",
+                "samplesheet/atacseq_samplesheet.csv",
+                "samplesheet/rnaseq_samplesheet.csv",
+                "samplesheet/taxprofiler_samplesheet.csv",
+                "test-fc01",
+                "test-fc01.csv",
+                "test-fc01/L001",
+                "test-fc01/L001/Sample01.fastp.html",
+                "test-fc01/L001/Sample01.fastp.json",
+                "test-fc01/L001/Sample01_1.fastp.fastq.gz",
+                "test-fc01/L001/Sample01_1.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample01_1.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample01_1.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample01_1.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample01_2.fastp.fastq.gz",
+                "test-fc01/L001/Sample01_2.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample01_2.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample01_2.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample01_2.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample02.fastp.html",
+                "test-fc01/L001/Sample02.fastp.json",
+                "test-fc01/L001/Sample02_1.fastp.fastq.gz",
+                "test-fc01/L001/Sample02_1.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample02_1.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample02_1.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample02_1.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample02_2.fastp.fastq.gz",
+                "test-fc01/L001/Sample02_2.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample02_2.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample02_2.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample02_2.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample03.fastp.html",
+                "test-fc01/L001/Sample03.fastp.json",
+                "test-fc01/L001/Sample03_1.fastp.fastq.gz",
+                "test-fc01/L001/Sample03_1.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample03_1.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample03_1.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample03_1.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample03_2.fastp.fastq.gz",
+                "test-fc01/L001/Sample03_2.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample03_2.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample03_2.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample03_2.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample04.fastp.html",
+                "test-fc01/L001/Sample04.fastp.json",
+                "test-fc01/L001/Sample04_1.fastp.fastq.gz",
+                "test-fc01/L001/Sample04_1.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample04_1.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample04_1.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample04_1.fastp.fastq.gz_summary.txt",
+                "test-fc01/L001/Sample04_2.fastp.fastq.gz",
+                "test-fc01/L001/Sample04_2.fastp.fastq.gz.md5",
+                "test-fc01/L001/Sample04_2.fastp.fastq.gz_fastqc_data.txt",
+                "test-fc01/L001/Sample04_2.fastp.fastq.gz_fastqc_report.html",
+                "test-fc01/L001/Sample04_2.fastp.fastq.gz_summary.txt"
+            ],
+            [
+                "FC01.L01.mgikit.general:md5,624b0d43c3995fde2c122c447e780191",
+                "FC01.L01.mgikit.info:md5,69a72a1c43d47032828d8b97c6cf8807",
+                "FC01.L01.mgikit.sample_stats:md5,20d5b9f4a430fd34579e0f70b42f73f0",
+                "FC01.L01.mgikit.undetermined_barcode:md5,c1960b64d4cc5c141d742aa1b6f57a31",
+                "FC01.L01.mgikit.undetermined_barcode.complete:md5,c1960b64d4cc5c141d742aa1b6f57a31",
+                "Sample01_S1_L01_R1_001.fastq.gz:md5,19752026de713f514637fab29e4ffac4",
+                "Sample01_S1_L01_R2_001.fastq.gz:md5,f9015fa4a73977dd093b8b2e4a67a316",
+                "Sample02_S2_L01_R1_001.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144",
+                "Sample02_S2_L01_R2_001.fastq.gz:md5,7348f1bf066a69ee48ed3ee6afa2f91b",
+                "Sample03_S3_L01_R1_001.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f",
+                "Sample03_S3_L01_R2_001.fastq.gz:md5,38ba38d0d71343e6125fc15b63483ad4",
+                "Sample04_S4_L01_R1_001.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4",
+                "Sample04_S4_L01_R2_001.fastq.gz:md5,cd88081007614f5bed7975b04e25b7e3",
+                "Undetermined_L01_R1_001.fastq.gz:md5,63ce3d442f4f8c42e0d9addea9273315",
+                "Undetermined_L01_R2_001.fastq.gz:md5,0af27de6fc08cce579e160c31fbe61b9",
+                "fastp-insert-size-plot.txt:md5,c3c048605296e5adfd1761bc1871de3b",
+                
+                "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fff",
+                "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64ff",
+                "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0",
+                "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb",
+                "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904",
+                "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90",
+                "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904",
+                "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90",
+
+                "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,0ac96d6e7126238688e36613c525b309",
+                "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,0ac96d6e7126238688e36613c525b309",
+                "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,0eb066261c3786efd7735fcb261a02c4",
+                "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,0eb066261c3786efd7735fcb261a02c4",
+                "fastp_filtered_reads_plot.txt:md5,4b41e2709debc2e3359f2382c31670c6",
+                "fastqc_overrepresented_sequences_plot.txt:md5,22b07e7ba524ece1c795a18197ef9342",
+                "fastqc_per_base_n_content_plot.txt:md5,4b4849f2db8f29bd4d6dcd5f190a40d4",
+                "fastqc_per_base_sequence_quality_plot.txt:md5,f9103469a4225c35800d1b9eca0c15ea",
+                "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,ef8bf33d0d608d9899d03db587773a5f",
+                "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f",
+                "fastqc_per_sequence_quality_scores_plot.txt:md5,5f2772bb49befe838597266ebc7c772b",
+                "fastqc_sequence_counts_plot.txt:md5,7f22bfc8a332ead3f4629f44b8967b69",
+                "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015",
+                "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
+                "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700",
+                "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2",
+                "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4",
+                "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4",
+                "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2",
+                "Sample01_1.fastp.fastq.gz_summary.txt:md5,8dd26c87ee66aaa4e7bb5f02ea05b8df",
+                "Sample01_2.fastp.fastq.gz:md5,f9015fa4a73977dd093b8b2e4a67a316",
+                "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a",
+                "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73",
+                "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83",
+                "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0",
+                "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144",
+                "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9",
+                "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21",
+                "Sample02_1.fastp.fastq.gz_summary.txt:md5,5cc98027a0a13d930b52f42c18d5dbd2",
+                "Sample02_2.fastp.fastq.gz:md5,7348f1bf066a69ee48ed3ee6afa2f91b",
+                "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30",
+                "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e",
+                "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa",
+                "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d",
+                "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f",
+                "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423",
+                "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa",
+                "Sample03_1.fastp.fastq.gz_summary.txt:md5,8cd319cb78678224290dc56d9d46b7fd",
+                "Sample03_2.fastp.fastq.gz:md5,38ba38d0d71343e6125fc15b63483ad4",
+                "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549",
+                "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa",
+                "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09",
+                "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6",
+                "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4",
+                "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132",
+                "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217",
+                "Sample04_1.fastp.fastq.gz_summary.txt:md5,5fea6afad96654dbd2c100943d45d6e8",
+                "Sample04_2.fastp.fastq.gz:md5,cd88081007614f5bed7975b04e25b7e3",
+                "Sample04_2.fastp.fastq.gz.md5:md5,ae878e68b8e0861b290a20472cb71bf9",
+                "Sample04_2.fastp.fastq.gz_fastqc_data.txt:md5,d64d9b1a348735f42f939307c04228bf",
+                "Sample04_2.fastp.fastq.gz_summary.txt:md5,20d5e8b942681419b885dca77ab9d250"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.1"
+        },
+        "timestamp": "2024-12-08T18:24:13.756472"
+    }
+}
\ No newline at end of file

From eac2342d559fda6ae9c22e8162b8260e6a887136 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Mon, 16 Dec 2024 23:05:50 +1100
Subject: [PATCH 06/10] fix tests

---
 tests/mgikit.nf.test.snap | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap
index 245f4798..74fc2b29 100644
--- a/tests/mgikit.nf.test.snap
+++ b/tests/mgikit.nf.test.snap
@@ -16,7 +16,7 @@
                     "md5sum": 8.3
                 },
                 "Workflow": {
-                    "nf-core/demultiplex": "v1.5.4"
+                    "nf-core/demultiplex": "v1.5.4dev"
                 }
             },
             [
@@ -234,14 +234,14 @@
                 "Undetermined_L01_R2_001.fastq.gz:md5,0af27de6fc08cce579e160c31fbe61b9",
                 "fastp-insert-size-plot.txt:md5,c3c048605296e5adfd1761bc1871de3b",
                 
-                "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fff",
-                "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64ff",
-                "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0",
-                "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb",
-                "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904",
-                "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90",
-                "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904",
-                "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b90",
+                "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fffac",
+                "fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,dfec6e4e9f65cf657e3b214ca64fffac",
+                "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,31f25acdfc17539444577329c37bb0e5",
+                "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,31f25acdfc17539444577329c37bb0e5",
+                "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d",
+                "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d",
+                "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d",
+                "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,a12afa6f3391f2abfd2bd5938c5b904d",
 
                 "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,0ac96d6e7126238688e36613c525b309",
                 "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,0ac96d6e7126238688e36613c525b309",
@@ -252,13 +252,13 @@
                 "fastqc_per_base_n_content_plot.txt:md5,4b4849f2db8f29bd4d6dcd5f190a40d4",
                 "fastqc_per_base_sequence_quality_plot.txt:md5,f9103469a4225c35800d1b9eca0c15ea",
                 "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,ef8bf33d0d608d9899d03db587773a5f",
-                "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f",
+                "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,b327efa563517705181f6434672199f4",
                 "fastqc_per_sequence_quality_scores_plot.txt:md5,5f2772bb49befe838597266ebc7c772b",
                 "fastqc_sequence_counts_plot.txt:md5,7f22bfc8a332ead3f4629f44b8967b69",
                 "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015",
                 "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
                 "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700",
-                "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2",
+                "Sample01.fastp.json:md5,39affa02f97357ff249ab6557af88cd8",
                 "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4",
                 "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4",
                 "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2",
@@ -267,7 +267,7 @@
                 "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a",
                 "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73",
                 "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83",
-                "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0",
+                "Sample02.fastp.json:md5,d1bf54e9c8723c30995cd7ce6cdc0a31",
                 "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144",
                 "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9",
                 "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21",
@@ -276,7 +276,7 @@
                 "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30",
                 "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e",
                 "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa",
-                "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d",
+                "Sample03.fastp.json:md5,adaa33288edef3d1a8e6de4ef15c52a8",
                 "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f",
                 "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423",
                 "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa",
@@ -285,7 +285,7 @@
                 "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549",
                 "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa",
                 "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09",
-                "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6",
+                "Sample04.fastp.json:md5,f9c94ad8d70305fd217479151cf5bf11",
                 "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4",
                 "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132",
                 "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217",

From f6a0743d65694370d3a1f5c2c93e0cdafeb5899b Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Tue, 17 Dec 2024 20:29:53 +1100
Subject: [PATCH 07/10] update docs

---
 README.md      | 1 +
 docs/output.md | 5 +++++
 docs/usage.md  | 3 ++-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 44ead372..8d3138dc 100755
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 - [sgdemux](#sgdemux) - demultiplexing bgzipped fastq files produced by Singular Genomics (CONDITIONAL)
 - [fqtk](#fqtk) - a toolkit for working with FASTQ files, written in Rust (CONDITIONAL)
 - [mkfastq](#mkfastq) - converting bcl files to fastq, and demultiplexing for single-cell sequencing data (CONDITIONAL)
+- [mgikit](#mgikit) - demultiplexing fastq files generated by MGI sequencers using [mgikit](https://github.com/sagc-bioinformatics/mgikit) (CONDITIONAL)
 
 3. [checkqc](#checkqc) - (optional) Check quality criteria after demultiplexing (bcl2fastq only)
 4. [fastp](#fastp) - Adapter and quality trimming
diff --git a/docs/output.md b/docs/output.md
index 49dbc39a..b47aba89 100755
--- a/docs/output.md
+++ b/docs/output.md
@@ -16,6 +16,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 - [sgdemux](#sgdemux) - demultiplexing bgzipped fastq files produced by Singular Genomics (CONDITIONAL)
 - [fqtk](#fqtk) - demultiplexing fastq files (CONDITIONAL)
 - [mkfastq](#mkfastq) - converting bcl files to fastq, and demultiplexing for single-cell sequencing data (CONDITIONAL)
+- [mgikit](#mgikit) - demultiplexing fastq files generated by MGI sequencers using [mgikit](https://github.com/sagc-bioinformatics/mgikit) (CONDITIONAL)
 - [checkqc](#checkqc) - (optional) Check quality criteria after demultiplexing (bcl2fastq only)
 - [fastp](#fastp) - Adapter and quality trimming
 - [Falco](#falco) - Raw read QC
@@ -136,6 +137,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 </details>
 
+### mgikit
+
+[mgikit](https://github.com/sagc-bioinformatics/mgikit) demultiplexes fastq files generated by MGI sequencers (CONDITIONAL).
+
 ### fastp
 
 <details markdown="1">
diff --git a/docs/usage.md b/docs/usage.md
index 1fd50c51..06652cc0 100755
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -37,7 +37,7 @@ DDMMYY_SERIAL_NUMBER_FC3,/path/to/SampleSheet3.csv,3,/path/to/sequencer/output3
 | `id`          | Flowcell id                                                                                                                                         |
 | `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes                                                    |
 | `lane`        | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed                                                     |
-| `flowcell`    | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory |
+| `flowcell`    | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. mgikit demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. |
 
 An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/flowcell_input.csv) has been provided with the pipeline.
 
@@ -70,6 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo
 | **sgdemux**                  | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv)                             |
 | **fqtk**                     | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv)                       |
 | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) |
+| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) |
 
 ## Running the pipeline
 

From 7760b0ce59bf9d97848b08b6c40fb51c7413c3d9 Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Tue, 17 Dec 2024 20:35:59 +1100
Subject: [PATCH 08/10] update docs

---
 docs/usage.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 06652cc0..860ce678 100755
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -22,7 +22,7 @@ When using the demultiplexer fqtk, the _pipeline_ samplesheet must contain an ad
 --input '[path to pipeline samplesheet file]'
 ```
 
-#### Example: Pipeline samplesheet
+### Example: Pipeline samplesheet
 
 ```csv title="samplesheet.csv"
 id,samplesheet,lane,flowcell
@@ -43,7 +43,7 @@ An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/te
 
 Note that the run directory in the `flowcell` column must lead to a `tar.gz` for compatibility with the demultiplexers sgdemux and fqtk.
 
-#### Example: Pipeline samplesheet for fqtk
+### Example: Pipeline samplesheet for fqtk
 
 ```csv title="samplesheet.csv"
 id,samplesheet,lane,flowcell,per_flowcell_manifest
@@ -70,7 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo
 | **sgdemux**                  | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv)                             |
 | **fqtk**                     | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv)                       |
 | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) |
-| **mgikit** | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) |
+| **mgikit**                   | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) |
 
 ## Running the pipeline
 
@@ -199,7 +199,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
 - `apptainer`
   - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
 - `wave`
-  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later).
+  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
 - `conda`
   - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
 

From ea53277adda280b08a9ad7f2cc32b9718a278a5f Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Tue, 17 Dec 2024 20:42:37 +1100
Subject: [PATCH 09/10] update docs

---
 docs/usage.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 860ce678..15e1f773 100755
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -32,12 +32,12 @@ DDMMYY_SERIAL_NUMBER_FC2,/path/to/SampleSheet2.csv,1,/path/to/sequencer/output2
 DDMMYY_SERIAL_NUMBER_FC3,/path/to/SampleSheet3.csv,3,/path/to/sequencer/output3
 ```
 
-| Column        | Description                                                                                                                                         |
-| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `id`          | Flowcell id                                                                                                                                         |
-| `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes                                                    |
-| `lane`        | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed                                                     |
-| `flowcell`    | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. mgikit demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. |
+| Column        | Description                                                                                                                                                                                                                                                                   |
+| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `id`          | Flowcell id                                                                                                                                                                                                                                                                   |
+| `samplesheet` | Full path to the _flowcell_ `SampleSheet.csv` file containing the sample information and indexes                                                                                                                                                                              |
+| `lane`        | Optional lane number. When a lane number is provided, only the given lane will be demultiplexed                                                                                                                                                                               |
+| `flowcell`    | Full path to the Illumina sequencer output directory (often referred as run directory) or a `tar.gz` file containing the contents of said directory. `mgikit` demultiplexing expects a path to a directory here containing the compressed fastq files and `BioInfo.csv` file. |
 
 An [example _pipeline_ samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/flowcell_input.csv) has been provided with the pipeline.
 
@@ -70,7 +70,7 @@ Each demultiplexing software uses a distinct _flowcell_ samplesheet format. Belo
 | **sgdemux**                  | [sgdemux SampleSheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/sim-data/out.sample_meta.csv)                             |
 | **fqtk**                     | [fqtk SampleSheet.csv](https://github.com/fulcrumgenomics/nf-core-test-datasets/raw/fqtk/testdata/sim-data/fqtk_samplesheet.csv)                       |
 | **bcl2fastq and bclconvert** | [bcl2fastq and bclconvert SampleSheet.csv](https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/b2fq-samplesheet.csv) |
-| **mgikit**                   | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv) |
+| **mgikit**                   | [mgikit samplesheet.csv](https://github.com/nf-core/test-datasets/blob/demultiplex/testdata/mgi/fc01_sample_sheet.csv)                                 |
 
 ## Running the pipeline
 

From 10c22e4685914637b8a15fe68d60b2a1147ad1eb Mon Sep 17 00:00:00 2001
From: ziadbkh <ziadbkh@gmail.com>
Date: Tue, 17 Dec 2024 21:46:32 +1100
Subject: [PATCH 10/10] make lint happy

---
 conf/test_mgikit.config   | 4 ++--
 tests/mgikit.nf.test.snap | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/conf/test_mgikit.config b/conf/test_mgikit.config
index 07968209..cbd5c3cd 100644
--- a/conf/test_mgikit.config
+++ b/conf/test_mgikit.config
@@ -13,8 +13,8 @@
 // Limit resources so that this can run on GitHub Actions
 process {
     resourceLimits = [
-        cpus: 2,
-        memory: '12.GB',
+        cpus: 1,
+        memory: '7.GB',
         time: '4.h'
     ]
 }
diff --git a/tests/mgikit.nf.test.snap b/tests/mgikit.nf.test.snap
index 74fc2b29..b308750c 100644
--- a/tests/mgikit.nf.test.snap
+++ b/tests/mgikit.nf.test.snap
@@ -258,7 +258,7 @@
                 "fastqc_sequence_duplication_levels_plot.txt:md5,0698ed12046a621c771a27c42ae43015",
                 "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
                 "test-fc01.csv:md5,74dfac5602f25fe428510a500d3b4700",
-                "Sample01.fastp.json:md5,39affa02f97357ff249ab6557af88cd8",
+                "Sample01.fastp.json:md5,58bd930574e0d97a17e176af1f1a2de2",
                 "Sample01_1.fastp.fastq.gz:md5,19752026de713f514637fab29e4ffac4",
                 "Sample01_1.fastp.fastq.gz.md5:md5,ff1685317a46c2e4f38a601a400c6ea4",
                 "Sample01_1.fastp.fastq.gz_fastqc_data.txt:md5,9dbc0eb3858105006a5c08fbf2b199d2",
@@ -267,7 +267,7 @@
                 "Sample01_2.fastp.fastq.gz.md5:md5,b842540b836a847a3cbda9cd2bd9602a",
                 "Sample01_2.fastp.fastq.gz_fastqc_data.txt:md5,581fde09673f893ff5de8480146aec73",
                 "Sample01_2.fastp.fastq.gz_summary.txt:md5,3b8e912a8e2d7de2bdc8f27048816d83",
-                "Sample02.fastp.json:md5,d1bf54e9c8723c30995cd7ce6cdc0a31",
+                "Sample02.fastp.json:md5,139e9a52b922c83bbf244d9a90d037e0",
                 "Sample02_1.fastp.fastq.gz:md5,7f942bf3ef88ef04431ed0d40baae144",
                 "Sample02_1.fastp.fastq.gz.md5:md5,e5359534efdf527f23c892a395ffeda9",
                 "Sample02_1.fastp.fastq.gz_fastqc_data.txt:md5,9395972202f7805db07a262b0c1a2a21",
@@ -276,7 +276,7 @@
                 "Sample02_2.fastp.fastq.gz.md5:md5,2d7c845b972d1a8e49542c8aec7a0d30",
                 "Sample02_2.fastp.fastq.gz_fastqc_data.txt:md5,5ab57f96cb140451bfce528f2c5aab2e",
                 "Sample02_2.fastp.fastq.gz_summary.txt:md5,04f73bff5dd39c24da6e9cff5c99eafa",
-                "Sample03.fastp.json:md5,adaa33288edef3d1a8e6de4ef15c52a8",
+                "Sample03.fastp.json:md5,3696e4379f10e439f887fda0dd54059d",
                 "Sample03_1.fastp.fastq.gz:md5,56afcab8b820419bddec4982fc85b60f",
                 "Sample03_1.fastp.fastq.gz.md5:md5,ba1c71c4edafdf5e80817e61b65a2423",
                 "Sample03_1.fastp.fastq.gz_fastqc_data.txt:md5,ce8bafaa92fd7dad7e04c110a17ab9aa",
@@ -285,7 +285,7 @@
                 "Sample03_2.fastp.fastq.gz.md5:md5,3f6fd888aeb09e976676304ed56b9549",
                 "Sample03_2.fastp.fastq.gz_fastqc_data.txt:md5,6d5979154dc2d30b43561136e0adc1fa",
                 "Sample03_2.fastp.fastq.gz_summary.txt:md5,122391b6c6e9332fe373a464e3e3ab09",
-                "Sample04.fastp.json:md5,f9c94ad8d70305fd217479151cf5bf11",
+                "Sample04.fastp.json:md5,04daf32a7c3a7072b3e7a585421ea4a6",
                 "Sample04_1.fastp.fastq.gz:md5,dcf0ad68be6ff6c21ebceedaa4eaa4c4",
                 "Sample04_1.fastp.fastq.gz.md5:md5,be3faa610361cbcb5b6d988eaaa29132",
                 "Sample04_1.fastp.fastq.gz_fastqc_data.txt:md5,7ff5aeecbcb95f39c6708f2d342c6217",