From 43cd24c8ecbb76832087713b5a0e2db7628b642f Mon Sep 17 00:00:00 2001 From: Pelin Icer Baykal Date: Fri, 31 Mar 2023 12:47:36 +0200 Subject: [PATCH] add minimap2 aligner --- workflow/envs/minimap_align.yaml | 7 ++ workflow/rules/align.smk | 102 ++++++++++++++++++++++++++++ workflow/schemas/config_schema.json | 55 ++++++++++++++- 3 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 workflow/envs/minimap_align.yaml diff --git a/workflow/envs/minimap_align.yaml b/workflow/envs/minimap_align.yaml new file mode 100644 index 00000000..984117d6 --- /dev/null +++ b/workflow/envs/minimap_align.yaml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - minimap2=2.24 + - samtools=1.10 diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 973858fd..f676f5b4 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -629,6 +629,108 @@ elif config.general["aligner"] == "bowtie": {params.SAMTOOLS} view -h -F 4 -F 2048 -o "{output.REF}" "{output.TMP_SAM} 2> >(tee -a {log.errfile} >&2) """ +elif config.general["aligner"] == "minimap": + + rule ref_minimap_index: + input: + reference_file, + output: + reference_file+'.mmi', + params: + MINIMAP=config.applications["minimap"], + log: + outfile="references/minimap.index.out.log", + errfile="references/minimap.index.err.log", + conda: + config.ref_minimap_index["conda"] + benchmark: + "references/ref_minimap_index.benchmark" + group: + "align" + resources: + disk_mb=1250, + mem_mb=config.ref_minimap_index["mem"], + runtime=config.ref_minimap_index["time"], + threads: 1 + shell: + """ + {params.MINIMAP} -t {threads} -d {output} {input} 2> >(tee {log.errfile} >&2) + """ + + if config.input["paired"]: + + rule minimap_align: + input: + target=reference_file+'.mmi', + # FASTQ=input_align_gz, + R1="{dataset}/preprocessed_data/R1.fastq.gz", + R2="{dataset}/preprocessed_data/R2.fastq.gz", + + # INDEX=multiext(reference_file,*bwa_idx_ext), + output: + REF=temp_with_prefix("{dataset}/alignments/REF_aln.sam"), + TMP_SAM=temp_with_prefix("{dataset}/alignments/tmp_aln.sam"), + params: + EXTRA=config.minimap_align["extra"], + PRESET=config.minimap_align["preset"], + MINIMAP=config.applications["minimap"], + SAMTOOLS=config.applications["samtools"], + log: + outfile="{dataset}/alignments/minimap_align.log", + errfile="{dataset}/alignments/minimap_align.err.log", + conda: + config.minimap_align["conda"] + benchmark: + "{dataset}/alignments/minimap_align.benchmark" + group: + "align" + resources: + disk_mb=1250, + mem_mb=config.minimap_align["mem"], + runtime=config.minimap_align["time"], + threads: config.minimap_align["threads"] + shell: + """ + {params.MINIMAP} -t {threads} -a -x {params.PRESET} {input.target} {input.R1} {input.R2} > {output.TMP_SAM} 2> >(tee -a {log.errfile} >&2) + {params.SAMTOOLS} view -h -f 2 -F 2048 -o "{output.REF}" "{output.TMP_SAM}" 2> >(tee -a {log.errfile} >&2) + """ + else: + + rule minimap_align_se: + input: + target=reference_file+'.mmi', + # FASTQ=input_align_gz, + R1="{dataset}/preprocessed_data/R1.fastq.gz", + # R2="{dataset}/preprocessed_data/R2.fastq.gz", + + # INDEX=multiext(reference_file,*bwa_idx_ext), + output: + REF=temp_with_prefix("{dataset}/alignments/REF_aln.sam"), + TMP_SAM=temp_with_prefix("{dataset}/alignments/tmp_aln.sam"), + params: + EXTRA=config.minimap_align["extra"], + PRESET=config.minimap_align["preset"], + MINIMAP=config.applications["minimap"], + SAMTOOLS=config.applications["samtools"], + log: + outfile="{dataset}/alignments/minimap_align.log", + errfile="{dataset}/alignments/minimap_align.err.log", + conda: + config.minimap_align["conda"] + benchmark: + "{dataset}/alignments/minimap_align.benchmark" + group: + "align" + resources: + disk_mb=1250, + mem_mb=config.minimap_align["mem"], + runtime=config.minimap_align["time"], + threads: config.minimap_align["threads"] + shell: + """ + {params.MINIMAP} -t {threads} -a -x {params.PRESET} {params.EXTRA} {input.target} {input.R1} > {output.TMP_SAM} 2> >(tee -a {log.errfile} >&2) + {params.SAMTOOLS} view -h -f 2 -F 2048 -o "{output.REF}" "{output.TMP_SAM}" 2> >(tee -a {log.errfile} >&2) + """ # NOTE ngshmmalignb also generate consensus so check there too. # if config.general["aligner"] == "ngshmmalign": diff --git a/workflow/schemas/config_schema.json b/workflow/schemas/config_schema.json index 9b7efa95..88c197b9 100644 --- a/workflow/schemas/config_schema.json +++ b/workflow/schemas/config_schema.json @@ -16,7 +16,7 @@ }, "aligner": { "type": "string", - "enum": ["ngshmmalign","bwa","bowtie"], + "enum": ["ngshmmalign","bwa","bowtie","minimap"], "default": "ngshmmalign", "description": "There are three options for mapping reads, either using [`ngshmmalign`](https://github.com/cbg-ethz/ngshmmalign), [BWA MEM (`bwa`)](https://github.com/lh3/bwa) [^1], or [Bowtie 2 (`bowtie`)](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) [^2]. To use a different aligner than the default, indicate which aligner you want to use by setting the property aligner.\n\n[^1]: Li, H. Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. 2013.\n[^2]: Langmead, B. and Salzberg, S. Fast gapped-read alignment with Bowtie 2. Nature Methods. 2012.\n\n**Note**: Some virus-specific base configuration specified in `virus_base_config` might change this option's default to a more appropriate aligner for that virus, e.g., depending on its usual diversity and mutation rate.\nYou are still free to override that default in your configuration shall the need arise.", "examples": ["bowtie"] @@ -311,6 +311,10 @@ "type": "string", "default": "bowtie2" }, + "minimap": { + "type": "string", + "default": "minimap2" + }, "samtools": { "type": "string", "default": "samtools" @@ -786,6 +790,55 @@ "default": {}, "type": "object" }, + "ref_minimap_index": { + "properties": { + "mem": { + "type": "integer", + "default": 2000 + }, + "time": { + "type": "integer", + "default": 235 + }, + "conda": { + "type": "string", + "default": "{VPIPE_BASEDIR}/envs/minimap_align.yaml" + } + }, + "default": {}, + "type": "object" + }, + "minimap_align": { + "properties": { + "mem": { + "type": "integer", + "default": 1250 + }, + "time": { + "type": "integer", + "default": 235 + }, + "threads": { + "type": "integer" + }, + "preset": { + "type": "string", + "default": "sr", + "description": "Specify minimap2 presets." + }, + "conda": { + "type": "string", + "default": "{VPIPE_BASEDIR}/envs/minimap_align.yaml" + }, + "extra": { + "type": "string", + "default": "", + "description": "With property `extra`, users can pass additional options to run minimap2. For more details on minimap2 configurable options refer to the software [documentation](https://github.com/lh3/minimap2)." + } + }, + "default": {}, + "type": "object" + }, "primerstrim" : { "properties": { "mem": {