Skip to content

Commit

Permalink
Added additional Kraken and BLAST options, better reporting of OOM on…
Browse files Browse the repository at this point in the history
… SLURM
  • Loading branch information
richardmleggett committed Oct 10, 2024
1 parent f1685ae commit 6d88d66
Show file tree
Hide file tree
Showing 12 changed files with 81 additions and 27 deletions.
Binary file modified bin/MARTiEngine.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion bin/marti
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/sh

JAVA_ARGS="-Xms1g -Xmx16g"
JAVA_ARGS="-Xms1g -Xmx32g"

# Change this directory to point to the MARTi bin
MARTI_DIR=/Users/leggettr/Documents/github/MARTi/bin
Expand Down
1 change: 1 addition & 0 deletions docs/source/table4.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ BlastThreads 4 Number of threads to use when running BLAST. Note: for SLURM sch
Memory 16G For SLURM scheduler, the memory to use per BLAST job. Passed with the SLURM --mem parameter.
Queue ei-medium The job submission queue to use. Can be left out and the default queue (see above) will be used. Currently only required for SLURM and equates to the partition name.
Dust 15 64 1 Dust string to be passed on to all blast commands for this blast process (optional).
Options -ungapped Any additional options to pass to BLAST
3 changes: 2 additions & 1 deletion docs/source/table_kraken2.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
Kraken2Process n/a Defines the start of a Krakaen2 process
Kraken2Process n/a Defines the start of a Kraken2 process
Name k2_refseq Name of process
Database /path/to/db/ Path to directory containing Kraken2 database
UseToClassify n/a Use Kraken2 results for classification (can only be set for 1 classification process)
Kraken2Threads 4 Number of threads to use when running Kraken2. Note: for SLURM scheduler, MARTi also uses this value for the SLURM --cpus-per-task option.
Memory 16G For SLURM scheduler, the memory to use per Kraken2 job. Passed with the SLURM --mem parameter.
Queue ei-medium The job submission queue to use. Can be left out and the default queue (see above) will be used. Currently only required for SLURM and equates to the partition name.
Options --confidence 0.01 Any additional options to pass to Kraken2
2 changes: 1 addition & 1 deletion engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>leggett</groupId>
<artifactId>MARTiEngine</artifactId>
<version>0.9.13</version>
<version>0.9.18</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand Down
22 changes: 22 additions & 0 deletions engine/src/main/java/uk/ac/earlham/lcaparse/LCAParseOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -338,4 +338,26 @@ private void setSortHitsByBitscore() {
/**
 * Returns the current value of the {@code sortHitsByBitScore} flag.
 *
 * @return the stored {@code sortHitsByBitScore} setting
 */
public boolean sortHitsbyBitscore() {
    return this.sortHitsByBitScore;
}

/**
 * Writes the current option values to standard output, one
 * {@code name=value} pair per line, in a fixed order.
 */
public void displayOptions() {
    // Assemble every line first, then emit them in the same order.
    final String[] settings = {
        "inputFilename=" + inputFilename,
        "outputPrefix=" + outputPrefix,
        "taxonomyDirectory=" + taxonomyDirectory,
        "mapFilename=" + mapFilename,
        "fileFormat=" + fileFormat,
        "maxHitsToConsider=" + maxHitsToConsider,
        "scorePercent=" + scorePercent,
        "minIdentity=" + minIdentity,
        "minLength=" + minLength,
        "minQueryCoverage=" + minQueryCoverage,
        "minCombinedScore=" + minCombinedScore,
        "limitToSpecies=" + limitToSpecies,
        "expectedTaxon=" + expectedTaxon,
        "relatedTaxon=" + relatedTaxon,
        "doingMakeMap=" + doingMakeMap,
        "doingRanks=" + doingRanks,
        "withWarnings=" + withWarnings,
        "doingAnnotate=" + doingAnnotate,
        "sortHitsByBitScore=" + sortHitsByBitScore
    };

    for (String setting : settings) {
        System.out.println(setting);
    }
}
}
15 changes: 14 additions & 1 deletion engine/src/main/java/uk/ac/earlham/lcaparse/PAFHit.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,20 @@ public PAFHit(Taxonomy t, AccessionTaxonConvertor atc, String line) {
System.exit(1);
}

taxonId = accTaxConvert.getTaxonFromAccession(targetName);
if (targetName.startsWith("taxid|")) {
String rest = targetName.substring(6);
if (rest.contains("|")) {
String taxonIdString = rest.substring(0, rest.indexOf("|"));
//System.out.println("Got string "+taxonIdString);
taxonId = Integer.parseInt(taxonIdString);
//System.out.println("And taxon ID is "+taxonId);
} else {
System.out.println("Strangely formatted target name: "+targetName+" ("+rest+")");
}
} else {
taxonId = accTaxConvert.getTaxonFromAccession(targetName);
}

if (taxonId == -1) {
taxonomy.warnTaxa(targetName);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public String readConfigFile(BufferedReader br) {
dustString = tokens[1].replaceAll("^'+", "").replaceAll("'+$", "");
} else if (tokens[0].compareTo("options") == 0) {
processOptions = tokens[1];
}else {
} else {
keepReading = false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* @author Richard M. Leggett
*/
public class MARTiEngine {
public final static String VERSION_STRING = "v0.9.18";
public final static String VERSION_STRING = "v0.9.18d";
public final static long SERIAL_VERSION = 3L;
public final static boolean SHOW_NOTES = false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,15 @@ private synchronized void runKraken2(String inputPathname) {
String command = "";
JobScheduler jobScheduler = options.getJobScheduler();
String identifier = "kraken2_"+inputPathname;
String processOptions = k2p.getProcessOptions();

command = "kraken2" +
command = "kraken2";

if (processOptions.length() > 0) {
command = command + " " + processOptions;
}

command = command +
" --db " + database +
" --output " + classificationFilePath +
" --threads " + numThreads + " " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public class Kraken2Process {
private String jobQueue = null;
private int minHitGroups = 1;
private int primaryAssignments = 1;

private String processOptions = "";

public Kraken2Process(MARTiEngineOptions o) {
options = o;
Expand Down Expand Up @@ -58,6 +58,8 @@ public String readConfigFile(BufferedReader br) {
numThreads = Integer.parseInt(tokens[1]);
} else if (tokens[0].compareTo("UseToClassify") == 0) {
classifyThis = true;
} else if (tokens[0].compareTo("options") == 0) {
processOptions = tokens[1];
} else {
keepReading = false;
}
Expand Down Expand Up @@ -131,5 +133,8 @@ public int getNumThreads() {
/**
 * Returns the value held in {@code primaryAssignments}.
 *
 * @return the stored number of primary assignments
 */
public int getNumPrimaryAssignments() {
    return this.primaryAssignments;
}


/**
 * Returns the additional option string stored for this process.
 *
 * @return the value of {@code processOptions}
 */
public String getProcessOptions() {
    return this.processOptions;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -310,25 +310,30 @@ public int getDependency(int n) {
}

/**
 * Translates a job state string into one of the STATE_* constants, stores it in
 * jobState, and writes the parsed state to slurmLog.
 * Strings that match none of the listed names are recorded as STATE_UNKNOWN.
 *
 * @param stateString state name to interpret, e.g. "COMPLETED" or "OUT_OF_MEMORY"
 */
private void parseJobState(String stateString) {
switch(stateString) {
case "BOOT_FAIL": jobState = STATE_BOOT_FAIL; break;
case "CANCELLED": jobState = STATE_CANCELLED; break;
case "CANCELLED+": jobState = STATE_CANCELLED; break;
case "COMPLETED": jobState = STATE_COMPLETED; break;
case "DEADLINE": jobState = STATE_DEADLINE; break;
case "FAILED": jobState = STATE_FAILED; break;
case "NODE_FAIL": jobState = STATE_NODE_FAIL; break;
case "OUT_OF_MEMORY": jobState = STATE_OOM; break;
case "PENDING": jobState = STATE_PENDING; break;
case "PREEMPTED": jobState = STATE_PREEMPTED; break;
case "RUNNING": jobState = STATE_RUNNING; break;
case "REQUEUED": jobState = STATE_REQUEUED; break;
case "RESIZING": jobState = STATE_RESIZING; break;
case "REVOKED": jobState = STATE_REVOKED; break;
case "SUSPENDED": jobState = STATE_SUSPENDED; break;
case "TIMEOUT": jobState = STATE_TIMEOUT; break;
default: jobState = STATE_UNKNOWN; break;
// Any state beginning with "OUT_OF_ME" is treated as out-of-memory before the
// exact-match switch runs, so abbreviated spellings still map to STATE_OOM.
// NOTE(review): presumably the scheduler can report a shortened state name - confirm.
if (stateString.startsWith("OUT_OF_ME")) {
jobState = STATE_OOM;
} else {
switch(stateString) {
case "BOOT_FAIL": jobState = STATE_BOOT_FAIL; break;
case "CANCELLED": jobState = STATE_CANCELLED; break;
case "CANCELLED+": jobState = STATE_CANCELLED; break;
case "COMPLETED": jobState = STATE_COMPLETED; break;
case "DEADLINE": jobState = STATE_DEADLINE; break;
case "FAILED": jobState = STATE_FAILED; break;
case "NODE_FAIL": jobState = STATE_NODE_FAIL; break;
// Never reached in this branch: the prefix check above already claims this value.
case "OUT_OF_MEMORY": jobState = STATE_OOM; break;
case "PENDING": jobState = STATE_PENDING; break;
case "PREEMPTED": jobState = STATE_PREEMPTED; break;
case "RUNNING": jobState = STATE_RUNNING; break;
case "REQUEUED": jobState = STATE_REQUEUED; break;
case "RESIZING": jobState = STATE_RESIZING; break;
case "REVOKED": jobState = STATE_REVOKED; break;
case "SUSPENDED": jobState = STATE_SUSPENDED; break;
case "TIMEOUT": jobState = STATE_TIMEOUT; break;
default: jobState = STATE_UNKNOWN; break;
}
}

// Record the numeric state chosen for this job in the log.
slurmLog.println("Job "+internalJobId+" state parsed "+jobState);
}

Expand Down

0 comments on commit 6d88d66

Please sign in to comment.