Skip to content

Commit

Permalink
Fixed BLAST parsing bug and better trapping of BLAST errors
Browse files Browse the repository at this point in the history
  • Loading branch information
richardmleggett committed Jun 16, 2023
1 parent d496236 commit 18ea534
Show file tree
Hide file tree
Showing 13 changed files with 70 additions and 16 deletions.
Binary file modified bin/MARTiEngine.jar
Binary file not shown.
Binary file modified docs/.DS_Store
Binary file not shown.
4 changes: 2 additions & 2 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ Prerequisites
-------------
In order to run the MARTi Engine (back-end), you also need to install the following on the machine where it will be running:

* **BLAST (2.10 or greater)** - `download from NCBI <https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download>`_ or, optionally, install with `homebrew on Mac <https://brew.sh>`_. On Ubuntu, using apt-get may install an older version. In which case, it may be easiest to download executables from the NCBI link above.
* **BLAST (2.12.0 or greater)** - `download from NCBI <https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download>`_ or, optionally, install with `homebrew on Mac <https://brew.sh>`_. On Ubuntu, apt-get may install an older version, in which case it may be easiest to download the executables from the NCBI link above.
* **BLAST databases** - what you'll need will depend on what you're trying to do. But you might want to start with the nt database, `available from the NCBI Blast FTP site <https://ftp.ncbi.nlm.nih.gov/blast/db/>`_.
* **NCBI taxonomy** - you can `download this from the NCBI taxonomy FTP site <https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/>`_. You need the taxdump files, specifically the nodes.dmp and names.dmp files.
* **Java Run Time Environment (OpenJDK 16.0.2 or greater)** - the simplest option is to `install OpenJDK <https://openjdk.java.net>`_. Note, on Macs, the documentation for OpenJDK isn't great. Once you download the JDK, you need to move the directory into /Library/Java/VirtualMachines (`as described here <https://java.tutorials24x7.com/blog/how-to-install-openjdk-14-on-mac>`_).
* **Java Runtime Environment (OpenJDK 16.0.2 or greater)** - the simplest option is to `install OpenJDK <https://openjdk.java.net>`_. Note that, on Macs, the documentation for OpenJDK isn't great: once you download the JDK, you need to move the directory into /Library/Java/VirtualMachines (`as described here <https://java.tutorials24x7.com/blog/how-to-install-openjdk-14-on-mac>`_). Alternatively, install it with `homebrew <https://brew.sh>`_ (``brew install openjdk``).

In order to run the MARTi GUI, you also need to install the following on the computer where it will be running:

Expand Down
8 changes: 6 additions & 2 deletions engine/src/main/java/uk/ac/earlham/lcaparse/BlastHit.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public BlastHit(Taxonomy t, AccessionTaxonConvertor atc, String line, int format
cacheTaxonIdPath();
}
}

private void parseNanoOKWithStitle(String[] fields ) {
// NanoOK14: "qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle staxids"
// NanoOK15: "qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle qcovs staxids"
Expand Down Expand Up @@ -96,7 +96,11 @@ private void parseNanoOKWithStitle(String[] fields ) {
queryCoverage = Double.parseDouble(fields[13]);
String taxaString = fields[14];
String[] taxa = taxaString.split(";");
taxonId = Integer.parseInt(taxa[0]);
try {
taxonId = Integer.parseInt(taxa[0]);
} catch (NumberFormatException e) {
taxonId = -2;
}
}

validAlignment = true;
Expand Down
6 changes: 5 additions & 1 deletion engine/src/main/java/uk/ac/earlham/marti/amr/AMRResults.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ public void analyseChunk(AMRAnalysisTask aat) {

WalkOutChunk woc = new WalkOutChunk(options, options.getReadClassifier().getTaxonomy(), aat.getOriginalChunkNumber(), aat.getProcessedChunkNumber());
woc.load(aat.getCARDBlastFilename(), aat.getNtBlastFilename());
options.getLog().println("Chunks loaded, processing hits");
woc.processHits(wor);
readCountWithAMRHits += woc.getReadCountWithAMRHits();
options.getLog().println("Read count with AMR hits "+readCountWithAMRHits);

writeJSON(aat.getProcessedChunkNumber());

Expand All @@ -89,6 +91,8 @@ public void writeJSON(int cn) {
// chunkTime object
JsonObjectBuilder chunkTimes = Json.createObjectBuilder();

options.getLog().println("Writing JSON for chunk "+cn);

for (int c=1; c<=wor.getMaxChunkNumber(); c++) {
chunkTimes.add(Integer.toString(c), wor.getChunkTime(c));
}
Expand Down Expand Up @@ -213,7 +217,7 @@ public void writeJSON(int cn) {
e.printStackTrace();
System.exit(1);
}

options.getLog().println("JSON written for chunk "+cn);
}

public CARDOntology getOntology() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ private void runBlast(String fastaPathname) {

if (options.getStopProcessingAfter() > 0) {
if (numberOfReadsProcessed > options.getStopProcessingAfter()) {
options.getLog().println("Number of FASTQ reads processed ("+numberOfReadsProcessed+") exceeeds limit ("+options.getStopProcessingAfter()+"). Sending STOP command.");
options.getLog().println("Note: Number of FASTQ reads processed ("+numberOfReadsProcessed+") exceeeds limit ("+options.getStopProcessingAfter()+"). Sending STOP command.");
options.stopProcessing();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,38 @@ public synchronized void addFile(String blastProcessName, int i, String queryFil

/**
 * Decides whether a BLAST job finished cleanly.
 * <p>
 * A non-zero exit value is treated as a failure straight away. Otherwise the
 * job's log file is scanned and the job is treated as failed if any line
 * contains the text "error" (case-insensitive). A missing log file is only
 * reported as a warning and does not fail the job.
 * <p>
 * Any exception raised while checking the log is printed and terminates the
 * JVM with exit status 1.
 *
 * @param f         item whose log file (as returned by getLogFile()) is checked
 * @param exitValue exit value reported for the BLAST process
 * @return true if exitValue is 0 and no error line was found in the log,
 *         false otherwise
 */
public boolean checkBLASTCompleted(ReadClassifierItem f, int exitValue) {
    String blastLogFilename = f.getLogFile();

    // A non-zero exit value is already a failure; no need to read the log.
    if (exitValue != 0) {
        return false;
    }

    boolean completed = true;

    options.getLog().println("Checking BLAST log for errors "+blastLogFilename);
    try {
        File bf = new File(blastLogFilename);
        if (bf.exists()) {
            // try-with-resources closes the reader even if readLine() throws.
            try (BufferedReader br = new BufferedReader(new FileReader(bf))) {
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.toLowerCase().contains("error")) {
                        options.getLog().printlnLogAndScreen("Error: Stopping due to error message in "+blastLogFilename);
                        completed = false;
                        // One error line is enough; stop scanning so the
                        // message is not repeated for every matching line.
                        break;
                    }
                }
            }
        } else {
            options.getLog().println("WARNING: Couldn't find BLAST log "+blastLogFilename);
            options.getLog().println("This is usually a bad sign, but continuing to run.");
        }
    } catch (Exception e) {
        System.out.println("checkBLASTCompleted exception");
        e.printStackTrace();
        System.exit(1);
    }

    return completed;
}

// /**
Expand Down Expand Up @@ -272,6 +295,7 @@ public synchronized void checkForFilesToClassify() {
} else {
System.out.println("Error: Failed BLAST "+f.getBlastFile() + " exit value "+ js.getExitValue(thisId));
options.getLog().println("Error: Failed BLAST "+f.getBlastFile());
js.markJobAsFailed(thisId);
}
} else {
options.getLog().println(MARTiLog.LOGLEVEL_NOTCOMPLETED, "Not completed " + f.blastProcessName + " - " + f.blastFile + " - " + f.getJobId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ public void run() {
}
}

options.getLog().println("MARTiAnalysisRunnable finalising");
options.getLog().println("Thread exiting");
options.getLog().println("MARTiAnalysisRunnable thread exiting");
}

/**
 * Logs (to log and screen) that the thread is exiting and sets keepRunning
 * to false.
 * NOTE(review): keepRunning is presumably polled by run() to end its loop -
 * confirm against the rest of the class.
 */
public void exitThread() {
    // Class name spelled out correctly so the message can be found by search.
    options.getLog().printlnLogAndScreen("Exiting MARTiAnalysisRunnable thread");
    keepRunning = false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.*;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.zip.*;
import uk.ac.earlham.marti.amr.WalkOutRead;
import uk.ac.earlham.marti.schedule.*;

/**
Expand Down Expand Up @@ -135,6 +136,9 @@ public static void main(String[] args) throws InterruptedException {

options.getReadClassifier().initialise();

// DEBUG - Test WalkoutRead
WalkOutRead wor = new WalkOutRead("test", options, options.getReadClassifier().getTaxonomy());

process(options);

//memoryReport();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ public void run() {
// endChunks();
// }

options.getLog().println("Thread exiting");
options.getLog().println("ReadFilterRunnable thread exiting");

outputStats();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ public interface JobScheduler {
public int submitJob(String[] commands, String logFilename, boolean submitJob);
public boolean checkJobCompleted(int i);
public int getExitValue(int i);
public void markJobAsFailed(int i);
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.lang.management.ManagementFactory;
import java.io.File;
import java.util.LinkedList;
import java.util.concurrent.ConcurrentHashMap;
import uk.ac.earlham.marti.core.MARTiEngineOptions;
import uk.ac.earlham.marti.core.MARTiLog;

Expand All @@ -18,8 +19,10 @@
*/
public class SimpleJobScheduler implements JobScheduler {
private static final int MAX_QUICK_JOB_ID = 100000;
private ConcurrentHashMap<Integer, SimpleJobSchedulerJob> allJobs = new ConcurrentHashMap<Integer, SimpleJobSchedulerJob>();
private LinkedList<SimpleJobSchedulerJob> pendingJobs = new LinkedList<SimpleJobSchedulerJob>();
private LinkedList<SimpleJobSchedulerJob> runningJobs = new LinkedList<SimpleJobSchedulerJob>();
private LinkedList<SimpleJobSchedulerJob> failedJobs = new LinkedList<SimpleJobSchedulerJob>();
//private LinkedList<SimpleJobSchedulerJob> finishedJobs = new LinkedList<SimpleJobSchedulerJob>();
private MARTiLog schedulerLog = new MARTiLog();
private MARTiEngineOptions options;
Expand Down Expand Up @@ -84,6 +87,7 @@ public synchronized int submitJob(String[] commands, String logFilename, boolean

SimpleJobSchedulerJob j = new SimpleJobSchedulerJob(jobId, commands, logFilename, dontRunIt);
pendingJobs.add(j);
allJobs.put(jobId, j);
schedulerLog.println("Submitted job\t"+jobId+"\t"+j.getCommand());
return jobId++;
}
Expand Down Expand Up @@ -209,6 +213,11 @@ public synchronized int getPendingJobCount() {
}

/**
 * Returns the number of jobs currently recorded in failedJobs.
 *
 * @return size of the failedJobs list
 */
public synchronized int getFailedJobCount() {
    return failedJobs.size();
}

/**
 * Records the job with the given id as failed.
 * <p>
 * Ids that are not present in allJobs are ignored, and a job is only added
 * to failedJobs once, so getFailedJobCount() reflects distinct failed jobs.
 *
 * @param i id of the job to mark as failed
 */
public synchronized void markJobAsFailed(int i) {
    SimpleJobSchedulerJob job = allJobs.get(i);

    // Unknown id: nothing to record. Adding null would inflate the failed
    // count and risk a NullPointerException for anything reading the list.
    if (job == null) {
        return;
    }

    if (!failedJobs.contains(job)) {
        failedJobs.add(job);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,12 @@ public synchronized void manageQueue() {
/**
 * Returns the number of entries currently held in failedJobs.
 *
 * @return size of failedJobs
 */
public synchronized int getFailedJobCount() {
return failedJobs.size();
}

/**
 * Records the job with the given id as failed and drops it from runningJobs.
 * <p>
 * Ids that are not present in allJobs are not added to failedJobs.
 *
 * @param i id of the job to mark as failed
 */
public synchronized void markJobAsFailed(int i) {
    SlurmSchedulerJob job = allJobs.get(i);

    // remove() is a no-op when the key is absent, so no containsKey check is needed.
    runningJobs.remove(i);

    // Skip unknown ids: a null value would be counted as a failed job and is
    // rejected outright by null-hostile maps.
    if (job != null) {
        failedJobs.put(i, job);
    }
}
}

0 comments on commit 18ea534

Please sign in to comment.