diff --git a/bin/MARTiEngine.jar b/bin/MARTiEngine.jar index 4471de6..73df114 100644 Binary files a/bin/MARTiEngine.jar and b/bin/MARTiEngine.jar differ diff --git a/docs/.DS_Store b/docs/.DS_Store index b1aa948..69f56d9 100644 Binary files a/docs/.DS_Store and b/docs/.DS_Store differ diff --git a/docs/source/installation.rst b/docs/source/installation.rst index b556d7e..00dd304 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -18,10 +18,10 @@ Prerequisites ------------- In order to run the MARTi Engine (back-end), you also need to install the following on the machine where it will be running: -* **BLAST (2.10 or greater)** - `download from NCBI `_ or, optionally, install with `homebrew on Mac `_. On Ubuntu, using apt-get may install an older version. In which case, it may be easiest to download executables from the NCBI link above. +* **BLAST (2.12.0 or greater)** - `download from NCBI `_ or, optionally, install with `homebrew on Mac `_. On Ubuntu, using apt-get may install an older version. In which case, it may be easiest to download executables from the NCBI link above. * **BLAST databases** - what you'll need will depend on what you're trying to do. But you might want to start with the nt database, `available from the NCBI Blast FTP site `_. * **NCBI taxonomy** - you can `download this from the NCBI taxonomy FTP site `_. You need the taxdump files, specifically the nodes.dmp and names.dmp files. -* **Java Run Time Environment (OpenJDK 16.0.2 or greater)** - the simplest option is to `install OpenJDK `_. Note, on Macs, the documentation for OpenJDK isn't great. Once you download the JDK, you need to move the directory into /Library/Java/VirtualMachines (`as described here `_). +* **Java Run Time Environment (OpenJDK 16.0.2 or greater)** - the simplest option is to `install OpenJDK `_. Note, on Macs, the documentation for OpenJDK isn't great. Once you download the JDK, you need to move the directory into /Library/Java/VirtualMachines (`as described here `_). Or install with `homebrew `_ (use brew install openjdk). In order to run the MARTi GUI, you also need to install the following on the computer where it will be running: diff --git a/engine/src/main/java/uk/ac/earlham/lcaparse/BlastHit.java b/engine/src/main/java/uk/ac/earlham/lcaparse/BlastHit.java index 2c4ab8a..79b03b7 100644 --- a/engine/src/main/java/uk/ac/earlham/lcaparse/BlastHit.java +++ b/engine/src/main/java/uk/ac/earlham/lcaparse/BlastHit.java @@ -63,7 +63,7 @@ public BlastHit(Taxonomy t, AccessionTaxonConvertor atc, String line, int format cacheTaxonIdPath(); } } - + private void parseNanoOKWithStitle(String[] fields ) { // NanoOK14: "qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle staxids" // NanoOK15: "qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle qcovs staxids" @@ -96,7 +96,11 @@ private void parseNanoOKWithStitle(String[] fields ) { queryCoverage = Double.parseDouble(fields[13]); String taxaString = fields[14]; String[] taxa = taxaString.split(";"); - taxonId = Integer.parseInt(taxa[0]); + try { + taxonId = Integer.parseInt(taxa[0]); + } catch (NumberFormatException e) { + taxonId = -2; + } } validAlignment = true; diff --git a/engine/src/main/java/uk/ac/earlham/marti/amr/AMRResults.java b/engine/src/main/java/uk/ac/earlham/marti/amr/AMRResults.java index 790d39c..952b2c5 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/amr/AMRResults.java +++ b/engine/src/main/java/uk/ac/earlham/marti/amr/AMRResults.java @@ -69,8 +69,10 @@ public void analyseChunk(AMRAnalysisTask aat) { WalkOutChunk woc = new WalkOutChunk(options, options.getReadClassifier().getTaxonomy(), aat.getOriginalChunkNumber(), aat.getProcessedChunkNumber()); woc.load(aat.getCARDBlastFilename(), aat.getNtBlastFilename()); + options.getLog().println("Chunks loaded, processing hits"); woc.processHits(wor); readCountWithAMRHits += woc.getReadCountWithAMRHits(); + options.getLog().println("Read count with AMR hits "+readCountWithAMRHits); writeJSON(aat.getProcessedChunkNumber()); @@ -89,6 +91,8 @@ public void writeJSON(int cn) { // chunkTime object JsonObjectBuilder chunkTimes = Json.createObjectBuilder(); + options.getLog().println("Writing JSON for chunk "+cn); + for (int c=1; c<=wor.getMaxChunkNumber(); c++) { chunkTimes.add(Integer.toString(c), wor.getChunkTime(c)); } @@ -213,7 +217,7 @@ public void writeJSON(int cn) { e.printStackTrace(); System.exit(1); } - + options.getLog().println("JSON written for chunk "+cn); } public CARDOntology getOntology() { diff --git a/engine/src/main/java/uk/ac/earlham/marti/blast/BlastProcessRunnable.java b/engine/src/main/java/uk/ac/earlham/marti/blast/BlastProcessRunnable.java index 5e6e7cc..404eae5 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/blast/BlastProcessRunnable.java +++ b/engine/src/main/java/uk/ac/earlham/marti/blast/BlastProcessRunnable.java @@ -129,7 +129,7 @@ private void runBlast(String fastaPathname) { if (options.getStopProcessingAfter() > 0) { if (numberOfReadsProcessed > options.getStopProcessingAfter()) { - options.getLog().println("Number of FASTQ reads processed ("+numberOfReadsProcessed+") exceeeds limit ("+options.getStopProcessingAfter()+"). Sending STOP command."); + options.getLog().println("Note: Number of FASTQ reads processed ("+numberOfReadsProcessed+") exceeeds limit ("+options.getStopProcessingAfter()+"). Sending STOP command."); options.stopProcessing(); } } diff --git a/engine/src/main/java/uk/ac/earlham/marti/classify/ReadClassifier.java b/engine/src/main/java/uk/ac/earlham/marti/classify/ReadClassifier.java index fd19fa9..f896c39 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/classify/ReadClassifier.java +++ b/engine/src/main/java/uk/ac/earlham/marti/classify/ReadClassifier.java @@ -76,15 +76,38 @@ public synchronized void addFile(String blastProcessName, int i, String queryFil public boolean checkBLASTCompleted(ReadClassifierItem f, int exitValue) { String blastLogFilename = f.getLogFile(); - - //options.getLog().println("Checking BLAST log for errors "+blastLogFilename); - //options.getLog().println(" Exit value was "+exitValue); - + boolean completed = true; + if (exitValue != 0) { - return false; + completed = false; + //options.getLog().println(" Exit value was "+exitValue); + } else { + options.getLog().println("Checking BLAST log for errors "+blastLogFilename); + try { + File bf = new File(blastLogFilename); + if (bf.exists()) { + BufferedReader br; + String line; + br = new BufferedReader(new FileReader(blastLogFilename)); + while ((line = br.readLine()) != null) { + if (line.toLowerCase().contains("error")) { + options.getLog().printlnLogAndScreen("Error: Stopping due to error message in "+blastLogFilename); + completed = false; + } + } + br.close(); + } else { + options.getLog().println("WARNING: Couldn't find BLAST log "+blastLogFilename); + options.getLog().println("This is usually a bad sign, but continuing to run."); + } + } catch (Exception e) { + System.out.println("checkBLASTCompleted exception"); + e.printStackTrace(); + System.exit(1); + } } - return true; + return completed; } // /** @@ -272,6 +295,7 @@ public synchronized void checkForFilesToClassify() { } else { System.out.println("Error: Failed BLAST "+f.getBlastFile() + " exit value "+ js.getExitValue(thisId)); options.getLog().println("Error: Failed BLAST "+f.getBlastFile()); + js.markJobAsFailed(thisId); } } else { options.getLog().println(MARTiLog.LOGLEVEL_NOTCOMPLETED, "Not completed " + f.blastProcessName + " - " + f.blastFile + " - " + f.getJobId()); diff --git a/engine/src/main/java/uk/ac/earlham/marti/core/MARTiAnalysisRunnable.java b/engine/src/main/java/uk/ac/earlham/marti/core/MARTiAnalysisRunnable.java index 66ec012..e7da302 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/core/MARTiAnalysisRunnable.java +++ b/engine/src/main/java/uk/ac/earlham/marti/core/MARTiAnalysisRunnable.java @@ -70,11 +70,11 @@ public void run() { } } - options.getLog().println("MARTiAnalysisRunnable finalising"); - options.getLog().println("Thread exiting"); + options.getLog().println("MARTiAnalysisRunnable thread exiting"); } public void exitThread() { + options.getLog().printlnLogAndScreen("Exiting MARTiAnalsisRunnable thread"); keepRunning = false; } } diff --git a/engine/src/main/java/uk/ac/earlham/marti/core/MARTiEngine.java b/engine/src/main/java/uk/ac/earlham/marti/core/MARTiEngine.java index b1ddb8d..6321a3b 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/core/MARTiEngine.java +++ b/engine/src/main/java/uk/ac/earlham/marti/core/MARTiEngine.java @@ -8,6 +8,7 @@ import java.util.*; import java.util.concurrent.ThreadPoolExecutor; import java.util.zip.*; +import uk.ac.earlham.marti.amr.WalkOutRead; import uk.ac.earlham.marti.schedule.*; /** @@ -135,6 +136,9 @@ public static void main(String[] args) throws InterruptedException { options.getReadClassifier().initialise(); + // DEBUG - Test WalkoutRead + WalkOutRead wor = new WalkOutRead("test", options, options.getReadClassifier().getTaxonomy()); + process(options); //memoryReport(); diff --git a/engine/src/main/java/uk/ac/earlham/marti/filter/ReadFilterRunnable.java b/engine/src/main/java/uk/ac/earlham/marti/filter/ReadFilterRunnable.java index 60fd640..6ea5973 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/filter/ReadFilterRunnable.java +++ b/engine/src/main/java/uk/ac/earlham/marti/filter/ReadFilterRunnable.java @@ -348,7 +348,7 @@ public void run() { // endChunks(); // } - options.getLog().println("Thread exiting"); + options.getLog().println("ReadFilterRunnable thread exiting"); outputStats(); } diff --git a/engine/src/main/java/uk/ac/earlham/marti/schedule/JobScheduler.java b/engine/src/main/java/uk/ac/earlham/marti/schedule/JobScheduler.java index 2aaddc5..7197004 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/schedule/JobScheduler.java +++ b/engine/src/main/java/uk/ac/earlham/marti/schedule/JobScheduler.java @@ -18,4 +18,5 @@ public interface JobScheduler { public int submitJob(String[] commands, String logFilename, boolean submitJob); public boolean checkJobCompleted(int i); public int getExitValue(int i); + public void markJobAsFailed(int i); } diff --git a/engine/src/main/java/uk/ac/earlham/marti/schedule/SimpleJobScheduler.java b/engine/src/main/java/uk/ac/earlham/marti/schedule/SimpleJobScheduler.java index 249b6ee..2ef6df3 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/schedule/SimpleJobScheduler.java +++ b/engine/src/main/java/uk/ac/earlham/marti/schedule/SimpleJobScheduler.java @@ -8,6 +8,7 @@ import java.lang.management.ManagementFactory; import java.io.File; import java.util.LinkedList; +import java.util.concurrent.ConcurrentHashMap; import uk.ac.earlham.marti.core.MARTiEngineOptions; import uk.ac.earlham.marti.core.MARTiLog; @@ -18,8 +19,10 @@ */ public class SimpleJobScheduler implements JobScheduler { private static final int MAX_QUICK_JOB_ID = 100000; + private ConcurrentHashMap allJobs = new ConcurrentHashMap(); private LinkedList pendingJobs = new LinkedList(); private LinkedList runningJobs = new LinkedList(); + private LinkedList failedJobs = new LinkedList(); //private LinkedList finishedJobs = new LinkedList(); private MARTiLog schedulerLog = new MARTiLog(); private MARTiEngineOptions options; @@ -84,6 +87,7 @@ public synchronized int submitJob(String[] commands, String logFilename, boolean SimpleJobSchedulerJob j = new SimpleJobSchedulerJob(jobId, commands, logFilename, dontRunIt); pendingJobs.add(j); + allJobs.put(jobId, j); schedulerLog.println("Submitted job\t"+jobId+"\t"+j.getCommand()); return jobId++; } @@ -209,6 +213,11 @@ public synchronized int getPendingJobCount() { } public synchronized int getFailedJobCount() { - return 0; + return failedJobs.size(); + } + + public synchronized void markJobAsFailed(int i) { + SimpleJobSchedulerJob ssj = allJobs.get(i); + failedJobs.add(ssj); } } diff --git a/engine/src/main/java/uk/ac/earlham/marti/schedule/SlurmScheduler.java b/engine/src/main/java/uk/ac/earlham/marti/schedule/SlurmScheduler.java index 1057c2e..3d32c29 100644 --- a/engine/src/main/java/uk/ac/earlham/marti/schedule/SlurmScheduler.java +++ b/engine/src/main/java/uk/ac/earlham/marti/schedule/SlurmScheduler.java @@ -188,4 +188,12 @@ public synchronized void manageQueue() { public synchronized int getFailedJobCount() { return failedJobs.size(); } + + public synchronized void markJobAsFailed(int i) { + SlurmSchedulerJob ssj = allJobs.get(i); + if (runningJobs.containsKey(i)) { + runningJobs.remove(i); + } + failedJobs.put(i, ssj); + } }