updated example projects and README files

ryandkuster · Mar 9, 2020 · 9a56fd7 · 9a56fd7
1 parent fd9edfd
commit 9a56fd7
Showing 7 changed files with 32 additions and 20 deletions.
diff --git a/README.md b/README.md
@@ -99,7 +99,7 @@ Using a text editor, save a file containing any of the following variables as a
 |non_genomic|number of non-genomic bases not found in barcode sequence (e.g. 'T' complementary to A-tailing library prep)|integer|
 |end_score|end-trim once entire window >= this Q score|integer between 0 and 40|
 |window|size of window to test for >= end_score|integer within read length|
-|min_l|minimum read length to retain for end-trimming and adapter removal|integer > 0|
+|min_len|minimum read length to retain for end-trimming and adapter removal|integer > 0|
 |q_min|Q score minimum (Phred value 0-40) applied to q_percent variable|integer between 0 and 40|
 |q_percent|percentage of reads >= q_min Q scores|number between 0 and 100|
 |adapter_match|number of base matches to identify adapters (requires 'adapters.txt')|integer (recommend 12)|
@@ -125,6 +125,7 @@ R2_bases_ls = ['TCC', 'TCT']
 non_genomic = 1
 end_score = 30
 window = 10
+min_len = 50
 q_min = 30
 q_percent = 95
 ```
@@ -134,7 +135,7 @@ q_percent = 95
 
 *In this case, samples were double-digested with AluI and HaeIII and A-tailed before adapter ligation (**R1_bases_ls = ['TCC', 'TCT']** and **R2_bases_ls = ['TCC', 'TCT']**). Only reads containing these motifs will pass to subsequent steps. As the T complement from A-tailing introduces an artificial residue not present in the specimen sequenced, it can simultaneously be removed alongside motif detection (**non_genomic = 1**).*
 
-*Automatic end-trimming will be performed based on Q score. Here, groups of bases are considered within a moving window of 10 bases at a time (**window = 10**) until that window consists only of the desired Q score at or above 30 (**end_score = 30**). It is at this point that the read is trimmed.*
+*Automatic end-trimming will be performed based on Q score. Here, groups of bases are considered within a moving window of 10 bases at a time (**window = 10**) until that window consists only of the desired Q score at or above 30 (**end_score = 30**). It is at this point that the read is trimmed. Reads that are shorter than 50 bp will be discarded (**min_len = 50**).*
 
 *Only reads that have a Q score of 30 (**q_min = 30**) across at least 95 percent of the read (**q_percent = 95**) will pass to subsequent steps. If a R1 read or an R2 read passes while its partner fails, it will be placed into a single-end read subfolder and the failing read will be discarded.*
 

diff --git a/composer.py b/composer.py
@@ -48,7 +48,7 @@ def __init__(self):
         self.non_genomic = False
         self.end_score = False
         self.window = False
-        self.min_l = 0
+        self.min_len = 1
         self.adapters = ''
         self.adapter_match = False
         self.q_min = False
@@ -501,19 +501,19 @@ def scallop_end_multi():
     curr = dir_make('end_trimmed')
     paired_setup(curr)
     scallop_part = partial(scallop_comp, c.in1_ls, c.in2_ls, None, None,
-                           c.end_score, c.window, c.min_l, curr)
+                           c.end_score, c.window, c.min_len, curr)
     pool_multi(scallop_part, c.in1_ls)
     if c.singles_ls:
         scallop_part = partial(scallop_comp, [], [], None, None, c.end_score,
-                               c.window, c.min_l, curr)
+                               c.window, c.min_len, curr)
         pool_multi(scallop_part, c.singles_ls)
     paired_takedown(curr)
     temp_ls = pathfinder(curr)
     if c.all_qc:
         temp_ls = walkthrough(curr, scallop_end_multi, temp_ls,
                               end_score=c.end_score,
                               window=c.window,
-                              min_l=c.min_l)
+                              min_len=c.min_len)
     return temp_ls
 
 
@@ -524,11 +524,11 @@ def porifera_multi():
     curr = dir_make('adapted')
     paired_setup(curr)
     porifera_part = partial(porifera_comp, curr, c.in1_ls, c.in2_ls,
-                            c.adapters, c.bcs_dict, c.adapter_match, c.min_l)
+                            c.adapters, c.bcs_dict, c.adapter_match, c.min_len)
     pool_multi(porifera_part, c.in1_ls)
     if c.singles_ls:
         porifera_part = partial(porifera_comp, curr, [], [], c.adapters,
-                                c.bcs_dict, c.adapter_match, c.min_l)
+                                c.bcs_dict, c.adapter_match, c.min_len)
         pool_multi(porifera_part, c.singles_ls)
     paired_takedown(curr)
     temp_ls = pathfinder(curr)
@@ -661,7 +661,9 @@ def tidy_up():
 
 def summary_file():
     end_time = str(datetime.datetime.now()).split('.')[0]
-    log = ('ngscomposer version ' + version + '\n\n' +
+    log = ('ngscomposer version ' + version + '\n' +
+           'see https://github.com/ryandkuster/ngscomposer/releases '\
+           'for newest release info\n\n' +
            'start ' + c.start_time + '\n' +
            'end   ' + end_time + '\n\n' +
            'paired = ' + str(c.paired) + '\n' +
@@ -679,7 +681,7 @@ def summary_file():
            'non_genomic = ' + str(c.non_genomic) + '\n' +
            'end_score = ' + str(c.end_score) + '\n' +
            'window = ' + str(c.window) + '\n' +
-           'min_l = ' + str(c.min_l) + '\n' +
+           'min_len = ' + str(c.min_len) + '\n' +
            'adapters = ' + str(c.adapters) + '\n' +
            'adapter_match = ' + str(c.adapter_match) + '\n' +
            'q_min = ' + str(c.q_min) + '\n' +
@@ -707,7 +709,7 @@ def summary_file():
         '    rotifer.py  - motif detection\n' +
         '    porifera.py - adapter removal\n\n' +
         '    krill.py    - quality filtering\n' +
-        'see https://github.com/ryandkuster/ngs-composer for full usage notes\n\n' +
+        'see https://github.com/ryandkuster/ngscomposer for full usage notes\n\n' +
         ''), formatter_class=RawTextHelpFormatter)
     parser.add_argument('-i', type=str, required=True,
                         help='the full or relative path to the project directory')

diff --git a/examples/project1/conf.py b/examples/project1/conf.py
@@ -2,15 +2,15 @@
 procs = 1
 alt_dir = False
 initial_qc = True
-all_qc = False
+all_qc = 'summary'
 walkaway = True
 front_trim = 6
 mismatch = 1
 R1_bases_ls = ['TCC', 'TCT']
 R2_bases_ls = ['TCC', 'TCT']
 non_genomic = 1
 q_min = 30
-q_percent = 95
-trim_mode = False
-auto_trim = False
+q_percent = 90
+end_score = 30
+window = 10
 rm_transit = True
diff --git a/examples/project2/conf.py b/examples/project2/conf.py
@@ -8,6 +8,6 @@
 mismatch = 1
 q_min = 30
 q_percent = 95
-trim_mode = 'quartile'
-auto_trim = 30
+end_score = 30
+window = 10
 rm_transit = False
diff --git a/examples/project3/adapters.txt b/examples/project3/adapters.txt
@@ -0,0 +1 @@
+ACACTCTTTCCCTACACGACGCTCTTCCGATCT
diff --git a/examples/project3/conf.py b/examples/project3/conf.py
@@ -9,8 +9,10 @@
 R1_bases_ls = ['TCC', 'TCT']
 R2_bases_ls = ['TCC', 'TCT']
 non_genomic = 1
+end_score = 30
+window = 10
+min_len = 100
+adapter_match = 12
 q_min = 30
 q_percent = 95
-trim_mode = 'quartile'
-auto_trim = 30
 rm_transit = True
diff --git a/tools/README.md b/tools/README.md
@@ -46,6 +46,12 @@ Example:
 $ python3 scallop.py -r1 1_R1.fastq -f 6
 ```
 
+or
+
+```bash
+$ python3 scallop.py -r1 1_R1.fastq -w 10 -e 30 -l 50
+```
+
 The output files are automatically named with "trimmed" prefix (e.g. "trimmed.1_R1.fastq")
 
 ## Anemone - demultiplexing of single-end or paired-end barcoded libraries
@@ -143,7 +149,7 @@ As with Krill, paired-end output files will indicate when pairing has been retai
 
 Example:
 ```bash
-$ python3 porifera.py -r1 1_R1.fastq -a1 adapters.txt -n 18 -m 3
+$ python3 porifera.py -r1 1_R1.fastq -a1 adapters.txt -m 12 -k 8 -r 1 
 ```
 
 Example adapter file: