Skip to content

Commit

Permalink
updated example projects and README files
Browse files Browse the repository at this point in the history
ryandkuster committed Mar 9, 2020
1 parent fd9edfd commit 9a56fd7
Showing 7 changed files with 32 additions and 20 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -99,7 +99,7 @@ Using a text editor, save a file containing any of the following variables as a
|non_genomic|number of non-genomic bases not found in barcode sequence (e.g. 'T' complementary to A-tailing library prep)|integer|
|end_score|end-trim once entire window >= this Q score|integer between 0 and 40|
|window|size of window to test for >= end_score|integer within read length|
|min_l|minimum read length to retain for end-trimming and adapter removal|integer > 0|
|min_len|minimum read length to retain for end-trimming and adapter removal|integer > 0|
|q_min|Q score minimum (Phred value 0-40) applied to q_percent variable|integer between 0 and 40|
|q_percent|percentage of reads >= q_min Q scores|number between 0 and 100|
|adapter_match|number of base matches to identify adapters (requires 'adapters.txt')|integer (recommend 12)|
@@ -125,6 +125,7 @@ R2_bases_ls = ['TCC', 'TCT']
non_genomic = 1
end_score = 30
window = 10
min_len = 50
q_min = 30
q_percent = 95
```
@@ -134,7 +135,7 @@ q_percent = 95

*In this case, samples were double-digested with AluI and HaeIII and A-tailed before adapter ligation (**R1_bases_ls = ['TCC', 'TCT']** and **R2_bases_ls = ['TCC', 'TCT']**). Only reads containing these motifs will pass to subsequent steps. As the T complement from A-tailing introduces an artificial residue not present in the specimen sequenced, it can simultaneously be removed alongside motif detection (**non_genomic = 1**).*

*Automatic end-trimming will be performed based on Q score. Here, groups of bases are considered within a moving window of 10 bases at a time (**window = 10**) until that window consists only of the desired Q score at or above 30 (**end_score = 30**). It is at this point that the read is trimmed.*
*Automatic end-trimming will be performed based on Q score. Here, groups of bases are considered within a moving window of 10 bases at a time (**window = 10**) until that window consists only of the desired Q score at or above 30 (**end_score = 30**). It is at this point that the read is trimmed. Reads that are shorter than 50 bp will be discarded (**min_len = 50**).*

*Only reads that have a Q score of 30 (**q_min = 30**) across at least 95 percent of the read (**q_percent = 95**) will pass to subsequent steps. If an R1 read or an R2 read passes while its partner fails, it will be placed into a single-end read subfolder and the failing read will be discarded.*

20 changes: 11 additions & 9 deletions composer.py
Original file line number Diff line number Diff line change
@@ -48,7 +48,7 @@ def __init__(self):
self.non_genomic = False
self.end_score = False
self.window = False
self.min_l = 0
self.min_len = 1
self.adapters = ''
self.adapter_match = False
self.q_min = False
@@ -501,19 +501,19 @@ def scallop_end_multi():
curr = dir_make('end_trimmed')
paired_setup(curr)
scallop_part = partial(scallop_comp, c.in1_ls, c.in2_ls, None, None,
c.end_score, c.window, c.min_l, curr)
c.end_score, c.window, c.min_len, curr)
pool_multi(scallop_part, c.in1_ls)
if c.singles_ls:
scallop_part = partial(scallop_comp, [], [], None, None, c.end_score,
c.window, c.min_l, curr)
c.window, c.min_len, curr)
pool_multi(scallop_part, c.singles_ls)
paired_takedown(curr)
temp_ls = pathfinder(curr)
if c.all_qc:
temp_ls = walkthrough(curr, scallop_end_multi, temp_ls,
end_score=c.end_score,
window=c.window,
min_l=c.min_l)
min_len=c.min_len)
return temp_ls


@@ -524,11 +524,11 @@ def porifera_multi():
curr = dir_make('adapted')
paired_setup(curr)
porifera_part = partial(porifera_comp, curr, c.in1_ls, c.in2_ls,
c.adapters, c.bcs_dict, c.adapter_match, c.min_l)
c.adapters, c.bcs_dict, c.adapter_match, c.min_len)
pool_multi(porifera_part, c.in1_ls)
if c.singles_ls:
porifera_part = partial(porifera_comp, curr, [], [], c.adapters,
c.bcs_dict, c.adapter_match, c.min_l)
c.bcs_dict, c.adapter_match, c.min_len)
pool_multi(porifera_part, c.singles_ls)
paired_takedown(curr)
temp_ls = pathfinder(curr)
@@ -661,7 +661,9 @@ def tidy_up():

def summary_file():
end_time = str(datetime.datetime.now()).split('.')[0]
log = ('ngscomposer version ' + version + '\n\n' +
log = ('ngscomposer version ' + version + '\n' +
'see https://github.com/ryandkuster/ngscomposer/releases '\
'for newest release info\n\n' +
'start ' + c.start_time + '\n' +
'end ' + end_time + '\n\n' +
'paired = ' + str(c.paired) + '\n' +
@@ -679,7 +681,7 @@ def summary_file():
'non_genomic = ' + str(c.non_genomic) + '\n' +
'end_score = ' + str(c.end_score) + '\n' +
'window = ' + str(c.window) + '\n' +
'min_l = ' + str(c.min_l) + '\n' +
'min_len = ' + str(c.min_len) + '\n' +
'adapters = ' + str(c.adapters) + '\n' +
'adapter_match = ' + str(c.adapter_match) + '\n' +
'q_min = ' + str(c.q_min) + '\n' +
@@ -707,7 +709,7 @@ def summary_file():
' rotifer.py - motif detection\n' +
' porifera.py - adapter removal\n\n' +
' krill.py - quality filtering\n' +
'see https://github.com/ryandkuster/ngs-composer for full usage notes\n\n' +
'see https://github.com/ryandkuster/ngscomposer for full usage notes\n\n' +
''), formatter_class=RawTextHelpFormatter)
parser.add_argument('-i', type=str, required=True,
help='the full or relative path to the project directory')
8 changes: 4 additions & 4 deletions examples/project1/conf.py
Original file line number Diff line number Diff line change
@@ -2,15 +2,15 @@
procs = 1
alt_dir = False
initial_qc = True
all_qc = False
all_qc = 'summary'
walkaway = True
front_trim = 6
mismatch = 1
R1_bases_ls = ['TCC', 'TCT']
R2_bases_ls = ['TCC', 'TCT']
non_genomic = 1
q_min = 30
q_percent = 95
trim_mode = False
auto_trim = False
q_percent = 90
end_score = 30
window = 10
rm_transit = True
4 changes: 2 additions & 2 deletions examples/project2/conf.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,6 @@
mismatch = 1
q_min = 30
q_percent = 95
trim_mode = 'quartile'
auto_trim = 30
end_score = 30
window = 10
rm_transit = False
1 change: 1 addition & 0 deletions examples/project3/adapters.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
6 changes: 4 additions & 2 deletions examples/project3/conf.py
Original file line number Diff line number Diff line change
@@ -9,8 +9,10 @@
R1_bases_ls = ['TCC', 'TCT']
R2_bases_ls = ['TCC', 'TCT']
non_genomic = 1
end_score = 30
window = 10
min_len = 100
adapter_match = 12
q_min = 30
q_percent = 95
trim_mode = 'quartile'
auto_trim = 30
rm_transit = True
8 changes: 7 additions & 1 deletion tools/README.md
Original file line number Diff line number Diff line change
@@ -46,6 +46,12 @@ Example:
$ python3 scallop.py -r1 1_R1.fastq -f 6
```

or

```bash
$ python3 scallop.py -r1 1_R1.fastq -w 10 -e 30 -l 50
```

The output files are automatically named with "trimmed" prefix (e.g. "trimmed.1_R1.fastq")

## Anemone - demultiplexing of single-end or paired-end barcoded libraries
@@ -143,7 +149,7 @@ As with Krill, paired-end output files will indicate when pairing has been retai

Example:
```bash
$ python3 porifera.py -r1 1_R1.fastq -a1 adapters.txt -n 18 -m 3
$ python3 porifera.py -r1 1_R1.fastq -a1 adapters.txt -m 12 -k 8 -r 1
```

Example adapter file:

0 comments on commit 9a56fd7

Please sign in to comment.