Skip to content

Commit

Permalink
Merge branch 'no_overhangs'
Browse files (browse the repository at this point in the history)
  • Loading branch information
ryandkuster committed Apr 3, 2024
2 parents fc08561 + 28f1810 commit bd78b7c
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 75 deletions.
19 changes: 10 additions & 9 deletions readsynth.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,6 @@ def create_adapters(args):
a2 = ['CAAGCAGAAGACGGCATACGAGATGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG',
'CTGTCTCTTATACACATCTCCGAGCCCACGAGACATCTCGTATGCCGTCTTCTGCTTG',
'rs2']
m1 = list(args.motif_dt1.keys())[0]
m2 = list(args.motif_dt2.keys())[0]
a1[0] = a1[0] + args.m1[0][:args.motif_dt1[m1]]
a1[1] = args.m1[0][args.motif_dt1[m1]+1:] + a1[1]
a2[0] = a2[0] + args.m2[0][:args.motif_dt2[m2]]
a2[1] = args.m2[0][args.motif_dt2[m2]+1:] + a2[1]

return [a1], [a2]

Expand Down Expand Up @@ -545,11 +539,16 @@ def process_df(df, digest_file, args):
df = df.reset_index(drop=True)

"""
add a quick step that removes appropriate over/underhang
add a quick step that calculates length based on the adjusted length
of sequences after removal of appropriate over/underhang
"""

df['seq_backup'] = df['seq'].copy()
df['revc_backup'] = df['revc'].copy()

for mot, front in args.motif_dt.items():
back = args.motif_len[mot] - front

df.loc[(df['m1'] == mot) & (df['reverse'] == 0), 'seq'] = \
df['seq'].str[front:]
if back != 0:
Expand All @@ -572,9 +571,11 @@ def process_df(df, digest_file, args):
df.loc[(df['m2'] == mot) & (df['reverse'] == 1), 'revc'] = \
df['revc'].str[:-back]


df['length'] = df['seq'].str.len()
#df = df[(df['seq'].str.len() > 0) & (df['revc'].str.len() > 0)]
df['seq'] = df['seq_backup']
df['revc'] = df['revc_backup']
df.drop('seq_backup', axis=1, inplace=True)
df.drop('revc_backup', axis=1, inplace=True)

df = df.sort_values(by=['length'])
df = df.reset_index(drop=True)
Expand Down
32 changes: 16 additions & 16 deletions test/test_data/genomes/hhai_hhai_process_df_test.csv
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
seq,start,end,m1,m2,internal,reverse,revc,length
CAAAAAAAAAAGCG,10,24,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,48,62,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,62,76,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,180,194,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CTTTTTTTTTTGCG,10,24,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,48,62,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,62,76,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,180,194,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CAAAAAAAAAAGCGCAAAAAAAAAAGCG,48,76,GCGC,GCGC,1,0,CTTTTTTTTTTGCGCTTTTTTTTTTGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCG,100,128,GCGC,GCGC,0,0,CTTTTTTTTTTTTAATTTTTTTTTTGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCG,152,180,GCGC,GCGC,0,0,CTTTTTTTTTTTTAATTTTTTTTTTGCG,28
CTTTTTTTTTTGCGCTTTTTTTTTTGCG,48,76,GCGC,GCGC,1,1,CAAAAAAAAAAGCGCAAAAAAAAAAGCG,28
CTTTTTTTTTTTTAATTTTTTTTTTGCG,100,128,GCGC,GCGC,0,1,CAAAAAAAAAATTAAAAAAAAAAAAGCG,28
CTTTTTTTTTTTTAATTTTTTTTTTGCG,152,180,GCGC,GCGC,0,1,CAAAAAAAAAATTAAAAAAAAAAAAGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,152,194,GCGC,GCGC,1,0,CTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCG,42
CTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCG,152,194,GCGC,GCGC,1,1,CAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,42
GCGCAAAAAAAAAAGCGC,10,24,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,48,62,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,62,76,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,180,194,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCTTTTTTTTTTGCGC,10,24,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,48,62,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,62,76,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,180,194,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCAAAAAAAAAAGCGCAAAAAAAAAAGCGC,48,76,GCGC,GCGC,1,0,GCGCTTTTTTTTTTGCGCTTTTTTTTTTGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,100,128,GCGC,GCGC,0,0,GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,152,180,GCGC,GCGC,0,0,GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,28
GCGCTTTTTTTTTTGCGCTTTTTTTTTTGCGC,48,76,GCGC,GCGC,1,1,GCGCAAAAAAAAAAGCGCAAAAAAAAAAGCGC,28
GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,100,128,GCGC,GCGC,0,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,28
GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,152,180,GCGC,GCGC,0,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,152,194,GCGC,GCGC,1,0,GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,42
GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,152,194,GCGC,GCGC,1,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,42
32 changes: 16 additions & 16 deletions test/test_data/genomes/hhai_hhai_raw_digest_test1.fasta.csv
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
seq,start,end,m1,m2,internal,reverse,revc,length
CAAAAAAAAAAGCG,10,24,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,48,62,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,62,76,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CAAAAAAAAAAGCG,180,194,GCGC,GCGC,0,0,CTTTTTTTTTTGCG,14
CTTTTTTTTTTGCG,10,24,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,48,62,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,62,76,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CTTTTTTTTTTGCG,180,194,GCGC,GCGC,0,1,CAAAAAAAAAAGCG,14
CAAAAAAAAAAGCGCAAAAAAAAAAGCG,48,76,GCGC,GCGC,1,0,CTTTTTTTTTTGCGCTTTTTTTTTTGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCG,100,128,GCGC,GCGC,0,0,CTTTTTTTTTTTTAATTTTTTTTTTGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCG,152,180,GCGC,GCGC,0,0,CTTTTTTTTTTTTAATTTTTTTTTTGCG,28
CTTTTTTTTTTGCGCTTTTTTTTTTGCG,48,76,GCGC,GCGC,1,1,CAAAAAAAAAAGCGCAAAAAAAAAAGCG,28
CTTTTTTTTTTTTAATTTTTTTTTTGCG,100,128,GCGC,GCGC,0,1,CAAAAAAAAAATTAAAAAAAAAAAAGCG,28
CTTTTTTTTTTTTAATTTTTTTTTTGCG,152,180,GCGC,GCGC,0,1,CAAAAAAAAAATTAAAAAAAAAAAAGCG,28
CAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,152,194,GCGC,GCGC,1,0,CTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCG,42
CTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCG,152,194,GCGC,GCGC,1,1,CAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,42
GCGCAAAAAAAAAAGCGC,10,24,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,48,62,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,62,76,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCAAAAAAAAAAGCGC,180,194,GCGC,GCGC,0,0,GCGCTTTTTTTTTTGCGC,14
GCGCTTTTTTTTTTGCGC,10,24,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,48,62,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,62,76,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCTTTTTTTTTTGCGC,180,194,GCGC,GCGC,0,1,GCGCAAAAAAAAAAGCGC,14
GCGCAAAAAAAAAAGCGCAAAAAAAAAAGCGC,48,76,GCGC,GCGC,1,0,GCGCTTTTTTTTTTGCGCTTTTTTTTTTGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,100,128,GCGC,GCGC,0,0,GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,152,180,GCGC,GCGC,0,0,GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,28
GCGCTTTTTTTTTTGCGCTTTTTTTTTTGCGC,48,76,GCGC,GCGC,1,1,GCGCAAAAAAAAAAGCGCAAAAAAAAAAGCGC,28
GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,100,128,GCGC,GCGC,0,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,28
GCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,152,180,GCGC,GCGC,0,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGC,28
GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,152,194,GCGC,GCGC,1,0,GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,42
GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAATTTTTTTTTTGCGC,152,194,GCGC,GCGC,1,1,GCGCAAAAAAAAAATTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,42
10 changes: 5 additions & 5 deletions test/test_data/genomes/hhai_msei_process_df_test.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
seq,start,end,m1,m2,internal,reverse,revc,length
CAAAAAAAAAAT,100,114,GCGC,TTAA,0,0,TAATTTTTTTTTTGCG,12
CTTTTTTTTTTT,114,128,TTAA,GCGC,0,1,TAAAAAAAAAAAAGCG,12
CAAAAAAAAAAT,152,166,GCGC,TTAA,0,0,TAATTTTTTTTTTGCG,12
CTTTTTTTTTTT,166,180,TTAA,GCGC,0,1,TAAAAAAAAAAAAGCG,12
CTTTTTTTTTTGCGCTTTTTTTTTTT,166,194,TTAA,GCGC,1,1,TAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,26
GCGCAAAAAAAAAATTAA,100,114,GCGC,TTAA,0,0,TTAATTTTTTTTTTGCGC,12
GCGCTTTTTTTTTTTTAA,114,128,TTAA,GCGC,0,1,TTAAAAAAAAAAAAGCGC,12
GCGCAAAAAAAAAATTAA,152,166,GCGC,TTAA,0,0,TTAATTTTTTTTTTGCGC,12
GCGCTTTTTTTTTTTTAA,166,180,TTAA,GCGC,0,1,TTAAAAAAAAAAAAGCGC,12
GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAA,166,194,TTAA,GCGC,1,1,TTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,26
10 changes: 5 additions & 5 deletions test/test_data/genomes/hhai_msei_raw_digest_test1.fasta.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
seq,start,end,m1,m2,internal,reverse,revc,length
CAAAAAAAAAAT,100,114,GCGC,TTAA,0,0,TAATTTTTTTTTTGCG,12
CTTTTTTTTTTT,114,128,TTAA,GCGC,0,1,TAAAAAAAAAAAAGCG,12
CAAAAAAAAAAT,152,166,GCGC,TTAA,0,0,TAATTTTTTTTTTGCG,12
CTTTTTTTTTTT,166,180,TTAA,GCGC,0,1,TAAAAAAAAAAAAGCG,12
CTTTTTTTTTTGCGCTTTTTTTTTTT,166,194,TTAA,GCGC,1,1,TAAAAAAAAAAAAGCGCAAAAAAAAAAGCG,26
GCGCAAAAAAAAAATTAA,100,114,GCGC,TTAA,0,0,TTAATTTTTTTTTTGCGC,12
GCGCTTTTTTTTTTTTAA,114,128,TTAA,GCGC,0,1,TTAAAAAAAAAAAAGCGC,12
GCGCAAAAAAAAAATTAA,152,166,GCGC,TTAA,0,0,TTAATTTTTTTTTTGCGC,12
GCGCTTTTTTTTTTTTAA,166,180,TTAA,GCGC,0,1,TTAAAAAAAAAAAAGCGC,12
GCGCTTTTTTTTTTGCGCTTTTTTTTTTTTAA,166,194,TTAA,GCGC,1,1,TTAAAAAAAAAAAAGCGCAAAAAAAAAAGCGC,26
28 changes: 4 additions & 24 deletions test/test_readsynth.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,31 +142,11 @@ def test_get_adapters_1(self):

def test_create_adapters_1(self):
args = Variables()
args.motif_dt1 = {'GCGC': 3}
args.motif_dt2 = {'TTAA': 1}
args.m1 = ['GCG/C']
args.m2 = ['T/TAA']
a1 = [['AATGATACGGCGACCACCGAGATCTACACTCGTCGGCAGCGTCAGATGTGTATAAGAGACAGGCG',
'CCTGTCTCTTATACACATCTGACGCTGCCGACGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
'rs1']]
a2 = [['CAAGCAGAAGACGGCATACGAGATGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAGT',
'TAACTGTCTCTTATACACATCTCCGAGCCCACGAGACATCTCGTATGCCGTCTTCTGCTTG',
'rs2']]
args.a1, args.a2 = rs.create_adapters(args)
self.assertEqual(args.a1, a1)
self.assertEqual(args.a2, a2)

def test_create_adapters_2(self):
args = Variables()
args.motif_dt1 = {'GAATTC': 1}
args.motif_dt2 = {'TTAA': 1}
args.m1 = ['G/AATTC']
args.m2 = ['T/TAA']
a1 = [['AATGATACGGCGACCACCGAGATCTACACTCGTCGGCAGCGTCAGATGTGTATAAGAGACAGG',
'AATTCCTGTCTCTTATACACATCTGACGCTGCCGACGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
a1 = [['AATGATACGGCGACCACCGAGATCTACACTCGTCGGCAGCGTCAGATGTGTATAAGAGACAG',
'CTGTCTCTTATACACATCTGACGCTGCCGACGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
'rs1']]
a2 = [['CAAGCAGAAGACGGCATACGAGATGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAGT',
'TAACTGTCTCTTATACACATCTCCGAGCCCACGAGACATCTCGTATGCCGTCTTCTGCTTG',
a2 = [['CAAGCAGAAGACGGCATACGAGATGTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG',
'CTGTCTCTTATACACATCTCCGAGCCCACGAGACATCTCGTATGCCGTCTTCTGCTTG',
'rs2']]
args.a1, args.a2 = rs.create_adapters(args)
self.assertEqual(args.a1, a1)
Expand Down
2 changes: 2 additions & 0 deletions vignettes/helius/run_readsynth.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mkdir ecori_msei_10M_output

python3 ../../readsynth.py \
-g abundances.csv \
-o ecori_msei_10M_output/ \
Expand Down
2 changes: 2 additions & 0 deletions vignettes/small_test/run_readsynth.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mkdir small_ecori_msei_1M_output

python3 ../../readsynth.py \
-g abundances.csv \
-o small_ecori_msei_1M_output/ \
Expand Down
2 changes: 2 additions & 0 deletions vignettes/small_test/run_readsynth_error.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mkdir small_ecori_msei_1M_error_output

python3 ../readsynth/readsynth.py \
-g abundances.csv \
-o small_ecori_msei_1M_error_output/ \
Expand Down

0 comments on commit bd78b7c

Please sign in to comment.