Skip to content

Commit

Permalink
adds support for multiple directory paths
Browse files Browse the repository at this point in the history
  • Loading branch information
brwnj committed Jun 22, 2018
1 parent 69df713 commit ad61564
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 26 deletions.
34 changes: 22 additions & 12 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,31 @@ we can annotate across SILVA using:
--reference-database silva \
mothur_sop_data

.. tip::
The data directory can optionally be a pattern containing a wildcard,
such as::

hundo annotate \
--filter-adapters qc_references/adapters.fa.gz \
--filter-contaminants qc_references/phix174.fa.gz \
--out-dir mothur_sop_silva \
--database-dir annotation_references \
--reference-database silva \
'mothur_sop_data/F3D14*S20*.fastq.gz'
The data directory can optionally be a pattern containing a wildcard,
such as::

The string must be contained between single quotes so it isn't expanded
into a space delimited list.
hundo annotate \
--filter-adapters qc_references/adapters.fa.gz \
--filter-contaminants qc_references/phix174.fa.gz \
--out-dir mothur_sop_silva \
--database-dir annotation_references \
--reference-database silva \
'mothur_sop_data/F3D14*S20*.fastq.gz'

The pattern must be enclosed in single quotes so that the shell does not
expand it into a space-delimited list of file names.

Alternatively, when data is spread across multiple directories, you can pass a
combination of paths and patterns as a single comma-separated list, such as::

hundo annotate \
--filter-adapters qc_references/adapters.fa.gz \
--filter-contaminants qc_references/phix174.fa.gz \
--out-dir mothur_sop_silva \
--database-dir annotation_references \
--reference-database silva \
'collection1/LM_*.fastq.gz,collection2/rawdata'

Dependencies are installed by default in the results directory defined
on the command line as ``--out-dir``. If you want to re-use dependencies
Expand Down
29 changes: 17 additions & 12 deletions hundo/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,36 @@ def count_sequences(filename):
return sum(buf.count(b'\n') for buf in f_gen) / 4


def get_sample_files(fastq_dir, prefilter_file_size):
if not fastq_dir:
def get_sample_files(fastq_dirs, prefilter_file_size):
if not fastq_dirs:
logger.error(
(
"'fastq_dir' has not been set -- this directory "
"should contain your input FASTQs"
)
)
sys.exit(1)
if os.path.isfile(fastq_dir):
if os.path.isfile(fastq_dirs):
logger.error(
(
"'fastq_dir' must be a directory or a file pattern, "
"'fastq_dir' must be a directory, a file pattern, or a comma "
"separated list of both of those things "
"e.g. '*.fastq', with single quotes surrounding the pattern."
)
)
sys.exit(1)
if "*" in fastq_dir:
from glob import glob

logger.info("Finding samples matching %s" % fastq_dir)
raw_fastq_files = glob(fastq_dir)
else:
logger.info("Finding samples in %s" % fastq_dir)
raw_fastq_files = [os.path.join(fastq_dir, i) for i in os.listdir(fastq_dir)]
# grab all of the possible fastq file paths
# user is permitted to send a single path, a single pattern, or a
# comma separated list of paths and patterns
raw_fastq_files = list()
for fastq_dir in fastq_dirs.split(","):
if "*" in fastq_dir:
from glob import glob
logger.info("Finding samples matching %s" % fastq_dir)
raw_fastq_files.extend(glob(fastq_dir))
else:
logger.info("Finding samples in %s" % fastq_dir)
raw_fastq_files.extend([os.path.join(fastq_dir, i) for i in os.listdir(fastq_dir)])
samples = dict()
seen = set()
omitted = dict()
Expand Down
2 changes: 1 addition & 1 deletion hundo/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.15"
__version__ = "1.1.16"
6 changes: 5 additions & 1 deletion hundo/hundo.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,10 @@ def run_annotate(
\b
https://hundo.rtfd.io
"""
fq_dir = list()
for input_dir in fastq_dir.replace(" ", "").split(","):
fq_dir.append(os.path.realpath(input_dir))
fq_dir = ",".join(fq_dir)
database_dir = os.path.realpath(database_dir)
filter_adapters = os.path.realpath(filter_adapters) if filter_adapters else ""
filter_contaminants = os.path.realpath(
Expand Down Expand Up @@ -565,7 +569,7 @@ def run_annotate(
jobs=jobs,
conda="" if no_conda else "--use-conda",
dryrun="--dryrun" if dryrun else "",
fq_dir=os.path.realpath(fastq_dir),
fq_dir=fq_dir,
author=author,
threads=threads,
database_dir=database_dir,
Expand Down

0 comments on commit ad61564

Please sign in to comment.