Skip to content

Commit

Permalink
Merge pull request #5 from aylingm/master
Browse files Browse the repository at this point in the history
Merged Martin's Final Pull Request
  • Loading branch information
richardmleggett authored Dec 13, 2017
2 parents bf4c30a + 12b4831 commit 346ce5f
Show file tree
Hide file tree
Showing 173 changed files with 18,499 additions and 4,442 deletions.
Empty file removed .Rhistory
Empty file.
2 changes: 2 additions & 0 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ obj
bin
src/cortex/metacortex_old.c
src/cortex/metacortex_new.c
.Rhistory
.DS_Store
Empty file modified LICENSE
100644 → 100755
Empty file.
100 changes: 48 additions & 52 deletions Makefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -5,52 +5,47 @@ ifndef CC
endif

ifdef MAC
# Change the following to point to your GCC binary
CC=/usr/local/Cellar/gcc/4.9.2_1/bin/gcc-4.9
#MACFLAG = -fnested-functions
$(warning On MacOS, make sure you set CC to point to your GCC binary - LLVM won't compile MetaCortex)
$(warning e.g. export CC=/usr/local/Cellar/gcc/7.1.0/bin/gcc-7)
endif

BIN = bin
LIB = lib

ifeq ($(MAXK),31)
BITFIELDS = 1
endif

ifeq ($(MAXK),63)
BITFIELDS = 2
endif

ifeq ($(MAXK),95)
BITFIELDS = 3
endif

ifeq ($(MAXK),127)
BITFIELDS = 4
endif

ifeq ($(MAXK),160)
BITFIELDS = 5
endif

ifeq ($(MAXK),192)
BITFIELDS = 6
endif

ifeq ($(MAXK),223)
BITFIELDS = 7
endif

ifeq ($(MAXK),255)
BITFIELDS = 8
endif

ifndef BITFIELDS
BITFIELDS = 1
MAXK = 31
endif

BIN_SUFFIX = $(MAXK)
# Default to the smallest k-mer build when the caller does not supply MAXK.
ifndef MAXK
MAXK = 31
endif

# Map the requested MAXK onto the number of bitfields and the suffix used in
# the output binary names.  BITFIELDS is handed to the compiler through
# -DNUMBER_OF_BITFIELDS_IN_BINARY_KMER in OPT; BIN_SUFFIX is appended to the
# names of the binaries produced by the main rules.
# Each branch shells out to expr(1) for the numeric comparison, which prints
# 1 when the comparison holds.
# NOTE(review): the 161 and 193 thresholds break the 32-step pattern of the
# other branches - presumably so that MAXK=160 and MAXK=192 are still
# accepted; confirm before "fixing" them to 160/192.
ifeq ($(shell expr $(MAXK) \< 32),1)
BITFIELDS = 1
BIN_SUFFIX=31
else ifeq ($(shell expr $(MAXK) \< 64),1)
BITFIELDS = 2
BIN_SUFFIX=63
else ifeq ($(shell expr $(MAXK) \< 96),1)
BITFIELDS = 3
BIN_SUFFIX=95
else ifeq ($(shell expr $(MAXK) \< 128),1)
BITFIELDS = 4
BIN_SUFFIX=127
else ifeq ($(shell expr $(MAXK) \< 161),1)
BITFIELDS = 5
BIN_SUFFIX=159
else ifeq ($(shell expr $(MAXK) \< 193),1)
BITFIELDS = 6
BIN_SUFFIX=191
else ifeq ($(shell expr $(MAXK) \< 224),1)
BITFIELDS = 7
BIN_SUFFIX=223
else ifeq ($(shell expr $(MAXK) \< 256),1)
BITFIELDS = 8
BIN_SUFFIX=255
else
# Reached when MAXK is 256 or more, or when expr could not compare it (for
# example a non-numeric value).  The fallback values are kept, but the user
# is told that the requested MAXK was not honoured.
$(warning MAXK=$(MAXK) is not a supported value (expected a number below 256) - falling back to a k31 build)
BITFIELDS = 1
BIN_SUFFIX=31
endif

#BIN_SUFFIX = $(MAXK)

# Main program includes
IDIR_BASIC =include/basic
Expand Down Expand Up @@ -89,10 +84,11 @@ ifdef 32_BITS
endif

# Compiler options
OPT = $(ARCH) -Wall -O3 $(MACFLAG) -DNUMBER_OF_BITFIELDS_IN_BINARY_KMER=$(BITFIELDS) -pthread -g
OPT = $(ARCH) -Wall -O3 -DNUMBER_OF_BITFIELDS_IN_BINARY_KMER=$(BITFIELDS) -pthread -g
#-Wno-duplicate-decl-specifier

ifdef DEBUG
OPT = $(ARCH) -Wall -O0 $(MACFLAG) -DNUMBER_OF_BITFIELDS_IN_BINARY_KMER=$(BITFIELDS) -g -pthread
OPT = $(ARCH) -Wall -O0 -DNUMBER_OF_BITFIELDS_IN_BINARY_KMER=$(BITFIELDS) -g -pthread
endif

ifdef DEBUG_PRINT_LABELS
Expand All @@ -114,7 +110,7 @@ CFLAGS_METACORTEX_TESTS = -I$(IDIR_CUNIT) $(CFLAGS_CUNIT) -I$(IDIR_BASIC) -I$(ID
CFLAGS_BASIC_TESTS = -I$(IDIR_CUNIT) $(CFLAGS_CUNIT) -I$(IDIR_BASIC) -I$(IDIR_BASIC_TESTS)

# Program objects
METACORTEX_OBJ = obj/cortex/file_format.o obj/cortex/flags.o obj/cortex/cleaning.o obj/cortex/path.o obj/cortex/perfect_path.o obj/cortex/branches.o obj/cortex/y_walk.o obj/cortex/cmd_line.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_value.o obj/cortex/hash_table.o obj/cortex/dB_graph.o obj/cortex/file_reader.o obj/cortex/metacortex.o obj/cortex/logger.o obj/cortex/metagraphs.o obj/cortex/coverage_walk.o obj/util/node_queue.o obj/cortex/graph_stats.o obj/cortex/bubble_find.o
METACORTEX_OBJ = obj/cortex/file_format.o obj/cortex/flags.o obj/cortex/cleaning.o obj/cortex/path.o obj/cortex/perfect_path.o obj/cortex/branches.o obj/cortex/y_walk.o obj/cortex/cmd_line.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_value.o obj/cortex/hash_table.o obj/cortex/dB_graph.o obj/cortex/file_reader.o obj/cortex/metacortex.o obj/cortex/logger.o obj/cortex/metagraphs.o obj/cortex/coverage_walk.o obj/util/node_queue.o obj/cortex/graph_stats.o obj/cortex/bubble_find.o obj/cortex/report_output.o
KMERINFO_OBJ = obj/cortex/flags.o obj/cortex/path.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_table.o obj/cortex/file_reader.o obj/util/kmerinfo.o obj/cortex/logger.o obj/cortex/hash_value.o
GRAPHOUT_OBJ = obj/cortex/flags.o obj/cortex/path.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_table.o obj/cortex/file_reader.o obj/cortex/dB_graph.o obj/util/graphout.o obj/cortex/perfect_path.o obj/cortex/logger.o obj/cortex/hash_value.o obj/util/graph_formats.o obj/util/node_queue.o obj/cortex/cleaning.o obj/cortex/coverage_walk.o obj/util/graph_tools.o obj/cortex/file_format.o
FILTERREADS_OBJ = obj/util/filter_reads.o obj/cortex/flags.o obj/cortex/path.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_table.o obj/cortex/file_reader.o obj/cortex/dB_graph.o obj/cortex/perfect_path.o obj/cortex/logger.o obj/cortex/hash_value.o obj/util/node_queue.o obj/cortex/cleaning.o obj/cortex/file_format.o
Expand All @@ -123,29 +119,29 @@ HASH_TABLE_TESTS_OBJ = obj/cortex/flags.o obj/test/run_hash_table_tests.o obj/co
GRAPH_TESTS_OBJ = obj/cortex/branches.o obj/cortex/file_format.o obj/test/test_dB_graph.o obj/cortex/logger.o obj/cortex/cleaning.o obj/cortex/perfect_path.o obj/cortex/path.o obj/cortex/flags.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_value.o obj/cortex/hash_table.o obj/cortex/dB_graph.o obj/cortex/file_reader.o obj/cortex/y_walk.o obj/test/test_file_reader.o obj/test/test_graph_element.o obj/test/run_dB_graph_tests.o

#Library objects
LIBRARY_OBJ = obj/cortex/file_format.o obj/cortex/analysis.o obj/cortex/flags.o obj/cortex/cleaning.o obj/cortex/path.o obj/cortex/perfect_path.o obj/cortex/branches.o obj/cortex/y_walk.o obj/cortex/cmd_line.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_value.o obj/cortex/hash_table.o obj/cortex/dB_graph.o obj/cortex/file_reader.o obj/cortex/metacortex.o obj/cortex/logger.o obj/cortex/metagraphs.o obj/cortex/coverage_walk.o obj/util/node_queue.o obj/cortex/graph_stats.o obj/cortex/bubble_find.o
LIBRARY_OBJ = obj/cortex/file_format.o obj/cortex/analysis.o obj/cortex/flags.o obj/cortex/cleaning.o obj/cortex/path.o obj/cortex/perfect_path.o obj/cortex/branches.o obj/cortex/y_walk.o obj/cortex/cmd_line.o obj/cortex/binary_kmer.o obj/cortex/seq.o obj/cortex/element.o obj/cortex/hash_value.o obj/cortex/hash_table.o obj/cortex/dB_graph.o obj/cortex/file_reader.o obj/cortex/metacortex.o obj/cortex/logger.o obj/cortex/metagraphs.o obj/cortex/coverage_walk.o obj/util/node_queue.o obj/cortex/graph_stats.o obj/cortex/bubble_find.o obj/cortex/report_output.o

# Main rules
metacortex : remove_objects $(METACORTEX_OBJ)
mkdir -p $(BIN); $(CC) -lm $(OPT) $(OPT_COLS) -o $(BIN)/metacortex_k$(BIN_SUFFIX) $(METACORTEX_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) $(OPT_COLS) -o $(BIN)/metacortex_k$(BIN_SUFFIX) $(METACORTEX_OBJ) -lm

kmerinfo: remove_objects $(KMERINFO_OBJ)
mkdir -p $(BIN); $(CC) -lm $(OPT) -o $(BIN)/kmerinfo $(KMERINFO_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) -o $(BIN)/kmerinfo $(KMERINFO_OBJ) -lm

graphout:remove_objects $(GRAPHOUT_OBJ)
mkdir -p $(BIN); $(CC) -lm $(OPT) -o $(BIN)/graphout_$(BIN_SUFFIX) $(GRAPHOUT_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) -o $(BIN)/graphout_$(BIN_SUFFIX) $(GRAPHOUT_OBJ) -lm

filterreads:remove_objects $(FILTERREADS_OBJ)
mkdir -p $(BIN); $(CC) -lm $(OPT) -o $(BIN)/filterreads_$(BIN_SUFFIX) $(FILTERREADS_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) -o $(BIN)/filterreads_$(BIN_SUFFIX) $(FILTERREADS_OBJ) -lm

run_basic_tests : remove_objects $(BASIC_TESTS_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) $(CFLAGS_CUNIT) $(CFLAGS_BASIC_TESTS) -lcunit -o $(BIN)/run_basic_tests_$(MAXK)$(READ_PAIR_SUFFIX) $(BASIC_TESTS_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) $(CFLAGS_CUNIT) $(CFLAGS_BASIC_TESTS) -lcunit -o $(BIN)/run_basic_tests_$(BIN_SUFFIX)$(READ_PAIR_SUFFIX) $(BASIC_TESTS_OBJ)

run_hash_table_tests : remove_objects $(HASH_TABLE_TESTS_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) $(CFLAGS_CUNIT) $(CFLAGS_HASH_TABLE_TESTS) -lcunit -o $(BIN)/run_hash_table_tests_$(MAXK) $(HASH_TABLE_TESTS_OBJ)
mkdir -p $(BIN); $(CC) $(OPT) $(CFLAGS_CUNIT) $(CFLAGS_HASH_TABLE_TESTS) -lcunit -o $(BIN)/run_hash_table_tests_$(BIN_SUFFIX) $(HASH_TABLE_TESTS_OBJ)

run_graph_tests : remove_objects $(GRAPH_TESTS_OBJ)
mkdir -p $(BIN); $(CC) $(LINKOPT) $(CFLAGS_CUNIT) -o $(BIN)/run_graph_tests_$(MAXK) $(GRAPH_TESTS_OBJ) -lcunit
mkdir -p $(BIN); $(CC) $(LINKOPT) $(CFLAGS_CUNIT) -o $(BIN)/run_graph_tests_$(BIN_SUFFIX) $(GRAPH_TESTS_OBJ) -lcunit

tests: remove_objects run_basic_tests run_hash_table_tests run_graph_tests

Expand Down
Empty file modified README.md
100644 → 100755
Empty file.
Empty file modified binders/ruby/Bio/Cortex.rb
100644 → 100755
Empty file.
Empty file modified binders/ruby/test.rb
100644 → 100755
Empty file.
Empty file modified data/test/basic/long_entries.fasta
100644 → 100755
Empty file.
Empty file modified data/test/basic/one_long_entry.fasta
100644 → 100755
Empty file.
Empty file modified data/test/count_kmers/filelist_repeats_in_same_entry
100644 → 100755
Empty file.
Empty file modified data/test/graph/chrom1.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/chrom2.fasta
100644 → 100755
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file modified data/test/graph/file_to_test_covg_of_reads.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/filelist_fasta_to_load_and_dump_as_bin
100644 → 100755
Empty file.
Empty file modified data/test/graph/generate_bubble_with_unequal_branch_sizes.fa
100644 → 100755
Empty file.
Empty file modified data/test/graph/list_paired_end_file3_left
100644 → 100755
Empty file.
Empty file modified data/test/graph/list_paired_end_file3_right
100644 → 100755
Empty file.
Empty file modified data/test/graph/long_entries.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file1_1.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file1_2.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file2_with_dup_1.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file2_with_dup_2.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file3_with_dups_1.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/paired_end_file3_with_dups_2.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/person.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/person2.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/person3.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/person3_with_errors.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/person3_with_errors_extended_file.fastq
100644 → 100755
Empty file.
Empty file modified data/test/graph/person_with_deletion_in_chrom.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/person_with_sv.fasta
100644 → 100755
Empty file.
Empty file modified data/test/graph/test_db_graph_intermediate_output_file
100644 → 100755
Empty file.
Empty file modified data/test/read_pair/rp_1.fastq
100644 → 100755
Empty file.
Empty file modified data/test/read_pair/rp_2.fastq
100644 → 100755
Empty file.
Empty file modified data/test/read_pair/rp_test.txt
100644 → 100755
Empty file.
Empty file modified data/test/solid/simple_test.csfasta
100644 → 100755
Empty file.
Empty file modified data/test/solid/simple_test_two_reads.csfasta
100644 → 100755
Empty file.
Empty file modified data/test/solid/simple_test_with_comment.csfasta
100644 → 100755
Empty file.
Empty file modified data/test/solid/test1.txt
100644 → 100755
Empty file.
Empty file modified data/test/solid/test2.txt
100644 → 100755
Empty file.
Empty file modified data/test/solid/test3.txt
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_one_sec_multiple_input.csfasta
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_one_sec_multiple_input.fa
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_one_seq_cs.fa
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_one_seq_cs.txt
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_two_sec_multiple_input.csfasta
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_two_sec_multiple_input.fa
100644 → 100755
Empty file.
Empty file modified data/test/solid/test_two_secs_multiple_input.fa
100644 → 100755
Empty file.
93 changes: 93 additions & 0 deletions degree_plots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
library(ggplot2)
library(scales)
library(grid)


#filename="/home/aylingm/testgenome/patronol/contigs_46_100k.1_clean.fa.degrees"
#filename="/home/aylingm/testgenome/mc_consensus_3k/contigs.fa.degrees"

# --- Input handling --------------------------------------------------------
# The only argument is the path to a tab-delimited ".degrees" table.
# Fail early with a usage message instead of letting read.delim() choke on NA.
cmd_args = commandArgs(TRUE);
if (length(cmd_args) < 1 || is.na(cmd_args[1]) || !file.exists(cmd_args[1])) {
  stop("Usage: Rscript degree_plots.R <degrees file> (argument missing or file not found)")
}
filename=cmd_args[1];
#print(filename)

format="png"
# Plots are written to a "graphs" directory next to the input file; it is
# created on first use (non-recursively, so the input's directory must exist).
graphsdir=paste(dirname(filename), "/graphs", sep="");
if(!dir.exists(graphsdir)){
  dir.create(graphsdir, showWarnings = TRUE, recursive = FALSE, mode = "0777")
}

#print(graphsdir)

# --- Plot parameters -------------------------------------------------------
types = c("I", "Y", "X","E");
min_graph=100 # need to have this as a variable that can be passed to the script
scaling_factor=100   # multiplier applied to the values before plotting (axis is labelled in %)
bin_size=0.0001      # histogram bin width, before scaling
title_font=7         # plot title font size
decimal_points=4     # rounding used for the mean/sd shown in plot titles

# --- Load and filter the table ---------------------------------------------
degrees <- read.delim(filename)
if (!("total" %in% names(degrees))) {
  stop(paste("Input file", filename, "has no 'total' column"))
}
# If the table contains rows with a total below the threshold, lower the
# threshold to just under the smallest total so that no row is filtered out.
if (min(degrees$total)<min_graph){
  min_graph<-min(degrees$total)-1
}
degrees <- degrees[degrees$total>min_graph,]


# Columns of `degrees` that are averaged (row-wise) for each node type.
# NOTE(review): the "X" set also contains every column of the "Y" set -
# confirm that this overlap is intended.
type_columns <- list(
  I = c("for.1.rev.1."),
  Y = c("for.1.rev.2.", "for.1.rev.3.", "for.1.rev.4.",
        "for.2.rev.1.", "for.3.rev.1.", "for.4.rev.1."),
  X = c("for.1.rev.2.", "for.1.rev.3.", "for.1.rev.4.",
        "for.2.rev.1.", "for.3.rev.1.", "for.4.rev.1.",
        "for.2.rev.2.", "for.2.rev.3.", "for.2.rev.4.",
        "for.3.rev.3.", "for.3.rev.2.", "for.3.rev.4.",
        "for.4.rev.4.", "for.4.rev.2.", "for.4.rev.3."),
  E = c("for.0.rev.0.", "for.0.rev.1.", "for.0.rev.2.", "for.0.rev.3.",
        "for.0.rev.4.", "for.1.rev.0.", "for.2.rev.0.", "for.3.rev.0.",
        "for.4.rev.0.")
)

# x-axis windows (before scaling): "I" is plotted over the top of the range,
# every other type over the bottom.
x_high<-c(0.95,1)
x_low<-c(0,0.05)

# One histogram per node type, written to <graphsdir>/<type>_degrees.png.
for (type in types) {
  if (type=="I"){
    x_min<-x_high[1]
    x_max<-x_high[2]
  } else {
    x_min<-x_low[1]
    x_max<-x_low[2]
  }

  # Row-wise mean over the type's columns, scaled for display.  The values
  # live in a named column so the plot can map it explicitly.
  degree_dist<-data.frame(prob = rowMeans(degrees[, type_columns[[type]], drop = FALSE])*scaling_factor)
  sd_dd<-sd(degree_dist$prob)
  mean_dd<-mean(degree_dist$prob)

  degrees_png <- paste(graphsdir, "/", type,"_degrees.png", sep="");
  png(degrees_png, width=1200, height=800)
  # theme() has to be added to the plot *before* print(): anything added to
  # the value returned by print() is never drawn on the device.
  print(ggplot(degree_dist, aes(x = prob)) +
    ggtitle(paste(type ,"nodes probability,", "mean=",round(mean_dd,decimal_points),"sd=",round(sd_dd,decimal_points))) +
    xlab("probability (%)") + ylab("Subgraph Count") +
    geom_histogram(binwidth=bin_size*scaling_factor) +
    scale_x_continuous(breaks=seq(0, 1*scaling_factor, bin_size*10*scaling_factor), limits=c(x_min*scaling_factor, x_max*scaling_factor)) +
    theme(plot.title = element_text(size=title_font)))
  garbage <- dev.off()
}

# Draw one FOR x REV heatmap (both axes 0..4) from a vector of per-column
# means and write it to `out_png`.  The fill scale is log-transformed.
draw_degree_heatmap <- function(cell_means, out_png, plot_title) {
  # manually 'melt' the average for each point
  grid_df <- data.frame("FOR"=rep(c(0:4),each = 5), "REV"=rep(0:4,5), data.frame(cell_means))
  names(grid_df)[3] <- "prob"
  png(out_png, width=1200, height=800)
  print(ggplot(grid_df, aes(x=FOR, y=REV)) +
    geom_tile(aes(fill = prob), colour = "white") +
    ggtitle(plot_title) +
    theme(plot.title = element_text(size=title_font)) +
    scale_fill_gradient(low = "white", high = "steelblue", trans='log'))
  garbage <- dev.off()
}

type="E"

# Rows above the threshold, without column 26.
# NOTE(review): column 26 is presumably `total` - confirm against the input format.
kept_degrees <- degrees[degrees$total>min_graph,-26]

# Heatmap over all degree combinations.
draw_degree_heatmap(
  colMeans(kept_degrees*scaling_factor),
  paste(graphsdir, "/", type,"_heatmap.png", sep=""),
  paste(filename, "_", type,"_heatmap")
)

# Same heatmap with the for.1.rev.1. column zeroed before averaging.
degrees.not_meaned <- data.frame(kept_degrees)
degrees.not_meaned$for.1.rev.1. <- 0
draw_degree_heatmap(
  colMeans(degrees.not_meaned*scaling_factor),
  paste(graphsdir, "/", type,"_heatmap_noI.png", sep=""),
  paste(filename, "_", type,"_heatmap (ex. I nodes)")
)
Empty file modified doc/metacortexmanual.pdf
100644 → 100755
Empty file.
Empty file modified doc/metacortexmanual.tex
100644 → 100755
Empty file.
Empty file modified doc/typicalprocess.pdf
100644 → 100755
Empty file.
Empty file modified gpl.txt
100644 → 100755
Empty file.
Loading

0 comments on commit 346ce5f

Please sign in to comment.