From c15abd2677f76717ba8043c46a87023a0a0179f4 Mon Sep 17 00:00:00 2001 From: Milton Silva Date: Mon, 3 Aug 2020 11:03:10 +0100 Subject: [PATCH] Add scripts to run datasets --- benchmark/run_ref.sh | 22 +- benchmark/run_reseq.sh | 429 +++++++++++++++++++++++++++++++++++++++ benchmark/run_vgeco2.sh | 12 +- benchmark/run_virome.sh | 280 +++++++++++++++++++++++++ benchmark/run_vothers.sh | 344 ++++++++++++++++--------------- 5 files changed, 899 insertions(+), 188 deletions(-) create mode 100755 benchmark/run_reseq.sh create mode 100755 benchmark/run_virome.sh diff --git a/benchmark/run_ref.sh b/benchmark/run_ref.sh index 08a897d..bc43c8b 100755 --- a/benchmark/run_ref.sh +++ b/benchmark/run_ref.sh @@ -12,7 +12,7 @@ PARSE=0; RUN_GDC2=0; RUN_IDOCOMP=0; RUN_HRCM=0; -RUN_GECO2=0; +RUN_GECO2=1; RUN_GECO3=0; ############################################################################### # @@ -34,22 +34,26 @@ if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then cp ../../datasets/$1 . cp ../../datasets/$2 . rm -f xxx* - (time ./GDC2 c xxx $2 $1 ) &> ../../results/C_GDC_$1-$2 + (/usr/bin/time -v ./GDC2 c xxx $2 $1 ) &> ../../results/C_GDC_$1-$2 ls -la xxx.gdc2_rc | awk '{ print $5;}' > ../../results/BC_GDC_$1-$2 rm -f $2 $1 xxx*; fi } # function RunGeCo2 { - PARAM=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 "; + PARAMR=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -lr 0.03 -hs 64 "; + PARAMH=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -tm 4:1:0:1:0.9/0:0:0 -tm 17:100:1:10:0.95/2:20:0.95 -lr 0.03 -hs 64 "; # 1 - TARGET # 2 - REFERENCE if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then cp ../../datasets/$1 . cp ../../datasets/$2 . 
rm -f $1.co - (time ./GeCo2 $PARAM -r $2 $1 ) &> ../../results/C_GECO2_REFO_$1-$2 - ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO2_REFO_$1-$2 + (/usr/bin/time -v ./GeCo2 $PARAMR -r $2 $1 ) &> ../../results/C_GECO2R_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO2R_REFO_$1-$2 + + (/usr/bin/time -v ./GeCo2 $PARAMH -r $2 $1 ) &> ../../results/C_GECO2H_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO2H_REFO_$1-$2 rm -f $2 $1 $1.co; fi } @@ -63,10 +67,10 @@ if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then cp ../../datasets/$1 . cp ../../datasets/$2 . rm -f $1.co - (time ./GeCo3 $PARAMR -r $2 $1 ) &> ../../results/C_GECO3R_REFO_$1-$2 + (/usr/bin/time -v ./GeCo3 $PARAMR -r $2 $1 ) &> ../../results/C_GECO3R_REFO_$1-$2 ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO3R_REFO_$1-$2 - (time ./GeCo3 $PARAMH -r $2 $1 ) &> ../../results/C_GECO3H_REFO_$1-$2 + (/usr/bin/time -v ./GeCo3 $PARAMH -r $2 $1 ) &> ../../results/C_GECO3H_REFO_$1-$2 ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO3H_REFO_$1-$2 rm -f $2 $1 $1.co; fi @@ -79,7 +83,7 @@ if test -f "../../datasets-hrcm/$1.fasta" && test -f "../../datasets-hrcm/$2.fas cp ../../datasets-hrcm/$1.fasta . cp ../../datasets-hrcm/$2.fasta . rm -f $1.7z - (time ./hrcm compress -r $2.fasta -t $1.fasta ) &> ../../results/C_HRCM_REF_$1-$2 + (/usr/bin/time -v ./hrcm compress -r $2.fasta -t $1.fasta ) &> ../../results/C_HRCM_REF_$1-$2 ls -la $1.7z | awk '{ print $5;}' > ../../results/BC_HRCM_REF_$1-$2 rm -f $2.fasta $1.fasta $1.7z; fi @@ -98,7 +102,7 @@ if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then (./generateSA.sh ref sa ) &> TIME_SA TIMEOFSA=`cat TIME_SA | grep "..." 
| awk '{ print $5;}'` echo "ref/$2.fa tar/$1.fa sa/$2.sa" > f.txt; - (./iDoComp.run c f.txt OUT ) &> ../../../results/C_IDOCOMP_$1-$2 + (/usr/bin/time -v ./iDoComp.run c f.txt OUT ) &> ../../../results/C_IDOCOMP_$1-$2 cat ../../../results/C_IDOCOMP_$1-$2 | grep "Compressed Size:" | awk '{ print $3; }' > ../../../results/BC_IDOCOMP_$1-$2 CTIME=`cat ../../../results/C_IDOCOMP_$1-$2 | grep "CPU T" | awk '{print $4;}'` echo "$TIMEOFSA+$CTIME" | bc -l > ../../../results/CT_IDOCOMP_$1-$2 diff --git a/benchmark/run_reseq.sh b/benchmark/run_reseq.sh new file mode 100755 index 0000000..abca615 --- /dev/null +++ b/benchmark/run_reseq.sh @@ -0,0 +1,429 @@ +#!/bin/bash +############################################################################### +INSTALL_GDC2=0; +INSTALL_IDOCOMP=0; +INSTALL_HRCM=0; +INSTALL_GECO2=0; +INSTALL_GECO3=0; +############################################################################### +DOWNLOAD=0; +PARSE=1; +############################################################################### +RUN_GDC2=1; +RUN_IDOCOMP=1; +RUN_HRCM=1; +RUN_GECO2=1; +RUN_GECO3=1; +############################################################################### +# +function Parse { +if test -f "$1.fasta"; then + cat $1.fasta | grep -v ">" | tr -d -c "ACGT" > XTMP + cat HEADER XTMP > datasets/$1; + cp datasets/$1 datasets-hrcm/$1.fasta + fold -w80 datasets-hrcm/$1.fasta > XTMP + cp XTMP datasets-hrcm/$1.fasta + rm XTMP; +fi +} +# +function RunGDC2 { + # 1 - TARGET + # 2 - REFERENCE +if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then + cp ../../datasets/$1 . + cp ../../datasets/$2 . 
+ rm -f xxx* + (/usr/bin/time -v ./GDC2 c xxx $2 $1 ) &> ../../results/C_GDC_$1-$2 + ls -la xxx.gdc2_rc | awk '{ print $5;}' > ../../results/BC_GDC_$1-$2 + rm -f $2 $1 xxx*; +fi +} +# +function RunGeCo2 { + PARAMR=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -lr 0.03 -hs 64 "; + PARAMH=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -tm 4:1:0:1:0.9/0:0:0 -tm 17:100:1:10:0.95/2:20:0.95 -lr 0.03 -hs 64 "; + # 1 - TARGET + # 2 - REFERENCE +if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then + cp ../../datasets/$1 . + cp ../../datasets/$2 . + rm -f $1.co + (/usr/bin/time -v ./GeCo2 $PARAMR -r $2 $1 ) &> ../../results/C_GECO2R_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO2R_REFO_$1-$2 + + (/usr/bin/time -v ./GeCo2 $PARAMH -r $2 $1 ) &> ../../results/C_GECO2H_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO2H_REFO_$1-$2 + rm -f $2 $1 $1.co; +fi +} + +function RunGeCo3 { + PARAMR=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -lr 0.03 -hs 64 "; + PARAMH=" -rm 20:500:1:35:0.95/3:100:0.95 -rm 13:200:1:1:0.95/0:0:0 -rm 10:10:0:0:0.95/0:0:0 -tm 4:1:0:1:0.9/0:0:0 -tm 17:100:1:10:0.95/2:20:0.95 -lr 0.03 -hs 64 "; + # 1 - TARGET + # 2 - REFERENCE +if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then + cp ../../datasets/$1 . + cp ../../datasets/$2 . + rm -f $1.co + (/usr/bin/time -v ./GeCo3 $PARAMR -r $2 $1 ) &> ../../results/C_GECO3R_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO3R_REFO_$1-$2 + + (/usr/bin/time -v ./GeCo3 $PARAMH -r $2 $1 ) &> ../../results/C_GECO3H_REFO_$1-$2 + ls -la $1.co | awk '{ print $5;}' > ../../results/BC_GECO3H_REFO_$1-$2 + rm -f $2 $1 $1.co; +fi +} + +function RunHRCM { + # 1 - TARGET + # 2 - REFERENCE +if test -f "../../datasets-hrcm/$1.fasta" && test -f "../../datasets-hrcm/$2.fasta"; then + cp ../../datasets-hrcm/$1.fasta . 
+ cp ../../datasets-hrcm/$2.fasta . + rm -f $1.7z + (/usr/bin/time -v ./hrcm compress -r $2.fasta -t $1.fasta ) &> ../../results/C_HRCM_REF_$1-$2 + ls -la $1.7z | awk '{ print $5;}' > ../../results/BC_HRCM_REF_$1-$2 + rm -f $2.fasta $1.fasta $1.7z; +fi +} +# +function RunIDoComp { + # 1 - TARGET + # 2 - REFERENCE +if test -f "../../datasets/$1" && test -f "../../datasets/$2"; then + cd sais-lite-2.4.1/ + rm -fr sa ref tar + mkdir sa ref tar; + mkdir tmp_oneline_ref; + cp ../../../datasets/$1 tar/$1.fa + cp ../../../datasets/$2 ref/$2.fa + (./generateSA.sh ref sa ) &> TIME_SA + TIMEOFSA=`cat TIME_SA | grep "..." | awk '{ print $5;}'` + echo "ref/$2.fa tar/$1.fa sa/$2.sa" > f.txt; + (/usr/bin/time -v ./iDoComp.run c f.txt OUT ) &> ../../../results/C_IDOCOMP_$1-$2 + cat ../../../results/C_IDOCOMP_$1-$2 | grep "Compressed Size:" | awk '{ print $3; }' > ../../../results/BC_IDOCOMP_$1-$2 + CTIME=`cat ../../../results/C_IDOCOMP_$1-$2 | grep "CPU T" | awk '{print $4;}'` + echo "$TIMEOFSA+$CTIME" | bc -l > ../../../results/CT_IDOCOMP_$1-$2 + rm -fr sa ref tar tmp_oneline_ref + cd .. 
+fi +} +# +############################################################################### +# INSTALL +mkdir -p datasets-hrcm +mkdir -p datasets +mkdir -p progs +cd progs/ +############################################################################### +# GET iDoComp +#================================================================= +if [[ "$INSTALL_IDOCOMP" -eq "1" ]]; then + rm -fr iDoComp/ + git clone https://github.com/mikelhernaez/iDoComp.git + cd iDoComp/iDoComp_website_v1/sais-lite-2.4.1/source-code/ + gcc -o ../sa.run sa_generator.c sais.c -lm + cd ../../simulations/source_code/ + gcc -o ../../../sais-lite-2.4.1/iDoComp.run idc_generate_mapping.c main.c stats.c arith.c \ + fasta_decompressor.c idc_load_chr.c os_stream.c fasta_compressor.c \ + sam_stream.c -lm + cd ../../../../ +fi +############################################################################### +# GET GECO2 +#==================================================================== +if [[ "$INSTALL_GECO2" -eq "1" ]]; then + rm -fr geco2/ + git clone https://github.com/cobilab/geco2.git + cd geco2/src/ + cp Makefile.linux Makefile + sed -i 's/U32 garbage;//g' defs.h # fix for gcc 10 + sed -i 's/garbage =//g' gede2.c # fix for gcc 10 + make + cp GeCo2 ../ + cp GeDe2 ../ + cd ../../ +fi +############################################################################### +# GET GECO3 +#==================================================================== +if [[ "$INSTALL_GECO3" -eq "1" ]]; then + rm -fr geco3/ + git clone https://github.com/cobilab/geco3.git + cd geco3/src/ + make + cp GeCo3 ../ + cp GeDe3 ../ + cd ../../ +fi +############################################################################### +# GET GDC2 +#===================================================================== +if [[ "$INSTALL_GDC2" -eq "1" ]]; then + rm -fr GDC2/ + git clone https://github.com/refresh-bio/GDC2.git + cd GDC2/gdc_2/Gdc2/ + # LIBRARIES ORDER ACCESS CREATE SOME PROBLEMS (WE ADD THEM TO THE END) + printf '\nall: 
gdc2 \n\nCC = g++\nCFLAGS = -Wall -O3 -m64 -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -std=c++11\nCLINK = -lm -O3 -m64 -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -std=c++11 -lz \n\n.cpp.o: \n\t$(CC) $(CFLAGS) -c $< -o $@ \n\ngdc2: c1stage.o c2stage.o fasta.o hasher.o main.o p1stage.o qsmodel.o queue.o rangecod.o timer.o \n\t$(CC) $(CLINK) -o gdc2 c1stage.o c2stage.o fasta.o hasher.o main.o p1stage.o qsmodel.o queue.o rangecod.o timer.o ../libs/libaelf64.a -lz -lpthread \n\nclean: \n\trm gdc2 \n\trm *.o \n' > Makefile; + make clean ; make + cp gdc2 ../../GDC2 # TO NOT OVERLAP FOLDER NAME + cd ../../../ +fi +############################################################################### +# GET HRCM +#==================================================================== +if [[ "$INSTALL_HRCM" -eq "1" ]]; then + rm -fr HRCM/ + git clone https://github.com/haicy/HRCM.git + cd HRCM/ + chmod +x 7za + make + cd ../ +fi +############################################################################## +cd .. +############################################################################### +# DOWNLOAD +if [[ "$DOWNLOAD" -eq "1" ]]; then + echo "Downloading ..." +fi +# PARSE +#======================================================================= +if [[ "$PARSE" -eq "1" ]]; then + echo "Parsing ..." 
+ echo ">X" > HEADER + # + Parse "HSK1_C1" + Parse "HSK1_C2" + Parse "HSK1_C3" + Parse "HSK1_C4" + Parse "HSK1_C5" + Parse "HSK1_C6" + Parse "HSK1_C7" + Parse "HSK1_C8" + Parse "HSK1_C9" + Parse "HSK1_C10" + Parse "HSK1_C11" + Parse "HSK1_C12" + Parse "HSK1_C13" + Parse "HSK1_C14" + Parse "HSK1_C15" + Parse "HSK1_C16" + Parse "HSK1_C17" + Parse "HSK1_C18" + Parse "HSK1_C19" + Parse "HSK1_C20" + Parse "HSK1_C21" + Parse "HSK1_C22" + Parse "HSK1_X" + Parse "HSK1_Y" + Parse "HSK1_M" + # + Parse "HSK2_C1" + Parse "HSK2_C2" + Parse "HSK2_C3" + Parse "HSK2_C4" + Parse "HSK2_C5" + Parse "HSK2_C6" + Parse "HSK2_C7" + Parse "HSK2_C8" + Parse "HSK2_C9" + Parse "HSK2_C10" + Parse "HSK2_C11" + Parse "HSK2_C12" + Parse "HSK2_C13" + Parse "HSK2_C14" + Parse "HSK2_C15" + Parse "HSK2_C16" + Parse "HSK2_C17" + Parse "HSK2_C18" + Parse "HSK2_C19" + Parse "HSK2_C20" + Parse "HSK2_C21" + Parse "HSK2_C22" + Parse "HSK2_X" + Parse "HSK2_Y" + Parse "HSK2_M" + # + echo "done!"; + fi +# +# RUN +#========================================================================= +if [[ "$RUN_HRCM" -eq "1" ]]; then + echo "Running HRCM ..."; + mkdir -p results + cd progs/HRCM + # target $1, reference $2: + RunHRCM "HSK1_C1" "HSK2_C1" + #RunHRCM "HSK1_C2" "HSK2_C2" + #RunHRCM "HSK1_C3" "HSK2_C3" + #RunHRCM "HSK1_C4" "HSK2_C4" + #RunHRCM "HSK1_C5" "HSK2_C5" + #RunHRCM "HSK1_C6" "HSK2_C6" + #RunHRCM "HSK1_C7" "HSK2_C7" + #RunHRCM "HSK1_C8" "HSK2_C8" + #RunHRCM "HSK1_C9" "HSK2_C9" + #RunHRCM "HSK1_C10" "HSK2_C10" + #RunHRCM "HSK1_C11" "HSK2_C11" + #RunHRCM "HSK1_C12" "HSK2_C12" + #RunHRCM "HSK1_C13" "HSK2_C13" + #RunHRCM "HSK1_C14" "HSK2_C14" + #RunHRCM "HSK1_C15" "HSK2_C15" + #RunHRCM "HSK1_C16" "HSK2_C16" + #RunHRCM "HSK1_C17" "HSK2_C17" + #RunHRCM "HSK1_C18" "HSK2_C18" + #RunHRCM "HSK1_C19" "HSK2_C19" + #RunHRCM "HSK1_C20" "HSK2_C20" + #RunHRCM "HSK1_C21" "HSK2_C21" + #RunHRCM "HSK1_C22" "HSK2_C22" + #RunHRCM "HSK1_X" "HSK2_X" + #RunHRCM "HSK1_Y" "HSK2_Y" + #RunHRCM "HSK1_M" "HSK2_M" + # + cd ../../ + 
echo "Done!"; +fi + +if [[ "$RUN_GDC2" -eq "1" ]]; then + echo "Running GDC2 ..."; + mkdir -p results + cd progs/GDC2 + # target $1, reference $2: + RunGDC2 "HSK1_C1" "HSK2_C1" + #RunGDC2 "HSK1_C2" "HSK2_C2" + #RunGDC2 "HSK1_C3" "HSK2_C3" + #RunGDC2 "HSK1_C4" "HSK2_C4" + #RunGDC2 "HSK1_C5" "HSK2_C5" + #RunGDC2 "HSK1_C6" "HSK2_C6" + #RunGDC2 "HSK1_C7" "HSK2_C7" + #RunGDC2 "HSK1_C8" "HSK2_C8" + #RunGDC2 "HSK1_C9" "HSK2_C9" + #RunGDC2 "HSK1_C10" "HSK2_C10" + #RunGDC2 "HSK1_C11" "HSK2_C11" + #RunGDC2 "HSK1_C12" "HSK2_C12" + #RunGDC2 "HSK1_C13" "HSK2_C13" + #RunGDC2 "HSK1_C14" "HSK2_C14" + #RunGDC2 "HSK1_C15" "HSK2_C15" + #RunGDC2 "HSK1_C16" "HSK2_C16" + #RunGDC2 "HSK1_C17" "HSK2_C17" + #RunGDC2 "HSK1_C18" "HSK2_C18" + #RunGDC2 "HSK1_C19" "HSK2_C19" + #RunGDC2 "HSK1_C20" "HSK2_C20" + #RunGDC2 "HSK1_C21" "HSK2_C21" + #RunGDC2 "HSK1_C22" "HSK2_C22" + #RunGDC2 "HSK1_X" "HSK2_X" + #RunGDC2 "HSK1_Y" "HSK2_Y" + #RunGDC2 "HSK1_M" "HSK2_M" + # + cd ../../ + echo "Done!"; +fi +#============================================================================== +if [[ "$RUN_IDOCOMP" -eq "1" ]]; then + echo "Running iDoComp ..."; + mkdir -p results + cd progs/iDoComp + # target $1, reference $2: + RunIDoComp "HSK1_C1" "HSK2_C1" + #RunIDoComp "HSK1_C2" "HSK2_C2" + #RunIDoComp "HSK1_C3" "HSK2_C3" + #RunIDoComp "HSK1_C4" "HSK2_C4" + #RunIDoComp "HSK1_C5" "HSK2_C5" + #RunIDoComp "HSK1_C6" "HSK2_C6" + #RunIDoComp "HSK1_C7" "HSK2_C7" + #RunIDoComp "HSK1_C8" "HSK2_C8" + #RunIDoComp "HSK1_C9" "HSK2_C9" + #RunIDoComp "HSK1_C10" "HSK2_C10" + #RunIDoComp "HSK1_C11" "HSK2_C11" + #RunIDoComp "HSK1_C12" "HSK2_C12" + #RunIDoComp "HSK1_C13" "HSK2_C13" + #RunIDoComp "HSK1_C14" "HSK2_C14" + #RunIDoComp "HSK1_C15" "HSK2_C15" + #RunIDoComp "HSK1_C16" "HSK2_C16" + #RunIDoComp "HSK1_C17" "HSK2_C17" + #RunIDoComp "HSK1_C18" "HSK2_C18" + #RunIDoComp "HSK1_C19" "HSK2_C19" + #RunIDoComp "HSK1_C20" "HSK2_C20" + #RunIDoComp "HSK1_C21" "HSK2_C21" + #RunIDoComp "HSK1_C22" "HSK2_C22" + #RunIDoComp "HSK1_X" "HSK2_X" 
+ #RunIDoComp "HSK1_Y" "HSK2_Y" + #RunIDoComp "HSK1_M" "HSK2_M" + # + cd ../../ + echo "Done!"; +fi +#============================================================================== +if [[ "$RUN_GECO2" -eq "1" ]]; then + echo "Running GeCo2 ..."; + mkdir -p results + cd progs/geco2 + # target $1, reference $2: + RunGeCo2 "HSK1_C1" "HSK2_C1" + RunGeCo2 "HSK1_C2" "HSK2_C2" + RunGeCo2 "HSK1_C3" "HSK2_C3" + RunGeCo2 "HSK1_C4" "HSK2_C4" + RunGeCo2 "HSK1_C5" "HSK2_C5" + RunGeCo2 "HSK1_C6" "HSK2_C6" + RunGeCo2 "HSK1_C7" "HSK2_C7" + RunGeCo2 "HSK1_C8" "HSK2_C8" + RunGeCo2 "HSK1_C9" "HSK2_C9" + RunGeCo2 "HSK1_C10" "HSK2_C10" + RunGeCo2 "HSK1_C11" "HSK2_C11" + RunGeCo2 "HSK1_C12" "HSK2_C12" + RunGeCo2 "HSK1_C13" "HSK2_C13" + RunGeCo2 "HSK1_C14" "HSK2_C14" + RunGeCo2 "HSK1_C15" "HSK2_C15" + RunGeCo2 "HSK1_C16" "HSK2_C16" + RunGeCo2 "HSK1_C17" "HSK2_C17" + RunGeCo2 "HSK1_C18" "HSK2_C18" + RunGeCo2 "HSK1_C19" "HSK2_C19" + RunGeCo2 "HSK1_C20" "HSK2_C20" + RunGeCo2 "HSK1_C21" "HSK2_C21" + RunGeCo2 "HSK1_C22" "HSK2_C22" + RunGeCo2 "HSK1_X" "HSK2_X" + RunGeCo2 "HSK1_Y" "HSK2_Y" + RunGeCo2 "HSK1_M" "HSK2_M" + # + cd ../../ + echo "Done!"; +fi + +if [[ "$RUN_GECO3" -eq "1" ]]; then + echo "Running GeCo3 ..."; + mkdir -p results + cd progs/geco3 + # target $1, reference $2: + RunGeCo3 "HSK1_C1" "HSK2_C1" + #RunGeCo3 "HSK1_C2" "HSK2_C2" + #RunGeCo3 "HSK1_C3" "HSK2_C3" + #RunGeCo3 "HSK1_C4" "HSK2_C4" + #RunGeCo3 "HSK1_C5" "HSK2_C5" + #RunGeCo3 "HSK1_C6" "HSK2_C6" + #RunGeCo3 "HSK1_C7" "HSK2_C7" + #RunGeCo3 "HSK1_C8" "HSK2_C8" + #RunGeCo3 "HSK1_C9" "HSK2_C9" + #RunGeCo3 "HSK1_C10" "HSK2_C10" + #RunGeCo3 "HSK1_C11" "HSK2_C11" + #RunGeCo3 "HSK1_C12" "HSK2_C12" + #RunGeCo3 "HSK1_C13" "HSK2_C13" + #RunGeCo3 "HSK1_C14" "HSK2_C14" + #RunGeCo3 "HSK1_C15" "HSK2_C15" + #RunGeCo3 "HSK1_C16" "HSK2_C16" + #RunGeCo3 "HSK1_C17" "HSK2_C17" + #RunGeCo3 "HSK1_C18" "HSK2_C18" + #RunGeCo3 "HSK1_C19" "HSK2_C19" + #RunGeCo3 "HSK1_C20" "HSK2_C20" + #RunGeCo3 "HSK1_C21" "HSK2_C21" + #RunGeCo3 "HSK1_C22" 
"HSK2_C22" + #RunGeCo3 "HSK1_X" "HSK2_X" + #RunGeCo3 "HSK1_Y" "HSK2_Y" + #RunGeCo3 "HSK1_M" "HSK2_M" + # + cd ../../ + echo "Done!"; +fi +############################################################################### diff --git a/benchmark/run_vgeco2.sh b/benchmark/run_vgeco2.sh index 9bbda55..280670d 100755 --- a/benchmark/run_vgeco2.sh +++ b/benchmark/run_vgeco2.sh @@ -15,9 +15,9 @@ function RunGeCo2 { if test -f "../../ds/$2"; then cp ../../ds/$2 . rm -f $2.co - (time ./GeCo2 $1 $2 ) &> ../../res/C_GECO2_$2 - ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO2_$2 - rm -f $2 $2.co; + (/usr/bin/time -v ./GeCo2 $1 $2 ) &> ../../res/C_GECO2_LO_$2 + ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO2_LO_$2 + #rm -f $2 $2.co; fi } @@ -27,9 +27,9 @@ function RunGeCo3 { if test -f "../../ds/$2"; then cp ../../ds/$2 . rm -f $2.co - (time ./GeCo3 $1 $2 ) &> ../../res/C_GECO3_$2 - ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO3_$2 - rm -f $2 $2.co; + (/usr/bin/time -v ./GeCo3 $1 $2 ) &> ../../res/C_GECO3_LO_$2 + ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO3_LO_$2 + #rm -f $2 $2.co; fi } ############################################################################### diff --git a/benchmark/run_virome.sh b/benchmark/run_virome.sh new file mode 100755 index 0000000..59d5ce2 --- /dev/null +++ b/benchmark/run_virome.sh @@ -0,0 +1,280 @@ +#!/bin/bash +############################################################################### +INSTALL_NAF=0; +INSTALL_GECO2=0; +INSTALL_GECO3=0; +############################################################################### +DOWNLOAD=0; +############################################################################### +RUN_NAF=0; +RUN_GECO2=1; +RUN_GECO3=1; +# +function RunGeCo2 { + # 1 - params + # 2 - seq +if test -f "../../ds/$2"; then + cp ../../ds/$2 . 
+ rm -f $2.co + (/usr/bin/time -v ./GeCo2 -v $1 $2 ) &> ../../res/C_GECO2_l16_$2 + ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO2_l16_$2 + rm -f $2 $2.co; +fi +} + +function RunGeCo3 { + # 1 - params + # 2 - seq +if test -f "../../ds/$2"; then + cp ../../ds/$2 . + rm -f $2.co + (/usr/bin/time -v ./GeCo3 -v $1 $2 ) &> ../../res/C_GECO3_l16_$2 + ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO3_l16_$2 + rm -f $2 $2.co; +fi +} + +function RunNAF { + # 1 - seq +if test -f "../../ds/$1"; then + echo ">" > $1.fasta + cat ../../ds/$1 >> $1.fasta + rm -f $1.fasta.naf + (/usr/bin/time -v ./ENNAF --temp-dir /tmp -22 $1.fasta -o $1.naf ) &> ../../res/C_NAF_$1 + ls -la $1.naf | awk '{ print $5;}' > ../../res/BC_NAF_$1 + rm -f $1.fasta $1.naf; +fi +} + +function RunXM { + # 1 - seq +if test -f "../../ds/$1"; then + echo ">" > $1.fasta + cat ../../ds/$1 >> $1.fasta + + rm -f $1.xm + (/usr/bin/time -v ./XM --real=$1.xm $1.fasta ) &> ../../res/C_XM_$1 + ls -la $1.xm | awk '{ print $5;}' > ../../res/BC_XM_$1 + rm -f $1.fasta $1.xm; +fi +} + +function RunJARVIS { + # 1 - params + # 2 - seq +if test -f "../../ds/$2"; then + cp ../../ds/$2 . + rm -f $2.jc + (/usr/bin/time -v ./JARVIS $1 $2 ) &> ../../res/C_JARVIS_$2 + ls -la $2.jc | awk '{ print $5;}' > ../../res/BC_JARVIS_$2 + rm -f $2 $2.jc; +fi +} + +function RunCMIX { + # 1 - seq +if test -f "../../ds/$1"; then + cp ../../ds/$1 . 
+ rm -f $1.cmix + (/usr/bin/time -v ./cmix -c $1 $1.cmix ) &> ../../res/C_CMIX_$1 + ls -la $1.cmix | awk '{ print $5;}' > ../../res/BC_CMIX_$1 + rm -f $1 $1.cmix; +fi +} + +function RunDEEPZIP { + # 1 - seq +if test -f "../../ds/$1"; then + source tf/bin/activate + rm data/processed_files/*param.json data/processed_files/*npy + rm -rf data/files_to_be_compressed/ + #rm -rf data/compressed/ + mkdir -p data/files_to_be_compressed/ + mkdir -p data/compressed/ + cp ../../ds/$1 data/files_to_be_compressed/$1 + cd data + ./run_parser.sh + cd ../src + (/usr/bin/time -v ./run_experiments.sh biGRU ) &> ../../../res/C_DEEPZIP_$1 + cd ../data/compressed/ + ls -la $1/biGRU.compressed.combined | awk '{ print $5;}' > ../../../../res/BC_DEEPZIP_$1 + cd ../.. +fi +} + +function RunZPAQ { + # 1 - seq +if test -f "../../ds/$1"; then + cp ../../ds/$1 . + rm -f $1.zpaq + (/usr/bin/time -v ./zpaq a $1.zpaq $1 -m5 ) &> ../../res/C_ZPAQ_$1 + ls -la $1.zpaq | awk '{ print $5;}' > ../../res/BC_ZPAQ_$1 + rm -f $1 $1.zpaq; +fi +} + +# +############################################################################### +# INSTALL +mkdir -p ds +mkdir -p progs +cd progs/ +############################################################################### +# GET GECO2 +#==================================================================== +if [[ "$INSTALL_GECO2" -eq "1" ]]; then + rm -fr geco2/ + git clone https://github.com/cobilab/geco2.git + cd geco2/src/ + cp Makefile.linux Makefile + sed -i 's/U32 garbage;//g' defs.h # fix for gcc 10 + sed -i 's/garbage =//g' gede2.c # fix for gcc 10 + make + cp GeCo2 ../ + cp GeDe2 ../ + cd ../../ +fi +############################################################################### +# GET GECO3 +#==================================================================== +if [[ "$INSTALL_GECO3" -eq "1" ]]; then + rm -fr geco3/ + git clone https://github.com/cobilab/geco3.git + cd geco3/src/ + make + cp GeCo3 ../ + cp GeDe3 ../ + cd ../../ +fi 
+############################################################################### +# GET NAF +#==================================================================== +if [[ "$INSTALL_NAF" -eq "1" ]]; then + rm -fr naf/ + git clone --recurse-submodules https://github.com/KirillKryukov/naf.git + cd naf && make + cp ennaf/ennaf ENNAF # to avoid dir name collision + cd ../ +fi +############################################################################### +# GET XM +#==================================================================== +if [[ "$INSTALL_XM" -eq "1" ]]; then + rm -fr japsa/ + git clone https://github.com/mdcao/japsa.git + cd japsa + make install INSTALL_DIR=./xm MXMEM=7000m SERVER=true JLP=/usr/lib/jni + cp xm/bin/jsa.xm.compress XM + cd ../ +fi +############################################################################### +# GET JARVIS +#==================================================================== +if [[ "$INSTALL_JARVIS" -eq "1" ]]; then + rm -fr jarvis/ + git clone https://github.com/cobilab/jarvis.git + cd jarvis/src/ + sed -i 's/uint32_t garbage;//g' defs.h # fix for gcc 10 + make + cp JARVIS ../ + cd ../../ +fi +############################################################################### +# GET CMIX +#==================================================================== +if [[ "$INSTALL_CMIX" -eq "1" ]]; then + rm -fr cmix/ + git clone https://github.com/byronknoll/cmix.git + cd cmix + make + cd ../ +fi +############################################################################### +# GET DEEPZIP +#==================================================================== +if [[ "$INSTALL_DEEPZIP" -eq "1" ]]; then + rm -fr DeepZip/ + git clone --single-branch --branch noGPU https://github.com/mohit1997/DeepZip.git + cd DeepZip + python3.6 -m venv tf + source tf/bin/activate + bash install.sh + deactivate + #params mentioned in paper + sed -i 's/batch_size=128/batch_size=1024/g' src/trainer.py + sed -i 's/num_epochs=20/num_epochs=3/g' 
src/trainer.py + #don't decompress + sed -i 's/\/usr\/bin\/time -v python decompressor.py/#\/usr\/bin\/time -v python decompressor.py/g' src/run_experiments.sh + sed -i 's/ cmp $recon_file_name/ #cmp $recon_file_name/g' src/run_experiments.sh + cd ../ +fi +############################################################################### +# GET ZPAQ +#==================================================================== +if [[ "$INSTALL_ZPAQ" -eq "1" ]]; then + rm -fr zpaq/ + git clone https://github.com/zpaq/zpaq.git + cd zpaq + g++ -O3 -march=native -Dunix zpaq.cpp libzpaq.cpp -pthread -o zpaq + cd ../ +fi + +############################################################################## +cd .. +############################################################################### +# DOWNLOAD +if [[ "$DOWNLOAD" -eq "1" ]]; then + echo "Downloading ..." + rm -rf ds/ + mkdir ds/ + cd ds + + # ds4 + wget https://tinyurl.com/DNAcorpus + unzip DNAcorpus + mv DNACorpus/* . + rm -f DNAcorpus + rm -rf DNACorpus + + cd .. 
+fi +# +# RUN +#========================================================================= +################################################################### +#### Specific +if [[ "$RUN_NAF" -eq "1" ]]; then + echo "Running NAF ..."; + mkdir -p res + cd progs/naf + + RunNAF "virome" + # + cd ../../ + echo "Done!"; +fi + +############################################################################### +if [[ "$RUN_GECO2" -eq "1" ]]; then + echo "Running GeCo2 ..."; + mkdir -p res + cd progs/geco2 + PARAM_DS1="-tm 7:1:1:1:0.8/0:0:0 -tm 13:10:0:1:0.95/0:0:0 -tm 19:500:1:40:0.95/5:20:0.95 -ls 0.03 -hs 64" + RunGeCo2 "$PARAM_DS1" "virome" + # + cd ../../ + echo "Done!"; +fi + +if [[ "$RUN_GECO3" -eq "1" ]]; then + echo "Running GeCo3 ..."; + mkdir -p res + cd progs/geco3 + + PARAM_DS1="-tm 7:1:1:1:0.8/0:0:0 -tm 13:10:0:1:0.95/0:0:0 -tm 19:500:1:40:0.95/5:20:0.95 -ls 0.03 -hs 64" + RunGeCo3 "$PARAM_DS1" "virome" + + # + cd ../../ + echo "Done!"; +fi diff --git a/benchmark/run_vothers.sh b/benchmark/run_vothers.sh index d39ad5a..8105b12 100755 --- a/benchmark/run_vothers.sh +++ b/benchmark/run_vothers.sh @@ -14,7 +14,7 @@ DOWNLOAD=0; RUN_NAF=0; RUN_XM=0; RUN_JARVIS=0; -RUN_CMIX=0; +RUN_CMIX=1; RUN_DEEPZIP=0; RUN_ZPAQ=0; RUN_GECO2=0; @@ -26,7 +26,7 @@ function RunGeCo2 { if test -f "../../ds/$2"; then cp ../../ds/$2 . rm -f $2.co - (time ./GeCo2 $1 $2 ) &> ../../res/C_GECO2_l16_$2 + (/usr/bin/time -v ./GeCo2 -v $1 $2 ) &> ../../res/C_GECO2_l16_$2 ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO2_l16_$2 rm -f $2 $2.co; fi @@ -38,7 +38,7 @@ function RunGeCo3 { if test -f "../../ds/$2"; then cp ../../ds/$2 . 
rm -f $2.co - (time ./GeCo3 $1 $2 ) &> ../../res/C_GECO3_l16_$2 + (/usr/bin/time -v ./GeCo3 -v $1 $2 ) &> ../../res/C_GECO3_l16_$2 ls -la $2.co | awk '{ print $5;}' > ../../res/BC_GECO3_l16_$2 rm -f $2 $2.co; fi @@ -50,7 +50,7 @@ if test -f "../../ds/$1"; then echo ">" > $1.fasta cat ../../ds/$1 >> $1.fasta rm -f $1.fasta.naf - (time ./ENNAF --temp-dir /tmp -22 $1.fasta -o $1.naf ) &> ../../res/C_NAF_$1 + (/usr/bin/time -v ./ENNAF --temp-dir /tmp -22 $1.fasta -o $1.naf ) &> ../../res/C_NAF_$1 ls -la $1.naf | awk '{ print $5;}' > ../../res/BC_NAF_$1 rm -f $1.fasta $1.naf; fi @@ -63,7 +63,7 @@ if test -f "../../ds/$1"; then cat ../../ds/$1 >> $1.fasta rm -f $1.xm - (time ./XM --real=$1.xm $1.fasta ) &> ../../res/C_XM_$1 + (/usr/bin/time -v ./XM --real=$1.xm $1.fasta ) &> ../../res/C_XM_$1 ls -la $1.xm | awk '{ print $5;}' > ../../res/BC_XM_$1 rm -f $1.fasta $1.xm; fi @@ -75,7 +75,7 @@ function RunJARVIS { if test -f "../../ds/$2"; then cp ../../ds/$2 . rm -f $2.jc - (time ./JARVIS $1 $2 ) &> ../../res/C_JARVIS_$2 + (/usr/bin/time -v ./JARVIS $1 $2 ) &> ../../res/C_JARVIS_$2 ls -la $2.jc | awk '{ print $5;}' > ../../res/BC_JARVIS_$2 rm -f $2 $2.jc; fi @@ -86,7 +86,7 @@ function RunCMIX { if test -f "../../ds/$1"; then cp ../../ds/$1 . rm -f $1.cmix - (time ./cmix -c $1 $1.cmix ) &> ../../res/C_CMIX_$1 + (/usr/bin/time -v ./cmix -c $1 $1.cmix ) &> ../../res/C_CMIX_$1 ls -la $1.cmix | awk '{ print $5;}' > ../../res/BC_CMIX_$1 rm -f $1 $1.cmix; fi @@ -105,11 +105,10 @@ if test -f "../../ds/$1"; then cd data ./run_parser.sh cd ../src - (time ./run_experiments.sh biGRU ) &> ../../../res/C_DEEPZIP_$1 + (/usr/bin/time -v ./run_experiments.sh biGRU ) &> ../../../res/C_DEEPZIP_$1 cd ../data/compressed/ ls -la $1/biGRU.compressed.combined | awk '{ print $5;}' > ../../../../res/BC_DEEPZIP_$1 cd ../.. - #rm -rf data/files_to_be_compressed/$1 data/compressed/$1; fi } @@ -118,7 +117,7 @@ function RunZPAQ { if test -f "../../ds/$1"; then cp ../../ds/$1 . 
rm -f $1.zpaq - (time ./zpaq a $1.zpaq $1 -m5 ) &> ../../res/C_ZPAQ_$1 + (/usr/bin/time -v ./zpaq a $1.zpaq $1 -m5 ) &> ../../res/C_ZPAQ_$1 ls -la $1.zpaq | awk '{ print $5;}' > ../../res/BC_ZPAQ_$1 rm -f $1 $1.zpaq; fi @@ -247,102 +246,132 @@ if [[ "$DOWNLOAD" -eq "1" ]]; then rm -f DNAcorpus rm -rf DNACorpus - #ds1 - wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/900/067/695/GCA_900067695.1_Pabies01/GCA_900067695.1_Pabies01_genomic.fna.gz - cd .. fi # # RUN #========================================================================= -if [[ "$RUN_GECO2" -eq "1" ]]; then - echo "Running GeCo2 ..."; - # l 16 equiv - PARAM_DS4="-tm 1:1:0:0:0.9/0:0:0 -tm 2:1:0:0:0.78/0:0:0 -tm 3:1:0:0:0.9/0:0:0 -tm 4:1:1:0:0.78/0:0:0 -tm 5:10:1:1:0.90/0:0:0 -tm 6:1:1:0:0.85/0:0:0 -tm 7:1:1:0:0.85/0:0:0 -tm 8:1:1:0:0.91/0:0:0 -tm 9:10:0:0:0.9/0:0:0 -tm 10:10:0:0:0.9/0:0:0 -tm 11:10:0:0:0.9/0:0:0 -tm 12:20:1:1:0.94/0:0:0 -tm 13:10:1:0:0.95/0:0:0 -tm 14:50:1:1:0.95/0:0:0 -tm 16:200:1:10:0.95/1:50:0.95 -tm 17:100:1:20:0.9/3:10:0.9 -tm 20:500:1:30:0.95/2:20:0.95" - +## Gen purpose +if [[ "$RUN_CMIX" -eq "1" ]]; then + echo "Running CMIX ..."; mkdir -p res - cd progs/geco2 + cd progs/cmix #ds4 - RunGeCo2 "-l 1 -lr 0.06 -hs 8" "BuEb" - RunGeCo2 "-l 2 -lr 0.06 -hs 16" "AgPh" - RunGeCo2 "-l 3 -lr 0.09 -hs 24" "YeMi" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HePy" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "AeCa" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HaHi" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "EsCo" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "PlFa" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "ScPo" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "EnIn" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "DrMe" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "OrSa" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "DaRe" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "GaGa" - RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HoSa" + #RunCMIX "BuEb" + #RunCMIX "AgPh" + #RunCMIX "YeMi" + #RunCMIX "HePy" + #RunCMIX "AeCa" + #RunCMIX "HaHi" + #RunCMIX "EsCo" + 
#RunCMIX "PlFa" + #RunCMIX "ScPo" + #RunCMIX "EnIn" + RunCMIX "HoSaY" + RunCMIX "DrMe" + RunCMIX "OrSa" + RunCMIX "DaRe" + RunCMIX "GaGa" + RunCMIX "HoSa" #ds2 and ds3 - PARAM_DS23="-tm 3:1:1:1:0.8/0:0:0 -tm 6:1:1:1:0.85/0:0:0 -tm 9:1:1:1:0.85/0:0:0 -tm 12:10:0:1:0.85/0:0:0 -tm 15:200:1:10:0.85/2:1:0.85 -tm 17:200:1:10:0.85/2:1:0.85 -tm 20:500:1:40:0.85/5:20:0.85 -lr 0.03 -hs 64" + #RunCMIX "HoSaY" + #RunCMIX "Mitochondrion" + + #RunCMIX "VDB" + #RunCMIX "Archaea" + + #ds1 + #RunCMIX "GoGoC" + #RunCMIX "PaTrC" + #RunCMIX "HoSaC" + #RunCMIX "PiAbC" + # + cd ../../ + echo "Done!"; +fi + +if [[ "$RUN_DEEPZIP" -eq "1" ]]; then + echo "Running DEEPZIP ..."; + mkdir -p res + cd progs/DeepZip - RunGeCo2 PARAM_DS23 "HoSaY" - RunGeCo2 PARAM_DS23 "Mitochondrion" + #ds4 System reboots with larger files (>= AeCa) + #RunDEEPZIP "BuEb" + #RunDEEPZIP "AgPh" + #RunDEEPZIP "YeMi" + #RunDEEPZIP "HePy" + #RunDEEPZIP "AeCa" + RunDEEPZIP "HaHi" + RunDEEPZIP "EsCo" + RunDEEPZIP "PlFa" + RunDEEPZIP "ScPo" + RunDEEPZIP "EnIn" + #RunDEEPZIP "DrMe" + #RunDEEPZIP "OrSa" + #RunDEEPZIP "DaRe" + #RunDEEPZIP "GaGa" + #RunDEEPZIP "HoSa" - RunGeCo2 PARAM_DS23 "VDB" - RunGeCo2 PARAM_DS23 "Archaea" + #ds2 and ds3 + #RunDEEPZIP "HoSaY" + #RunDEEPZIP "Mitochondrion" + + #RunDEEPZIP "VDB" + #RunDEEPZIP "Archaea" #ds1 - PARAM_DS1="-tm 3:1:1:1:0.70/0:0:0 -tm 8:1:1:1:0.85/0:0:0 -tm 13:10:0:1:0.85/0:0:0 -tm 19:500:1:40:0.85/5:20:0.85 -ls 0.03 -hs 64" - RunGeCo2 PARAM_DS1 "GoGoC" - RunGeCo2 PARAM_DS1 "PaTrC" - RunGeCo2 PARAM_DS1 "HoSaC" - RunGeCo2 PARAM_DS1 "PiAbC" + #RunDEEPZIP "GoGoC" + #RunDEEPZIP "PaTrC" + #RunDEEPZIP "HoSaC" + #RunDEEPZIP "PiAbC" # cd ../../ echo "Done!"; fi -if [[ "$RUN_GECO3" -eq "1" ]]; then - echo "Running GeCo3 ..."; + +if [[ "$RUN_ZPAQ" -eq "1" ]]; then + echo "Running ZPAQ ..."; mkdir -p res - cd progs/geco3 + cd progs/zpaq #ds4 - RunGeCo3 "-l 1 -lr 0.06 -hs 8" "BuEb" - RunGeCo3 "-l 2 -lr 0.06 -hs 16" "AgPh" - RunGeCo3 "-l 3 -lr 0.09 -hs 24" "YeMi" - RunGeCo3 "-l 16 -lr 0.03 -hs 
64" "HePy" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "AeCa" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "HaHi" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "EsCo" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "PlFa" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "ScPo" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "EnIn" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "DrMe" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "OrSa" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "DaRe" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "GaGa" - RunGeCo3 "-l 16 -lr 0.03 -hs 64" "HoSa" + RunZPAQ "BuEb" + RunZPAQ "AgPh" + RunZPAQ "YeMi" + RunZPAQ "HePy" + RunZPAQ "AeCa" + RunZPAQ "HaHi" + RunZPAQ "EsCo" + RunZPAQ "PlFa" + RunZPAQ "ScPo" + RunZPAQ "EnIn" + RunZPAQ "DrMe" + RunZPAQ "OrSa" + RunZPAQ "DaRe" + RunZPAQ "GaGa" + RunZPAQ "HoSa" #ds2 and ds3 - PARAM_DS23="-tm 3:1:1:1:0.8/0:0:0 -tm 6:1:1:1:0.85/0:0:0 -tm 9:1:1:1:0.85/0:0:0 -tm 12:10:0:1:0.85/0:0:0 -tm 15:200:1:10:0.85/2:1:0.85 -tm 17:200:1:10:0.85/2:1:0.85 -tm 20:500:1:40:0.85/5:20:0.85 -lr 0.03 -hs 64" - - RunGeCo3 PARAM_DS23 "HoSaY" - RunGeCo3 PARAM_DS23 "Mitochondrion" + RunZPAQ "HoSaY" + RunZPAQ "Mitochondrion" - RunGeCo3 PARAM_DS23 "VDB" - RunGeCo3 PARAM_DS23 "Archaea" + RunZPAQ "VDB" + RunZPAQ "Archaea" #ds1 - PARAM_DS1="-tm 3:1:1:1:0.70/0:0:0 -tm 8:1:1:1:0.85/0:0:0 -tm 13:10:0:1:0.85/0:0:0 -tm 19:500:1:40:0.85/5:20:0.85 -ls 0.03 -hs 64" - RunGeCo3 PARAM_DS1 "GoGoC" - RunGeCo3 PARAM_DS1 "PaTrC" - RunGeCo3 PARAM_DS1 "HoSaC" - RunGeCo3 PARAM_DS1 "PiAbC" - + RunZPAQ "GoGoC" + RunZPAQ "PaTrC" + RunZPAQ "HoSaC" + RunZPAQ "PiAbC" # cd ../../ echo "Done!"; fi - +################################################################### +#### Specific if [[ "$RUN_NAF" -eq "1" ]]; then echo "Running NAF ..."; mkdir -p res @@ -411,11 +440,11 @@ if [[ "$RUN_XM" -eq "1" ]]; then RunXM "VDB" RunXM "Archaea" - #ds1 - RunXM "GoGoC" - RunXM "PaTrC" - RunXM "HoSaC" - RunXM "PiAbC" + #ds1 ERROR can't compress + #RunXM "GoGoC" + #RunXM "PaTrC" + #RunXM "HoSaC" + #RunXM "PiAbC" # cd ../../ echo "Done!"; @@ -450,7 +479,7 @@ if [[ "$RUN_JARVIS" -eq 
"1" ]]; then RunJARVIS "-l 7" "VDB" RunJARVIS "-l 7" "Archaea" - #ds1 + #ds1 OOM with 32GB #RunJARVIS "-l 7" "GoGoC" #RunJARVIS "-l 7" "PaTrC" #RunJARVIS "-l 7" "HoSaC" @@ -460,120 +489,89 @@ if [[ "$RUN_JARVIS" -eq "1" ]]; then echo "Done!"; fi ############################################################################### -## Gen purpose -if [[ "$RUN_CMIX" -eq "1" ]]; then - echo "Running CMIX ..."; +if [[ "$RUN_GECO2" -eq "1" ]]; then + echo "Running GeCo2 ..."; + # l 16 equiv + PARAM_DS4="-tm 1:1:0:0:0.9/0:0:0 -tm 2:1:0:0:0.78/0:0:0 -tm 3:1:0:0:0.9/0:0:0 -tm 4:1:1:0:0.78/0:0:0 -tm 5:10:1:1:0.90/0:0:0 -tm 6:1:1:0:0.85/0:0:0 -tm 7:1:1:0:0.85/0:0:0 -tm 8:1:1:0:0.91/0:0:0 -tm 9:10:0:0:0.9/0:0:0 -tm 10:10:0:0:0.9/0:0:0 -tm 11:10:0:0:0.9/0:0:0 -tm 12:20:1:1:0.94/0:0:0 -tm 13:10:1:0:0.95/0:0:0 -tm 14:50:1:1:0.95/0:0:0 -tm 16:200:1:10:0.95/1:50:0.95 -tm 17:100:1:20:0.9/3:10:0.9 -tm 20:500:1:30:0.95/2:20:0.95" + mkdir -p res - cd progs/cmix + cd progs/geco2 #ds4 - RunCMIX "BuEb" - RunCMIX "AgPh" - RunCMIX "YeMi" - RunCMIX "HePy" - RunCMIX "AeCa" - RunCMIX "HaHi" - RunCMIX "EsCo" - RunCMIX "PlFa" - #RunCMIX "ScPo" - #RunCMIX "EnIn" - #RunCMIX "DrMe" - #RunCMIX "OrSa" - #RunCMIX "DaRe" - #RunCMIX "GaGa" - #RunCMIX "HoSa" + RunGeCo2 "-l 1 -lr 0.06 -hs 8" "BuEb" + RunGeCo2 "-l 2 -lr 0.06 -hs 16" "AgPh" + RunGeCo2 "-l 3 -lr 0.09 -hs 24" "YeMi" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HePy" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "AeCa" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HaHi" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "EsCo" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "PlFa" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "ScPo" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "EnIn" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "DrMe" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "OrSa" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "DaRe" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "GaGa" + RunGeCo2 "$PARAM_DS4 -lr 0.03 -hs 64" "HoSa" #ds2 and ds3 - #RunCMIX "HoSaY" - #RunCMIX "Mitochondrion" - - #RunCMIX "VDB" - 
#RunCMIX "Archaea" - - #ds1 - #RunCMIX "GoGoC" - #RunCMIX "PaTrC" - #RunCMIX "HoSaC" - #RunCMIX "PiAbC" - # - cd ../../ - echo "Done!"; -fi - -if [[ "$RUN_DEEPZIP" -eq "1" ]]; then - echo "Running DEEPZIP ..."; - mkdir -p res - cd progs/DeepZip - - #ds4 - RunDEEPZIP "BuEb" - RunDEEPZIP "AgPh" - RunDEEPZIP "YeMi" - RunDEEPZIP "HePy" - RunDEEPZIP "AeCa" - RunDEEPZIP "HaHi" - RunDEEPZIP "EsCo" - RunDEEPZIP "PlFa" - #RunDEEPZIP "ScPo" - #RunDEEPZIP "EnIn" - #RunDEEPZIP "DrMe" - #RunDEEPZIP "OrSa" - #RunDEEPZIP "DaRe" - #RunDEEPZIP "GaGa" - #RunDEEPZIP "HoSa" + PARAM_DS23="-tm 3:1:1:1:0.8/0:0:0 -tm 6:1:1:1:0.85/0:0:0 -tm 9:1:1:1:0.85/0:0:0 -tm 12:10:0:1:0.85/0:0:0 -tm 15:200:1:10:0.85/2:1:0.85 -tm 17:200:1:10:0.85/2:1:0.85 -tm 20:500:1:40:0.85/5:20:0.85 -lr 0.03 -hs 64" - #ds2 and ds3 - #RunDEEPZIP "HoSaY" - #RunDEEPZIP "Mitochondrion" + RunGeCo2 "$PARAM_DS23" "HoSaY" + RunGeCo2 "$PARAM_DS23" "Mitochondrion" - #RunDEEPZIP "VDB" - #RunDEEPZIP "Archaea" + RunGeCo2 "$PARAM_DS23" "VDB" + RunGeCo2 "$PARAM_DS23" "Archaea" #ds1 - #RunDEEPZIP "GoGoC" - #RunDEEPZIP "PaTrC" - #RunDEEPZIP "HoSaC" - #RunDEEPZIP "PiAbC" + PARAM_DS1="-tm 3:1:1:1:0.70/0:0:0 -tm 8:1:1:1:0.85/0:0:0 -tm 13:10:0:1:0.85/0:0:0 -tm 19:500:1:40:0.85/5:20:0.85 -ls 0.03 -hs 64" + RunGeCo2 "$PARAM_DS1" "GoGoC" + RunGeCo2 "$PARAM_DS1" "PaTrC" + RunGeCo2 "$PARAM_DS1" "HoSaC" + RunGeCo2 "$PARAM_DS1" "PiAbC" # cd ../../ echo "Done!"; fi - -if [[ "$RUN_ZPAQ" -eq "1" ]]; then - echo "Running ZPAQ ..."; +if [[ "$RUN_GECO3" -eq "1" ]]; then + echo "Running GeCo3 ..."; mkdir -p res - cd progs/zpaq + cd progs/geco3 #ds4 - RunZPAQ "BuEb" - RunZPAQ "AgPh" - RunZPAQ "YeMi" - RunZPAQ "HePy" - RunZPAQ "AeCa" - RunZPAQ "HaHi" - RunZPAQ "EsCo" - RunZPAQ "PlFa" - RunZPAQ "ScPo" - RunZPAQ "EnIn" - RunZPAQ "DrMe" - RunZPAQ "OrSa" - RunZPAQ "DaRe" - RunZPAQ "GaGa" - RunZPAQ "HoSa" + RunGeCo3 "-l 1 -lr 0.06 -hs 8" "BuEb" + RunGeCo3 "-l 2 -lr 0.06 -hs 16" "AgPh" + RunGeCo3 "-l 3 -lr 0.09 -hs 24" "YeMi" + RunGeCo3 "-l 16 -lr 0.03 -hs 
64" "HePy" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "AeCa" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "HaHi" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "EsCo" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "PlFa" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "ScPo" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "EnIn" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "DrMe" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "OrSa" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "DaRe" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "GaGa" + RunGeCo3 "-l 16 -lr 0.03 -hs 64" "HoSa" #ds2 and ds3 - RunZPAQ "HoSaY" - RunZPAQ "Mitochondrion" + PARAM_DS23="-tm 3:1:1:1:0.8/0:0:0 -tm 6:1:1:1:0.85/0:0:0 -tm 9:1:1:1:0.85/0:0:0 -tm 12:10:0:1:0.85/0:0:0 -tm 15:200:1:10:0.85/2:1:0.85 -tm 17:200:1:10:0.85/2:1:0.85 -tm 20:500:1:40:0.85/5:20:0.85 -lr 0.03 -hs 64" - RunZPAQ "VDB" - RunZPAQ "Archaea" + RunGeCo3 "$PARAM_DS23" "HoSaY" + RunGeCo3 "$PARAM_DS23" "Mitochondrion" + + RunGeCo3 "$PARAM_DS23" "VDB" + RunGeCo3 "$PARAM_DS23" "Archaea" #ds1 - RunZPAQ "GoGoC" - RunZPAQ "PaTrC" - RunZPAQ "HoSaC" - RunZPAQ "PiAbC" + PARAM_DS1="-tm 3:1:1:1:0.70/0:0:0 -tm 8:1:1:1:0.85/0:0:0 -tm 13:10:0:1:0.85/0:0:0 -tm 19:500:1:40:0.85/5:20:0.85 -ls 0.03 -hs 64" + RunGeCo3 "$PARAM_DS1" "GoGoC" + RunGeCo3 "$PARAM_DS1" "PaTrC" + RunGeCo3 "$PARAM_DS1" "HoSaC" + RunGeCo3 "$PARAM_DS1" "PiAbC" + # cd ../../ echo "Done!";