diff --git a/pipelines/Simulation.sh b/pipelines/Simulation.sh index 72f03e6..315a9fd 100755 --- a/pipelines/Simulation.sh +++ b/pipelines/Simulation.sh @@ -1,13 +1,13 @@ #!/bin/bash # -ITERATIONS=20000; -RATE=0.00005; +ITERATIONS=10000; +RATE=0.0001; # rm -f ALL.fa; # printf "\n" > DIV; # -gto_genomic_gen_random_dna -n 2000 -s 7 \ +gto_genomic_gen_random_dna -n 5000 -s 7 \ | gto_fasta_from_seq -n "Synthetic DNA" > ORIGINAL.fa # cp ORIGINAL.fa IN.fa diff --git a/src/defs.h b/src/defs.h index 2f84db8..8b733df 100644 --- a/src/defs.h +++ b/src/defs.h @@ -99,6 +99,7 @@ typedef int8_t I8; #define DEF_NC_LEVEL 11 #define DEF_NC_MIN_THREADS 1 #define DEF_NC_MAX_THREADS 999999 +#define HEADERS_PREFIX_SIZE 50 #define DEF_NCD_HELP 0 #define DEF_NCD_FORCE 0 diff --git a/src/nc.c b/src/nc.c index e534c5c..ccd7e6c 100644 --- a/src/nc.c +++ b/src/nc.c @@ -495,7 +495,10 @@ void NormalizedCompression(NC_PARAMETERS *MAP) if(P->verbose) fprintf(stderr, "[>] Compressing %s ...\n", !P->dna ? "DNA" : "Aminoacids"); - + + char identifier_prefix[FA->nReads+1][HEADERS_PREFIX_SIZE+1]; + uint32_t idx_header = 0; + while((k = fread(buffer, 1, BUFFER_SIZE, F))) for(idx = 0 ; idx < k ; ++idx) { @@ -512,9 +515,20 @@ void NormalizedCompression(NC_PARAMETERS *MAP) continue; } if(sym == '\n' && header == 1) - { header = 0; nSymbols = 0; continue; } + { + header = 0; + nSymbols = 0; + identifier_prefix[idx_reads-1][idx_header] = '\0'; + idx_header = 0; + continue; + } if(sym == '\n') continue; - if(header == 1) continue; + if(header == 1) + { + if(idx_header < HEADERS_PREFIX_SIZE) + identifier_prefix[idx_reads-1][idx_header++] = sym; + continue; + } SEQ[nSymbols++] = sym; } @@ -522,7 +536,8 @@ void NormalizedCompression(NC_PARAMETERS *MAP) else vr[idx_reads-1] = CompressTargetReadAA (SEQ, nSymbols); for(idx_reads = 0 ; idx_reads < FA->nReads ; ++idx_reads) - fprintf(stdout, "%"PRIu64"\t%lf\n", idx_reads+1, vr[idx_reads]); + fprintf(stdout, "%"PRIu64"\t%lf\t%s\n", idx_reads+1, vr[idx_reads], + identifier_prefix[idx_reads]); fclose(F);