From 507246a7a1695cd86202aaead960ea36e26bae60 Mon Sep 17 00:00:00 2001 From: LaraFuhrmann <55209716+LaraFuhrmann@users.noreply.github.com> Date: Thu, 9 Mar 2023 15:05:10 +0100 Subject: [PATCH] [fix] nucleotide diversity - account also for reference base --- workflow/scripts/compute_diversity_measures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/compute_diversity_measures.py b/workflow/scripts/compute_diversity_measures.py index ad7ae01e..d0939906 100644 --- a/workflow/scripts/compute_diversity_measures.py +++ b/workflow/scripts/compute_diversity_measures.py @@ -51,8 +51,11 @@ def population_nucleotide_diversity(df_mutations, length): if N == 0: continue freq = df_temp["frequency"].to_numpy() + ref_freq = 1 - freq.sum() + position_pnd = freq**2 - postion_pi = ( 1 - position_pnd.sum() ) + postion_pi = ( 1 - (position_pnd.sum() + ref_freq**2))* N / (N-1 ) + pi += postion_pi return float(pi / length)