From 8e61039024e99d26b495aa8e6f54bc165957c543 Mon Sep 17 00:00:00 2001 From: Lara Fuhrmann Date: Tue, 23 Aug 2022 11:09:21 +0200 Subject: [PATCH 1/2] [fix] populatin_nucleotide_diversity --- workflow/scripts/compute_diversity_measures.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/workflow/scripts/compute_diversity_measures.py b/workflow/scripts/compute_diversity_measures.py index e5a102c4b..ad7ae01ef 100644 --- a/workflow/scripts/compute_diversity_measures.py +++ b/workflow/scripts/compute_diversity_measures.py @@ -48,10 +48,11 @@ def population_nucleotide_diversity(df_mutations, length): for position_temp in list_polymorphic_sites(df_mutations, minor_allele_frequency=0): df_temp = df_mutations[df_mutations["position"] == position_temp] N = df_temp["coverage"].unique()[0] - position_pnd = 0 - pi = df_temp["tvar"] * (df_temp["tvar"] - 1) - postion_pi = (N * (N - 1) - pi.sum()) / (N * (N - 1)) - + if N == 0: + continue + freq = df_temp["frequency"].to_numpy() + position_pnd = freq**2 + postion_pi = ( 1 - position_pnd.sum() ) pi += postion_pi return float(pi / length) From 979b3d8e507b8e54adb1a44fc358af360b176e0b Mon Sep 17 00:00:00 2001 From: LaraFuhrmann <55209716+LaraFuhrmann@users.noreply.github.com> Date: Thu, 9 Mar 2023 15:05:10 +0100 Subject: [PATCH 2/2] [fix] nucleotide diversity - account also for reference base --- workflow/scripts/compute_diversity_measures.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/compute_diversity_measures.py b/workflow/scripts/compute_diversity_measures.py index ad7ae01ef..d0939906d 100644 --- a/workflow/scripts/compute_diversity_measures.py +++ b/workflow/scripts/compute_diversity_measures.py @@ -51,8 +51,11 @@ def population_nucleotide_diversity(df_mutations, length): if N == 0: continue freq = df_temp["frequency"].to_numpy() + ref_freq = 1 - freq.sum() + position_pnd = freq**2 - postion_pi = ( 1 - position_pnd.sum() ) + postion_pi = ( 1 - (position_pnd.sum() + ref_freq**2))* N / (N-1 ) + pi += postion_pi return float(pi / length)