-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCoulterFile.py
160 lines (128 loc) · 5.71 KB
/
CoulterFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from pathlib import Path
import re
from dataclasses import dataclass, asdict
import numpy as np
class CoulterFile:
    """
    Parsed contents of a single coulter counter .#m4 file.

    The file is read once, at construction. Afterwards the instance holds:

    stat_tags: names of the [SizeStats] entries that are kept
    stats: dict {stat name: value exactly as written in the file (str)}
    bin_edges_diameter, bin_edges_volume, bin_counts: histogram arrays
    diameters, volumes: one value per entry of the [#Pulses5hex] section
    """

    # "<name>=<number>" entries of the [SizeStats] section
    _STAT_PATTERN = re.compile(r'([\w\(\)\,]+)=\s*([-\d\.]+)')
    # Numeric value that directly follows a "<name>= " marker
    _NUMBER_PATTERN = re.compile(r'[-\d\.]+')
    # Comma separated pulse record; only the first field is captured
    _PULSE_PATTERN = re.compile(r'^([A-Z\d]+),[A-Z\d,]+$')

    def __init__(self, file_path) -> None:
        """
        Read and parse a coulter counter file

        Args:
            file_path (str or Path): path to coulter counter file

        Raises:
            ValueError: if an expected section or parameter is missing
        """
        self.stat_tags = ['Mean', 'Mode', 'Median', 'SD', 'CV', 'MinSize',
                          'MaxSize', 'SampleSize']
        self._populate_fields(file_path)

    def _populate_fields(self, file_path) -> None:
        """
        Populate class fields by reading coulter counter .#m4 file

        Args:
            file_path (string): path to coulter counter file
        """
        fp = Path(file_path)
        with fp.open('r') as file:
            lines = file.readlines()

        self.stats = self._get_selection_stats(lines)
        self.bin_edges_diameter, self.bin_edges_volume, self.bin_counts = \
            self._get_hist(lines)
        self.diameters, self.volumes = self._get_single_cell(lines)

    def _get_selection_stats(self, lines) -> dict:
        """
        Extracts pre-selected/gated statistic values from a single coulter
        counter file

        Only entries whose name is listed in self.stat_tags are kept. Lines
        of the section that are not of the form "<name>=<number>" (blank
        lines, text values) are skipped.

        Args:
            lines (list(str)): list of lines in coulter counter file

        Raises:
            ValueError: Raises error if statistics are not in the coulter
                counter file

        Returns:
            dict: dictionary {stat name:value}, values are kept as the
                strings found in the file
        """
        relevant_lines = self._get_file_section(lines, '[SizeStats]',
                                                '[SizePctX]')

        stat_dict = {}
        for line in relevant_lines:
            match = self._STAT_PATTERN.match(line)
            if match is None:
                # Not a numeric "name=value" entry; nothing to extract
                continue
            statname, val = match.groups()
            if statname in self.stat_tags:
                stat_dict[statname] = val
        return stat_dict

    def _get_hist(self, lines) -> tuple:
        """
        Get histogram bin edges and counts from coulter counter raw file

        Blank lines inside the sections are ignored.

        Args:
            lines (list(str)): list of strings from coulter counter file

        Raises:
            ValueError: if a histogram section is missing or holds a value
                that cannot be converted to a number

        Returns:
            tuple: (diameters from [#Bindiam], volumes of spheres with those
                diameters, counts from [#Binheight]) as numpy arrays
        """
        diameter_lines = self._get_file_section(lines, '[#Bindiam]',
                                                '[Binunits]')
        edges_diameter = [float(dm) for dm in diameter_lines if dm.strip()]
        # Sphere volume for each diameter
        edges_volume = [4/3 * np.pi * (dm/2)**3 for dm in edges_diameter]

        count_lines = self._get_file_section(lines, '[#Binheight]',
                                             '[SizeStats]')
        bin_counts = [int(ct) for ct in count_lines if ct.strip()]

        return (np.array(edges_diameter), np.array(edges_volume),
                np.array(bin_counts))

    def _get_param(self, section_lines, marker) -> float:
        """
        Read the number following a "<name>= " marker at the start of a line

        Args:
            section_lines (list(str)): lines of one file section
            marker (str): text preceding the number, e.g. 'Kd= '

        Raises:
            ValueError: if no line starts with the marker or the marker is
                not followed by a number

        Returns:
            float: value of the first matching line
        """
        for line in section_lines:
            stripped = line.lstrip()
            if not stripped.startswith(marker):
                continue
            match = self._NUMBER_PATTERN.match(stripped[len(marker):])
            if match is None:
                raise ValueError(
                    f"No numeric value after '{marker.strip()}' in file.")
            return float(match.group())
        raise ValueError(
            f"Parameter '{marker.strip()}' was not found in file.")

    def _get_single_cell(self, lines) -> tuple:
        """
        Compute a diameter and a volume for every recorded pulse

        The first comma separated field of each [#Pulses5hex] line is read
        as a hexadecimal pulse height, offset by MaxHtCorr, and converted
        with Kd, Current and Gain taken from the file.

        Args:
            lines (list(str)): list of strings from coulter counter file

        Raises:
            ValueError: if a section or parameter is missing, or a pulse
                line does not have the expected format

        Returns:
            tuple: (diameters, volumes) as numpy arrays, one entry per pulse
        """
        # NOTE(review): origin of the 298.02e-9 constant is not documented
        # in this file - confirm against the instrument documentation
        counts_per_volt = 1 / (4 * 298.02e-9)

        kd_lst = self._get_file_section(lines, '[KDsave0]', '[sample]')
        kd = self._get_param(kd_lst, 'Kd= ')

        param_lst = self._get_file_section(lines, '[instrument]', '[M3Info]')
        # NOTE(review): the /1000 and *25 scalings presumably convert the
        # stored Current and Gain into the units the formula needs - confirm
        current = self._get_param(param_lst, 'Current= ') / 1000
        resistance = self._get_param(param_lst, 'Gain= ') * 25
        max_ht_corr = self._get_param(param_lst, 'MaxHtCorr= ')

        pulse_strs = self._get_file_section(lines, '[#Pulses5hex]',
                                            '[#TSms]')
        raw_heights = []
        for pulse in pulse_strs:
            if not pulse.strip():
                continue
            match = self._PULSE_PATTERN.match(pulse)
            if match is None:
                raise ValueError(
                    f"Unexpected pulse record in file: {pulse.strip()!r}")
            raw_heights.append(int(match.group(1), 16))

        height = np.array(raw_heights) + max_ht_corr
        diameter = kd * ((height / (counts_per_volt * resistance * current))
                         ** (1/3))
        volume = 4/3 * np.pi * (diameter/2)**3
        return diameter, volume

    def _get_file_section(self, lines, start_marker, end_marker) -> list:
        """
        Given bracketed section markers at beginning and end of a section in
        coulter counter file, extract list of strings from coulter file

        The end marker is only searched for after the start marker has been
        seen, so an earlier occurrence of the end marker does not hide the
        section.

        Args:
            lines (list(str)): lines from coulter counter raw file
            start_marker (str): bracketed start marker of lines of interest
            end_marker (str): bracketed start marker concluding lines of
                interest

        Raises:
            ValueError: if the start marker, or an end marker after it, is
                not present

        Returns:
            list(str): relevant lines from file (markers excluded)
        """
        start_index = None
        for i, line in enumerate(lines):
            if start_marker in line:
                start_index = i
            elif start_index is not None and end_marker in line:
                # Extract lines between start and end marker
                return lines[start_index + 1:i]

        raise ValueError(
            f"Section between {start_marker} and {end_marker} was not "
            "found in file.")

    def get_stats(self) -> dict:
        """
        Getter for pre-selected coulter file stats

        Returns:
            dict: coulter file stats {name of stat: value}
        """
        return self.stats

    def get_diameters(self) -> np.ndarray:
        """
        Getter for the per-pulse diameters

        Returns:
            np.ndarray: diameters computed from the [#Pulses5hex] section
        """
        return self.diameters

    def get_volumes(self) -> np.ndarray:
        """
        Getter for the per-pulse volumes

        Returns:
            np.ndarray: sphere volumes matching get_diameters()
        """
        return self.volumes
def pairwise_mean(lst) -> np.ndarray:
    """
    Calculates the pairwise mean between adjacent elements in the list (mean
    of elements 0 and 1, then 1 and 2, etc)

    Args:
        lst (sequence of float): original values

    Returns:
        np.ndarray: pairwise means, one element shorter than the input
            (empty when the input has fewer than two elements)
    """
    values = np.asarray(lst, dtype=float)
    # Shifted views line up element i with element i + 1
    return (values[:-1] + values[1:]) / 2