forked from Altaheri/EEG-ATCNet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_HGD.py
129 lines (107 loc) · 4.98 KB
/
preprocess_HGD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Copyright (C) 2022 King Saud University, Saudi Arabia
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of the
License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
Author: Hamdi Altaheri
"""
#%%
# We need the following to load and preprocess the High Gamma Dataset
import numpy as np
import logging
from collections import OrderedDict
from braindecode.datasets.bbci import BBCIDataset
from braindecode.datautil.trial_segment import \
create_signal_target_from_raw_mne
from braindecode.mne_ext.signalproc import mne_apply, resample_cnt
from braindecode.datautil.signalproc import exponential_running_standardize
from braindecode.datautil.signalproc import highpass_cnt
#%%
def load_HGD_data(data_path, subject, training, low_cut_hz =0, debug = False):
""" Loading training/testing data for the High Gamma Dataset (HGD)
for a specific subject.
Please note that HGD is for "executed movements" NOT "motor imagery"
This code is taken from https://github.com/robintibor/high-gamma-dataset
You can download the HGD using the following link:
https://gin.g-node.org/robintibor/high-gamma-dataset/src/master/data
The Braindecode library is required to load and processs the HGD dataset.
Parameters
----------
data_path: string
dataset path
subject: int
number of subject in [1, .. ,14]
training: bool
if True, load training data
if False, load testing data
debug: bool
if True,
if False,
"""
log = logging.getLogger(__name__)
log.setLevel('DEBUG')
if training: filename = (data_path + 'train/{}.mat'.format(subject))
else: filename = (data_path + 'test/{}.mat'.format(subject))
load_sensor_names = None
if debug:
load_sensor_names = ['C3', 'C4', 'C2']
# we loaded all sensors to always get same cleaning results independent of sensor selection
# There is an inbuilt heuristic that tries to use only EEG channels and that definitely
# works for datasets in our paper
loader = BBCIDataset(filename, load_sensor_names=load_sensor_names)
log.info("Loading data...")
cnt = loader.load()
# Cleaning: First find all trials that have absolute microvolt values
# larger than +- 800 inside them and remember them for removal later
log.info("Cutting trials...")
marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
('Rest', [3]), ('Feet', [4])])
clean_ival = [0, 4000]
set_for_cleaning = create_signal_target_from_raw_mne(cnt, marker_def,
clean_ival)
clean_trial_mask = np.max(np.abs(set_for_cleaning.X), axis=(1, 2)) < 800
log.info("Clean trials: {:3d} of {:3d} ({:5.1f}%)".format(
np.sum(clean_trial_mask),
len(set_for_cleaning.X),
np.mean(clean_trial_mask) * 100))
# now pick only sensors with C in their name
# as they cover motor cortex
C_sensors = ['FC5', 'FC1', 'FC2', 'FC6', 'C3', 'C4', 'CP5',
'CP1', 'CP2', 'CP6', 'FC3', 'FCz', 'FC4', 'C5', 'C1', 'C2',
'C6',
'CP3', 'CPz', 'CP4', 'FFC5h', 'FFC3h', 'FFC4h', 'FFC6h',
'FCC5h',
'FCC3h', 'FCC4h', 'FCC6h', 'CCP5h', 'CCP3h', 'CCP4h', 'CCP6h',
'CPP5h',
'CPP3h', 'CPP4h', 'CPP6h', 'FFC1h', 'FFC2h', 'FCC1h', 'FCC2h',
'CCP1h',
'CCP2h', 'CPP1h', 'CPP2h']
if debug:
C_sensors = load_sensor_names
cnt = cnt.pick_channels(C_sensors)
# Further preprocessings as descibed in paper
log.info("Resampling...")
cnt = resample_cnt(cnt, 250.0)
log.info("Highpassing...")
cnt = mne_apply(
lambda a: highpass_cnt(
a, low_cut_hz, cnt.info['sfreq'], filt_order=3, axis=1),
cnt)
log.info("Standardizing...")
cnt = mne_apply(
lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
init_block_size=1000,
eps=1e-4).T,
cnt)
# Trial interval, start at -500 already, since improved decoding for networks
ival = [-500, 4000]
dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
dataset.X = dataset.X[clean_trial_mask]
dataset.y = dataset.y[clean_trial_mask]
return dataset.X, dataset.y