diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..0b6404b24 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,38 @@ +name: Test + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + name: ${{ matrix.os }}, py-${{ matrix.python_version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-18.04, windows-latest, macOS-latest] + python_version: [3.6] + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python_version }} + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip wheel flake8 + pip install -r requirements.txt + pip install . + - name: Run tests + shell: bash + run: | + python -m unittest moabb.tests + python -m moabb.run --pipelines=./moabb/tests/test_pipelines/ --verbose + - name: Run linting + shell: bash + run: | + flake8 moabb diff --git a/LICENSE b/LICENSE index 1478d3165..6bf91f0a0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,24 +1,29 @@ -Copyright © 2017, authors of moabb +BSD 3-Clause License + +Copyright (c) 2017, authors of moabb All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the names of moabb authors nor the names of any - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 341bbceee..5d410c889 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ **This is work in progress. API will change significantly (as well as the results of the benchmark).** -[![Build Status](https://travis-ci.org/NeuroTechX/moabb.svg?branch=master)](https://travis-ci.org/NeuroTechX/moabb) +[![Build Status](https://github.com/NeuroTechX/moabb/workflows/Test/badge.svg)](https://github.com/NeuroTechX/moabb/actions?query=branch%3Amaster) ## Welcome! diff --git a/moabb/datasets/epfl.py b/moabb/datasets/epfl.py index 0b12900d1..9b90dd560 100644 --- a/moabb/datasets/epfl.py +++ b/moabb/datasets/epfl.py @@ -116,6 +116,13 @@ def _get_single_run_data(self, file_path): 'MA2'] ch_types = ['eeg'] * 32 + ['misc'] * 2 + # The last X entries are 0 for all signals. This leads to + # artifacts when epoching and band-pass filtering the data. + # Correct the signals for this. 
+ sig_i = np.where( + np.diff(np.all(signals == 0, axis=0).astype(int)) != 0)[0][0] + signals = signals[:, :sig_i] + signals *= 1e-6 # data is stored as uV, but MNE expects V # we have to re-reference the signals # the average signal on the mastoids electrodes is used as reference references = [32, 33] diff --git a/moabb/datasets/schirrmeister2017.py b/moabb/datasets/schirrmeister2017.py index 451ca3f56..40207e542 100644 --- a/moabb/datasets/schirrmeister2017.py +++ b/moabb/datasets/schirrmeister2017.py @@ -198,8 +198,9 @@ def get_all_sensors(filename, pattern=None): """ with h5py.File(filename, 'r') as h5file: clab_set = h5file['nfo']['clab'][:].squeeze() - all_sensor_names = [''.join(chr(c) for c in h5file[obj_ref]) for - obj_ref in clab_set] + all_sensor_names = [''.join( + chr(c.squeeze()) for c in h5file[obj_ref]) + for obj_ref in clab_set] if pattern is not None: all_sensor_names = filter( lambda sname: re.search(pattern, sname), diff --git a/moabb/datasets/upper_limb.py b/moabb/datasets/upper_limb.py index 40b458bc1..f8d2b83f2 100644 --- a/moabb/datasets/upper_limb.py +++ b/moabb/datasets/upper_limb.py @@ -1,6 +1,6 @@ from moabb.datasets.base import BaseDataset -from mne.io import read_raw_edf +from mne.io import read_raw_gdf from mne.channels import make_standard_montage import numpy as np @@ -94,7 +94,7 @@ def _get_single_subject_data(self, subject): montage = make_standard_montage('standard_1005') data = {} for ii, path in enumerate(paths): - raw = read_raw_edf(path, eog=eog, misc=range(64, 96), + raw = read_raw_gdf(path, eog=eog, misc=range(64, 96), preload=True, verbose='ERROR') raw.set_montage(montage) # there is nan in the data diff --git a/moabb/paradigms/base.py b/moabb/paradigms/base.py index fd5804b6a..b2c6c0e9b 100644 --- a/moabb/paradigms/base.py +++ b/moabb/paradigms/base.py @@ -75,14 +75,11 @@ def process_raw(self, raw, dataset, return_epochs=False): Parameters ---------- - raw: mne.Raw instance the raw EEG data. 
- dataset : dataset instance The dataset corresponding to the raw file. mainly use to access dataset specific information. - return_epochs: boolean This flag specifies whether to return only the data array or the complete processed mne.Epochs @@ -93,10 +90,8 @@ def process_raw(self, raw, dataset, return_epochs=False): the data that will be used as features for the model Note: if return_epochs=True, this is mne.Epochs if return_epochs=False, this is np.ndarray - labels: np.ndarray the labels for training / evaluating the model - metadata: pd.DataFrame A dataframe containing the metadata @@ -105,18 +100,23 @@ def process_raw(self, raw, dataset, return_epochs=False): event_id = self.used_events(dataset) # find the events, first check stim_channels then annotations - stim_channels = mne.utils._get_stim_channel( - None, raw.info, raise_error=False) + stim_channels = mne.utils._get_stim_channel(None, raw.info, + raise_error=False) if len(stim_channels) > 0: events = mne.find_events(raw, shortest_event=0, verbose=False) else: - events, _ = mne.events_from_annotations(raw, event_id=event_id, - verbose=False) - channels = () if self.channels is None else self.channels + try: + events, _ = mne.events_from_annotations(raw, + event_id=event_id, + verbose=False) + except ValueError: + events, _ = mne.events_from_annotations(raw, verbose=False) # picks channels - picks = mne.pick_types(raw.info, eeg=True, stim=False, - include=channels) + if self.channels is None: + picks = mne.pick_types(raw.info, eeg=True, stim=False) + else: + picks = mne.pick_types(raw.info, stim=False, include=self.channels) # pick events, based on event_id try: @@ -139,11 +139,23 @@ def process_raw(self, raw, dataset, return_epochs=False): raw_f = raw.copy().filter(fmin, fmax, method='iir', picks=picks, verbose=False) # epoch data + baseline = self.baseline + if baseline is not None: + baseline = (self.baseline[0] + dataset.interval[0], + self.baseline[1] + dataset.interval[0]) + bmin = baseline[0] if 
baseline[0] < tmin else tmin + bmax = baseline[1] if baseline[1] > tmax else tmax + else: + bmin = tmin + bmax = tmax epochs = mne.Epochs(raw_f, events, event_id=event_id, - tmin=tmin, tmax=tmax, proj=False, - baseline=None, preload=True, + tmin=bmin, tmax=bmax, proj=False, + baseline=baseline, preload=True, verbose=False, picks=picks, + event_repeated='drop', on_missing='ignore') + if bmin < tmin or bmax > tmax: + epochs.crop(tmin=tmin, tmax=tmax) if self.resample is not None: epochs = epochs.resample(self.resample) # rescale to work with uV @@ -181,11 +193,16 @@ def get_data(self, dataset, subjects=None, return_epochs=False): A dataset instance. subjects: List of int List of subject number + return_epochs: boolean + This flag specifies whether to return only the data array or the + complete processed mne.Epochs returns ------- - X : np.ndarray + X : Union[np.ndarray, mne.Epochs] the data that will be used as features for the model + Note: if return_epochs=True, this is mne.Epochs + if return_epochs=False, this is np.ndarray labels: np.ndarray the labels for training / evaluating the model metadata: pd.DataFrame diff --git a/moabb/paradigms/motor_imagery.py b/moabb/paradigms/motor_imagery.py index 53802bb36..9715a527c 100644 --- a/moabb/paradigms/motor_imagery.py +++ b/moabb/paradigms/motor_imagery.py @@ -36,6 +36,14 @@ class BaseMotorImagery(BaseParadigm): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) list of channel to select. If None, use all EEG channels available in the dataset. 
@@ -45,11 +53,12 @@ class BaseMotorImagery(BaseParadigm): """ def __init__(self, filters=([7, 35],), events=None, tmin=0.0, tmax=None, - channels=None, resample=None): + baseline=None, channels=None, resample=None): super().__init__() self.filters = filters - self.channels = channels self.events = events + self.channels = channels + self.baseline = baseline self.resample = resample if (tmax is not None): @@ -120,6 +129,14 @@ class SinglePass(BaseMotorImagery): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) list of channel to select. If None, use all EEG channels available in the dataset. @@ -298,6 +315,14 @@ class MotorImagery(SinglePass): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) list of channel to select. If None, use all EEG channels available in the dataset. 
diff --git a/moabb/paradigms/p300.py b/moabb/paradigms/p300.py index e6bf29551..e8b515fbe 100644 --- a/moabb/paradigms/p300.py +++ b/moabb/paradigms/p300.py @@ -39,6 +39,14 @@ class BaseP300(BaseParadigm): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) list of channel to select. If None, use all EEG channels available in the dataset. @@ -48,11 +56,12 @@ class BaseP300(BaseParadigm): """ def __init__(self, filters=([1, 24],), events=None, tmin=0.0, tmax=None, - channels=None, resample=None): + baseline=None, channels=None, resample=None): super().__init__() self.filters = filters - self.channels = channels self.events = events + self.channels = channels + self.baseline = baseline self.resample = resample if (tmax is not None): @@ -99,6 +108,12 @@ def process_raw(self, raw, dataset, return_epochs=False): # pick events, based on event_id try: + if (type(event_id['Target']) is list and + type(event_id['NonTarget']) == list): + event_id_new = dict(Target=1, NonTarget=0) + events = mne.merge_events(events, event_id['Target'], 1) + events = mne.merge_events(events, event_id['NonTarget'], 0) + event_id = event_id_new events = mne.pick_events(events, include=list(event_id.values())) except RuntimeError: # skip raw if no event found @@ -118,11 +133,22 @@ def process_raw(self, raw, dataset, return_epochs=False): raw_f = raw.copy().filter(fmin, fmax, method='iir', picks=picks, verbose=False) # epoch data + baseline = self.baseline + if baseline is not None: + baseline = (self.baseline[0] + dataset.interval[0], 
+ self.baseline[1] + dataset.interval[0]) + bmin = baseline[0] if baseline[0] < tmin else tmin + bmax = baseline[1] if baseline[1] > tmax else tmax + else: + bmin = tmin + bmax = tmax epochs = mne.Epochs(raw_f, events, event_id=event_id, - tmin=tmin, tmax=tmax, proj=False, - baseline=None, preload=True, + tmin=bmin, tmax=bmax, proj=False, + baseline=baseline, preload=True, verbose=False, picks=picks, on_missing='ignore') + if bmin < tmin or bmax > tmax: + epochs.crop(tmin=tmin, tmax=tmax) if self.resample is not None: epochs = epochs.resample(self.resample) # rescale to work with uV @@ -187,6 +213,14 @@ class SinglePass(BaseP300): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) list of channel to select. If None, use all EEG channels available in the dataset. diff --git a/moabb/paradigms/ssvep.py b/moabb/paradigms/ssvep.py index 9eddc96ae..80295fb64 100644 --- a/moabb/paradigms/ssvep.py +++ b/moabb/paradigms/ssvep.py @@ -35,6 +35,14 @@ class BaseSSVEP(BaseParadigm): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. 
+ Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) List of channel to select. If None, use all EEG channels available in the dataset. @@ -44,11 +52,12 @@ class BaseSSVEP(BaseParadigm): """ def __init__(self, filters=[(7, 45)], events=None, n_classes=2, tmin=0.0, - tmax=None, channels=None, resample=None): + tmax=None, baseline=None, channels=None, resample=None): super().__init__() self.filters = filters self.events = events self.n_classes = n_classes + self.baseline = baseline self.channels = channels self.resample = resample @@ -155,6 +164,14 @@ class SSVEP(BaseSSVEP): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) List of channel to select. If None, use all EEG channels available in the dataset. @@ -200,6 +217,14 @@ class FilterBankSSVEP(BaseSSVEP): 5 second after the begining of the task as defined in the dataset. If None, use the dataset value. + baseline: None | tuple of length 2 + The time interval to consider as “baseline” when applying baseline + correction. If None, do not apply baseline correction. + If a tuple (a, b), the interval is between a and b (in seconds), + including the endpoints. + Correction is applied by computing the mean of the baseline period + and subtracting it from the data (see mne.Epochs) + channels: list of str | None (default None) List of channel to select. If None, use all EEG channels available in the dataset. 
diff --git a/moabb/tests/paradigms.py b/moabb/tests/paradigms.py index 480b13fc0..a3b177f8c 100644 --- a/moabb/tests/paradigms.py +++ b/moabb/tests/paradigms.py @@ -130,15 +130,16 @@ class Test_P300(unittest.TestCase): def test_BaseP300_paradigm(self): paradigm = SimpleP300() - dataset = FakeDataset(paradigm='p300') + dataset = FakeDataset(paradigm='p300', + event_list=['Target', 'NonTarget']) X, labels, metadata = paradigm.get_data(dataset, subjects=[1]) # we should have all the same length self.assertEqual(len(X), len(labels), len(metadata)) # X must be a 3D Array self.assertEqual(len(X.shape), 3) - # labels must contain 3 values - self.assertEqual(len(np.unique(labels)), 3) + # labels must contain 2 values (Target/NonTarget) + self.assertEqual(len(np.unique(labels)), 2) # metadata must have subjets, sessions, runs self.assertTrue('subject' in metadata.columns) @@ -160,7 +161,8 @@ def test_BaseP300_tmintmax(self): def test_BaseP300_filters(self): # can work with filter bank paradigm = SimpleP300(filters=[[1, 12], [12, 24]]) - dataset = FakeDataset(paradigm='p300') + dataset = FakeDataset(paradigm='p300', + event_list=['Target', 'NonTarget']) X, labels, metadata = paradigm.get_data(dataset, subjects=[1]) # X must be a 4D Array @@ -171,7 +173,8 @@ def test_BaseP300_wrongevent(self): # test process_raw return empty list if raw does not contain any # selected event. cetain runs in dataset are event specific. 
paradigm = SimpleP300(filters=[[1, 12], [12, 24]]) - dataset = FakeDataset(paradigm='p300') + dataset = FakeDataset(paradigm='p300', + event_list=['Target', 'NonTarget']) raw = dataset.get_data([1])[1]['session_0']['run_0'] # add something on the event channel raw._data[-1] *= 10 diff --git a/requirements.txt b/requirements.txt index 242ffeb23..91754a47f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -scikit-learn +scikit-learn<0.24.0 # 0.24.0 breaks pyriemann, requires https://github.com/alexandrebarachant/pyRiemann/pull/93 mne >= 0.19 pyriemann matplotlib >= 2.2 seaborn >= 0.9.0 -h5py +h5py==2.10 # locked due to https://github.com/NeuroTechX/moabb/issues/122 pandas pyyaml coloredlogs diff --git a/tutorials/select_electrodes_resample.py b/tutorials/select_electrodes_resample.py new file mode 100644 index 000000000..3ec8fa4bb --- /dev/null +++ b/tutorials/select_electrodes_resample.py @@ -0,0 +1,88 @@ +""" +================================ +Select electrodes and resampling +================================ + +Within paradigm, it is possible to restrict analysis only to a subset of +electrodes and to resample to a specific sampling rate. There is also a +utility function to select common electrodes shared between datasets. +This tutorial demonstrates how to use this functionality. 
+""" +# Authors: Sylvain Chevallier +# +# License: BSD (3-clause) +from moabb.datasets import BNCI2014001, Zhou2016 +from moabb.paradigms import LeftRightImagery +from moabb.evaluations import WithinSessionEvaluation +from moabb.datasets.utils import find_intersecting_channels + +from sklearn.pipeline import make_pipeline +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA +from sklearn.linear_model import LogisticRegression as LR + +from mne.decoding import CSP +from pyriemann.estimation import Covariances +from pyriemann.tangentspace import TangentSpace + +import matplotlib.pyplot as plt +import moabb.analysis.plotting as moabb_plt + +############################################################################## +# Datasets +# -------- +# +# Select datasets for motor imagery + +datasets = [Zhou2016(), BNCI2014001()] + +############################################################################## +# Paradigm +# -------- +# +# Restrict further analysis to specified channels, here C3, C4, and Cz. +# Also, use a specific resampling. In this example, all datasets are +# set to 200 Hz. + +paradigm = LeftRightImagery(channels=['C3', 'C4', 'Cz'], resample=200.) + +############################################################################## +# Evaluation +# ---------- +# +# The evaluation is conducted with CSP+LDA, only on the 3 electrodes, with +# a sampling rate of 200 Hz. 
+ +evaluation = WithinSessionEvaluation(paradigm=paradigm, + datasets=datasets) +csp_lda = make_pipeline(CSP(n_components=2), LDA()) +ts_lr = make_pipeline(Covariances(estimator='oas'), + TangentSpace(metric='riemann'), + LR(C=1.0)) +results = evaluation.process({'csp+lda': csp_lda, 'ts+lr': ts_lr}) +print(results.head()) + +############################################################################## +# Electrode selection +# ------------------- +# +# It is possible to select the electrodes that are shared by all datasets +# using the `find_intersecting_channels` function. Datasets that have 0 +# overlap with others are discarded. It returns the set of common channels, +# as well as the list of datasets with valid channels. + +electrodes, datasets = find_intersecting_channels(datasets) +evaluation = WithinSessionEvaluation(paradigm=paradigm, + datasets=datasets, + overwrite=True) +results = evaluation.process({'csp+lda': csp_lda, 'ts+lr': ts_lr}) +print(results.head()) + +############################################################################## +# Plot results +# ------------ +# +# Compare the obtained results with the two pipelines, CSP+LDA and logistic +# regression computed in the tangent space of the covariance matrices. + +fig = moabb_plt.paired_plot(results, 'csp+lda', 'ts+lr') +plt.show()