camcan

Configuration file:

import pathlib

# Label identifying the study this configuration belongs to.
study_name = "age-prediction-benchmark"

# --- Locations on disk -----------------------------------------------------
# BIDS input data, derivatives output and the FreeSurfer directory.
bids_root = pathlib.Path('/storage/store/data/camcan/BIDSsep/rest')
deriv_root = pathlib.Path(
    '/storage/store3/derivatives/camcan-bids/derivatives')
subjects_dir = pathlib.Path('/storage/store/data/camcan-mne/freesurfer')

# --- Source-level settings -------------------------------------------------
# NOTE(review): presumably points the source-estimation steps at the epochs
# saved with the "autoreject" processing label -- confirm in pipeline docs.
source_info_path_update = dict(processing='autoreject', suffix='epo')
inverse_targets = list()
noise_cov = 'ad-hoc'

# --- Recording selection ---------------------------------------------------
# One task and one session, both named 'rest'; MEG data only.
task = 'rest'
sessions = ['rest']
data_type = 'meg'
ch_types = ['meg']

# Channels kept for analysis: the 102 names "MEG<gg><p>1" with group gg in
# 01..26 and position p in 1..4, in ascending order. Group 08 only has
# positions 1 and 2 (there is no MEG0831 / MEG0841).
# NOTE(review): presumably these are the magnetometers of the MEG system --
# confirm against the acquisition documentation.
analyze_channels = [
    f'MEG{group:02d}{position}1'
    for group in range(1, 27)
    for position in range(1, 5)
    if not (group == 8 and position > 2)
]

# --- Temporal filtering ----------------------------------------------------
l_freq = 0.1
h_freq = 49
l_trans_bandwidth = "auto"
h_trans_bandwidth = "auto"

# --- Referencing / artifact handling ---------------------------------------
eeg_reference = []

eog_channels = []

find_breaks = False

n_proj_eog = 1

reject = None

on_rename_missing_events = "warn"

decim = 5  # Cam-CAN has 1000 Hz; Cuban Human Brain Project 200Hz

# --- Maxwell filtering -----------------------------------------------------
mf_st_duration = 10.
mf_cal_fname = '/storage/store/data/camcan-mne/Cam-CAN_sss_cal.dat'
mf_ctc_fname = '/storage/store/data/camcan-mne/Cam-CAN_ct_sparse.fif'

find_flat_channels_meg = True
find_noisy_channels_meg = True
use_maxwell_filter = True

# --- Epoching --------------------------------------------------------------
# XXX the values below differ from our previous papers but would be in line
# with the other EEG data used in this benchmark
epochs_tmin = 0.
epochs_tmax = 10.
rest_epochs_overlap = 0.
rest_epochs_duration = 10.
baseline = None

event_repeated = "drop"
shortest_event = 1

# --- Source estimation -----------------------------------------------------
run_source_estimation = True
use_template_mri = "fsaverage_small"
adjust_coreg = True

# --- Execution -------------------------------------------------------------
random_state = 42

log_level = "info"

mne_log_level = "error"

on_error = "continue"

# N_JOBS used to be assigned twice (30, then 40); only the last assignment
# ever took effect, so the single effective value is kept here.
N_JOBS = 40

# NOTE(review): restricts the run to a single subject -- looks like a
# debugging leftover; confirm before launching the full benchmark.
subjects = ['CC110033']

chbp

Configuration file:

import pathlib
import mne

# Label identifying the study this configuration belongs to.
study_name = "age-prediction-benchmark"

# --- Locations on disk -----------------------------------------------------
bids_root = pathlib.Path(
    "/storage/store3/data/CHBMP_EEG_and_MRI/ds_bids_chbmp")
# Alternative output location that was used before:
#   /storage/store2/derivatives/eeg-pred-modeling-summer-school/
deriv_root = pathlib.Path("/storage/store3/derivatives/CHBMP_EEG_and_MRI/")
subjects_dir = pathlib.Path('/storage/store/data/camcan-mne/freesurfer')

# --- Source-level settings -------------------------------------------------
# NOTE(review): presumably points the source-estimation steps at the epochs
# saved with the "autoreject" processing label -- confirm in pipeline docs.
source_info_path_update = dict(processing='autoreject', suffix='epo')
inverse_targets = list()
noise_cov = 'ad-hoc'

# --- Recording selection ---------------------------------------------------
task = "protmap"
sessions = list()  # keep empty for code flow
data_type = "eeg"
ch_types = ["eeg"]

# EEG channels kept for analysis, listed in alphabetical order.
analyze_channels = (
    ["AF3", "AF4"]
    + ["C1", "C2", "C3", "C4", "C5", "C6"]
    + ["CP1", "CP2", "CP3", "CP4", "CP5", "CP6"]
    + ["Cz"]
    + ["F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8"]
    + ["FC1", "FC2", "FC3", "FC4", "FC5", "FC6"]
    + ["Fp1", "Fp2", "Fz"]
    + ["O1", "O2"]
    + ["P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8"]
    + ["PO3", "PO4", "PO5", "PO6", "PO7", "PO8"]
    + ["Pz", "T7", "T8", "TP7", "TP8"]
)

# Template electrode montage: start from MNE's "standard_1005" montage and
# rename two of its channels ("FFT7h" -> "FFC7h", "FFT8h" -> "FFC8h").
# The return value of rename_channels is not reassigned, so this relies on
# the montage being modified in place.
# NOTE(review): presumably the new names match the channel naming of the
# recordings -- confirm against the dataset.
eeg_template_montage = mne.channels.make_standard_montage("standard_1005")
eeg_template_montage.rename_channels({"FFT7h": "FFC7h", "FFT8h": "FFC8h"})

# --- Temporal filtering ----------------------------------------------------
l_freq = 0.1
h_freq = 49
l_trans_bandwidth = "auto"
h_trans_bandwidth = "auto"

# --- Referencing / artifact handling ---------------------------------------
eeg_reference = []

find_breaks = False

n_proj_eog = 1

ssp_reject_eog = "autoreject_global"

reject = None

on_rename_missing_events = "warn"

N_JOBS = 30

# --- Epoching --------------------------------------------------------------
epochs_tmin = 0
epochs_tmax = 10
baseline = None

# --- Source estimation -----------------------------------------------------
run_source_estimation = True
use_template_mri = True

# --- Events ----------------------------------------------------------------
# Maps the annotation labels found in the recordings (Spanish and English
# variants) onto one common set of event names.
rename_events = {
    "artefacto": "artefact",
    "discontinuity": "discontinuity",
    "electrodes artifacts": "artefact",
    "eyes closed": "eyes/closed",
    "eyes opened": "eyes/open",
    "fotoestimulacion": "photic_stimulation",
    "hiperventilacion 1": "hyperventilation/1",
    "hiperventilacion 2": "hyperventilation/2",
    "hiperventilacion 3": "hyperventilation/3",
    "hyperventilation 1": "hyperventilation/1",
    "hyperventilation 2": "hyperventilation/2",
    "hyperventilation 3": "hyperventilation/3",
    "ojos abiertos": "eyes/open",
    "ojos cerrados": "eyes/closed",
    "photic stimulation": "photic_stimulation",
    "recuperacion": "recovery",
    "recuperation": "recovery",
}

# Only the two resting conditions are analysed.
conditions = ["eyes/open", "eyes/closed"]

event_repeated = "drop"
shortest_event = 1

# --- Execution -------------------------------------------------------------
random_state = 42

log_level = "info"

mne_log_level = "error"

# on_error used to be assigned twice ("abort", then "continue"); only the
# last assignment ever took effect, so the single effective value is kept.
on_error = "continue"

lemon

Configuration file:

import argparse
import os
import pathlib
from tkinter import BOTTOM
import urllib.request
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
import mne
from mne.io.brainvision.brainvision import _aux_vhdr_info

from mne_bids import write_raw_bids, print_dir_tree, make_report, BIDSPath

# Load the LEMON participant metadata (indexed by subject ID) and keep only
# the rows of the subjects listed in lemon_eeg_subjects.csv.
lemon_info = pd.read_csv(
    "./META_File_IDs_Age_Gender_Education_Drug_Smoke_SKID_LEMON.csv")
lemon_info = lemon_info.set_index("ID")
eeg_subjects = pd.read_csv('./lemon_eeg_subjects.csv')
lemon_info = lemon_info.loc[eeg_subjects.subject]
# The source column codes 1=female, 2=male; swap the two codes.
# NOTE(review): presumably to match MNE's convention (1=male, 2=female)
# -- confirm.
lemon_info['gender'] = lemon_info['Gender_ 1=female_2=male'].map({1: 2, 2: 1})
# 'Age' holds '-'-separated integers; their mean is used as the age estimate.
# The builtin ``int`` replaces the ``np.int`` alias, which was removed in
# NumPy 1.24 and raises AttributeError there.
lemon_info['age_guess'] = np.array(
  lemon_info['Age'].str.split('-').tolist(), dtype=int).mean(1)
subjects = list(lemon_info.index)

def convert_lemon_to_bids(lemon_data_dir, bids_save_dir, n_jobs=1, DEBUG=False):
    """Convert the LEMON EEG dataset to BIDS format.

    Every subject in the module-level ``subjects`` list is converted with
    ``_convert_subject``. Subjects that failed are logged, together with the
    error, in ``bids_conv_erros.csv`` inside ``bids_save_dir``. Afterwards
    the ``age`` column of ``participants.tsv`` is filled with
    ``lemon_info['age_guess']`` for the subjects that converted successfully.

    Parameters
    ----------
    lemon_data_dir : str
        Directory where the original LEMON dataset is saved, e.g.
        `/storage/store3/data/LEMON_RAW`.
    bids_save_dir : str | pathlib.Path
        Directory where to save the BIDS version of the dataset. The error
        log and ``participants.tsv`` are read from / written to this
        directory as well.
    n_jobs : None | int
        Number of jobs for parallelization.
    DEBUG : bool
        If True, only the first subject is converted.
    """
    bids_root = pathlib.Path(bids_save_dir)
    subjects_ = subjects[:1] if DEBUG else subjects

    results = Parallel(n_jobs=n_jobs)(
        delayed(_convert_subject)(subject, lemon_data_dir, bids_save_dir)
        for subject in subjects_)

    # _convert_subject returns the subject name on success and a
    # ("BAD", subject, error) tuple on failure.
    good_subjects = [res for res in results if not isinstance(res, tuple)]
    failures = [res for res in results if isinstance(res, tuple)]

    # Built from comprehensions rather than ``zip(*failures)`` so that a run
    # without any failure yields an empty log instead of a ValueError.
    bad_subjects = pd.DataFrame(dict(
        subjects=[sub for _, sub, _ in failures],
        error=[err for _, _, err in failures]))
    bad_subjects.to_csv(bids_root / 'bids_conv_erros.csv')

    if not good_subjects:
        # Nothing was converted, so there is no age information to add.
        return

    # update the participants file as LEMON has no official age data
    participants_fname = bids_root / 'participants.tsv'
    participants = pd.read_csv(participants_fname, sep='\t')
    participants = participants.set_index("participant_id")
    participants.loc[good_subjects, 'age'] = lemon_info.loc[
        good_subjects, 'age_guess']
    # read_csv parses "n/a" cells as NaN; write them back as "n/a" rather
    # than as empty cells.
    participants.to_csv(participants_fname, sep='\t', na_rep='n/a')


def _convert_subject(subject, data_path, bids_save_dir):
    """Get the work done for one subject"""
    try:
        fname = pathlib.Path(data_path) / subject / "RSEEG" / f"{subject}.vhdr"    
        raw = mne.io.read_raw_brainvision(fname)

        raw.set_channel_types({"VEOG": "eog"})
        montage = mne.channels.make_standard_montage('standard_1005')
        raw.set_montage(montage)
        sub_id = subject.strip("sub-")
        raw.info['subject_info'] = {
            'participant_id': sub_id,
            'sex': lemon_info.loc[subject, 'gender'],
            'age': lemon_info.loc[subject, 'age_guess'],
            # XXX LEMON shares no public age 
            'hand': lemon_info.loc[subject, 'Handedness']
        }
        events, _ = mne.events_from_annotations(raw)

        events = events[(events[:, 2] == 200) | (events[:, 2] == 210)]
        event_id = {"eyes/open": 200, "eyes/closed": 210}
        bids_path = BIDSPath(
            subject=sub_id, session=None, task='RSEEG',
            run=None,
            root=bids_save_dir, datatype='eeg', check=True)

        write_raw_bids(
            raw,
            bids_path,
            events_data=events,
            event_id=event_id,
            overwrite=True
        )
    except Exception as err:
        print(err)
        return ("BAD", subject, err)
    return subject


if __name__ == '__main__':
    def _parse_bool(value):
        """Turn a command-line string such as 'true' / 'False' into a bool.

        ``type=bool`` cannot be used for this: ``bool('False')`` is True
        because every non-empty string is truthy.
        """
        text = str(value).strip().lower()
        if text in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(
            f'expected a boolean value, got {value!r}')

    parser = argparse.ArgumentParser(description='Convert LEMON to BIDS.')
    parser.add_argument(
        '--lemon_data_dir', type=str,
        default='/storage/store3/data/LEMON_RAW',
        help='Path to the original data.')
    parser.add_argument(
        '--bids_data_dir', type=str,
        default=pathlib.Path("/storage/store3/data/LEMON_EEG_BIDS"),
        help='Path to where the converted data should be saved.')
    parser.add_argument(
        '--n_jobs', type=int, default=1,
        help='number of parallel processes to use (default: 1)')
    # Accepts "--DEBUG", "--DEBUG True" and "--DEBUG False"; the bare flag
    # means True.
    parser.add_argument(
        '--DEBUG', type=_parse_bool, nargs='?', const=True, default=False,
        help='activate debugging mode')
    args = parser.parse_args()

    convert_lemon_to_bids(
        args.lemon_data_dir, args.bids_data_dir, n_jobs=args.n_jobs,
        DEBUG=args.DEBUG)

    # Show the resulting directory layout and an automatic dataset summary.
    print_dir_tree(args.bids_data_dir)
    print(make_report(args.bids_data_dir))

tuab

Configuration file:

from sys import path
from pathlib import Path
import mne

# Label identifying the study this configuration belongs to.
study_name = "age-prediction-benchmark"

# --- Machine-specific settings (on drago) ----------------------------------
N_JOBS = 80

# --- Locations on disk -----------------------------------------------------
bids_root = Path("/storage/store2/data/TUAB-healthy-bids-bv")
# Earlier runs wrote to /storage/store3/derivatives/TUAB-healthy-bids2
deriv_root = Path("/storage/store3/derivatives/TUAB-healthy-bids3")
subjects_dir = Path('/storage/store/data/camcan-mne/freesurfer')

# NOTE(review): presumably points the source-estimation steps at the epochs
# saved with the "autoreject" processing label -- confirm in pipeline docs.
source_info_path_update = dict(processing='autoreject', suffix='epo')

# Template electrode montage: start from MNE's "standard_1005" montage and
# append the suffix '-REF' to the name of every channel in it.
# The return value of rename_channels is not reassigned, so this relies on
# the montage being modified in place.
# NOTE(review): presumably the suffix matches the channel naming of the
# recordings -- confirm against the dataset.
eeg_template_montage = mne.channels.make_standard_montage("standard_1005")
eeg_template_montage.rename_channels(
    {ch: ch + '-REF' for ch in eeg_template_montage.ch_names})

# --- Source-level settings -------------------------------------------------
inverse_targets = []

noise_cov = 'ad-hoc'
eeg_reference = []  # Tuab has a custom reference

# --- Recording selection ---------------------------------------------------
# To process a single subject, set e.g. subjects = ['00002355'].
# An alternative task label used before was "normal".
task = "rest"

analyze_channels = ['Fp1', 'Fp2', 'F3', 'F4', 'C3', 'C4', 'P3', 'P4', 'O1',
                    'O2', 'F7', 'F8', 'T3', 'T4', 'T5', 'T6', 'A1', 'A2',
                    'Fz', 'Cz', 'Pz']

conditions = []

sessions = ["001"]

data_type = "eeg"
ch_types = ["eeg"]

# --- Temporal filtering / resampling ---------------------------------------
l_freq = 0.1
h_freq = 49
resample_sfreq = 200
l_trans_bandwidth = "auto"
h_trans_bandwidth = "auto"

# --- Artifact handling -----------------------------------------------------
find_breaks = False

spatial_filter = None

reject = None

on_rename_missing_events = "warn"

# --- Epoching --------------------------------------------------------------
# Epochs end one sample period (1 / resample_sfreq) before the 10 s mark.
epochs_tmin = 0
epochs_tmax = 10 - 1 / resample_sfreq
rest_epochs_duration = 10. - 1 / resample_sfreq
rest_epochs_overlap = 0.
baseline = None

event_repeated = "drop"
shortest_event = 1

# --- Source estimation -----------------------------------------------------
run_source_estimation = True
use_template_mri = True

# --- Execution -------------------------------------------------------------
random_state = 42

log_level = "info"

mne_log_level = "info"

# on_error used to be assigned twice ("abort", then 'continue'); only the
# last assignment ever took effect, so the single effective value is kept.
# Other values that were tried here: 'abort', 'debug'.
on_error = 'continue'

Benchmark results

camcan

Configuration file:

chbp

Configuration file:

lemon

Configuration file:

tuab

Configuration file: