import sys
!{sys.executable} -m pip install -r requirements.txt


import download_data

# This will take some time; you can skip data validation by setting check_hash=False, but that is not recommended.
### Uncomment this line to download the data:
# download_data.data_fetch(check_hash=True)


import getpass
import subprocess

## Decrypt the data.
# The password is handed to openssl on stdin ('-pass stdin') instead of being
# embedded in the argument list, where it would be readable from the process table.
# NOTE: openssl prints a "deprecated key derivation" warning for this command;
# presumably the archive was encrypted without -pbkdf2, so the derivation options
# must stay as they are for decryption to succeed.
password = getpass.getpass("Enter password")
subprocess.run(['openssl', 'aes-256-cbc', '-md', 'sha256',
                '-d', '-a', '-in',
                'ATLAS_R2.0_encrypted.tar.gz', '-out', 'ATLAS_R2.0.tar.gz',
                '-pass', 'stdin'],
               input=password + '\n', text=True, check=True)
del password  # do not keep the secret in the notebook namespace

# check=True / check_call raise CalledProcessError on a non-zero exit status, so a
# failed decryption (e.g. wrong password) stops here instead of extracting garbage.
subprocess.check_call(['tar', '-xzf', 'ATLAS_R2.0.tar.gz'])

*** WARNING : deprecated key derivation used.
Using -iter or -pbkdf2 would be better.

0


from stroke import indi_reformat
## Reformat the extracted ATLAS archive into `destination` (a BIDS-style layout, judging by the function name).
## These lines are active as written; comment them out to skip the reformatting step.
atlas_2_path = 'ATLAS_R2.0/ATLAS_2'  # Assumes that it is in the current directory. Set this to the location on your drive.
destination = 'data'  # Assumes that we're in the stroke/ directory.
indi_reformat.bidsify_indi_atlas(atlas_path=atlas_2_path, destination_path=destination)


import bids
from bids import BIDSLayout
# NOTE(review): meant as warning suppression, but the pybids version that produced the
# output below emits a FutureWarning for this very option ("will be removed in pybids 0.16").
bids.config.set_option('extension_initial_dot', True)  # Warning suppression
# Index data/train including its derivatives and keep only the layout of the 'ATLAS' derivative.
data = BIDSLayout('data/train', derivatives=True).derivatives['ATLAS']

print(data)

/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",

BIDS Layout: ...s/data/train/derivatives/ATLAS | Subjects: 436 | Sessions: 436 | Runs: 0


# Pick random sample
import random
subjects = data.entities['subject'].unique()  # presumably every subject label known to the layout
sample = random.choice(subjects)  # no seed is set, so the chosen subject differs between runs

# Take the first match of each query; [0] raises IndexError if the subject has no such file.
structural = data.get(subject=sample, suffix='T1w')[0]
mask = data.get(subject=sample, suffix='mask')[0]
print(structural.filename)
print(mask.filename)

sub-r031s022_ses-1_space-MNI152NLin2009aSym_T1w.nii.gz
sub-r031s022_ses-1_space-MNI152NLin2009aSym_label-L_desc-T1lesion_mask.nii.gz


%matplotlib inline
from stroke.nii_slice import qc_slice
from matplotlib import pyplot as plt
# Default size (in inches) for figures created from here on.
plt.rcParams['figure.figsize'] = [14, 6]
# Draw 5 slices of the structural image together with the lesion mask of the same subject.
qc_slice(structural.path, mask_path=mask.path, nslices=5)

# Beautify plot
plt.text(0,0, f'{sample}', bbox={'facecolor': 'white'});  # Label which subject
# Hide the tick labels on both axes.
plt.tick_params(axis='both', labelbottom=False, labelleft=False)

/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",
/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",
/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",


# Fetch the image object once and reuse it for both the shape and the header dump.
structural_image = structural.get_image()
print(f'Shape of structural scan: {structural_image.shape}')
print(structural_image.header)

Shape of structural scan: (197, 233, 189)
<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : b''
db_name         : b''
extents         : 0
session_error   : 0
regular         : b''
dim_info        : 0
dim             : [  3 197 233 189   1   1   1   1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : float32
bitpix          : 32
slice_start     : 0
pixdim          : [1. 1. 1. 1. 0. 1. 1. 1.]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 10
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : b'FreeSurfer Jan 18 2017'
aux_file        : b''
qform_code      : scanner
sform_code      : scanner
quatern_b       : 0.0
quatern_c       : 0.0
quatern_d       : 0.0
qoffset_x       : -98.0
qoffset_y       : -134.0
qoffset_z       : -72.0
srow_x          : [  1.   0.   0. -98.]
srow_y          : [   0.    1.    0. -134.]
srow_z          : [  0.   0.   1. -72.]
intent_name     : b''
magic           : b'n+1'


from stroke.bids_loader import BIDSLoader
# Print the class documentation: constructor parameters and the loading methods.
help(BIDSLoader)

Help on class BIDSLoader in module stroke.bids_loader:

class BIDSLoader(builtins.object)
 |  BIDSLoader(root_dir: str, data_entities: list, target_entities: list, batch_size: int = 1, data_derivatives_names: list = None, target_derivatives_names: list = None, root_list: list = None, label_names: list = None)
 |  
 |  BIDS-compatible data loader used for classifying BIDS datasets.
 |  Parameters
 |  ----------
 |  root_dir : str
 |      BIDS root directory; subject directories should be immediately below this (e.g. root_dir/sub-123)
 |  data_entities : list [dict]
 |      List of dictionaries, where each dictionary contains BIDS entities that will uniquely match data. Multiple
 |      dictionaries should be used to if multiple files will be used for prediction.
 |      Empty entries ({'subject': ''}) indicate that that entry should match across samples (e.g., [{'session': ''}]
 |      would ensure that entities from a returned sample are from the same session, but that any value is valid.
 |      For example: ({'subject': '', 'session': '1', 'desc': 'Normalized'}, {'session': '1', 'desc': 'defaced'}) would
 |      return samples of two images: The first could be 'sub-123_ses-1_desc-Normalized.nii.gz', and the second would be
 |      'sub-123_ses-1_desc-defaced.nii.gz'. The subject entity matches, session is restricted to "1", and the
 |      description entitiy is used to differentiate between them.
 |  target_entities : list [dict]
 |      Same as data_entities, but for the prediction target.
 |  batch_size : int
 |      Optional. Size of the batch to train the estimator. Default: 1.
 |  data_derivatives_names : list [str]
 |      Optional. If an entry in data_entities is BIDS derivatives data, its name should be listed here. Entries
 |      that don't correspond to derivatives should be listed as None. Default: [None for _ in data_entities]
 |  target_derivatives_names : list [str]
 |      Optional. If an entry in target_entities is BIDS derivatives data, its name should be listed here. Entries
 |      that don't correspond to derivatives should be listed as None. Default: [None for _ in target_entities]
 |  root_list : list
 |      Reserved. Not yet implemented. List of BIDS root directories, if data must be loaded from different BIDS
 |      directories. There must be exactly len(data_entities) + len(target_entities) entries in the list, with the
 |      order corresponding to the order of the data_entities, followed by the target_entities.
 |  label_names : list [str]
 |      Names of the values of the target, if any.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, root_dir: str, data_entities: list, target_entities: list, batch_size: int = 1, data_derivatives_names: list = None, target_derivatives_names: list = None, root_list: list = None, label_names: list = None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __len__(self)
 |  
 |  load_batch(self, indices: list)
 |      Loads a batch of N images and returns the data/target in arrays.
 |      Parameters
 |      ----------
 |      indices : list [int]
 |          List of indices to load.
 |      Returns
 |      -------
 |      np.array
 |          Array of shape (len(indices), num_data, *image.shape) containing data.
 |      np.array
 |          Array of shape (len(indices), num_target, *image.shape) containing data.
 |  
 |  load_sample(self, idx: int)
 |      Loads the sample at idx.
 |      Parameters
 |      ----------
 |      idx : int
 |          Index of the sample to load. Max valid value is len(BIDSClassifier)-1
 |      
 |      Returns
 |      -------
 |      np.array
 |          Array of shape (num_data, *image.shape) containing the data.
 |      np.array
 |          Array of shape (num_target, *image.shape) containing the target.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  get_matching_images(image_to_match: bids.layout.models.BIDSImageFile, bids_dataset: bids.layout.layout.BIDSLayout, matching_entities: list = None, required_entities: dict = None)
 |      Returns a list of images from the BIDS dataset that has the specified required_entities and has the same
 |      value for entities listed in matching_entities as the image_to_match.
 |      Example: for an image "sub-123_ses-1_T1w.nii" with matching_entities ['ses'] and required_entities
 |      {'suffix': 'FLAIR'}, the image "sub-123_ses-1_FLAIR.nii" would match, but "sub-123_ses-2_FLAIR.nii" would not.
 |      Parameters
 |      ----------
 |      required_entities: dict
 |          Entity-value dictionary that are required.
 |      matching_entities: list
 |          List of entities that must match, if present, between the previous image and the one to fetch.
 |      image_to_match: BIDSImageFile
 |          Image to use as reference for matching_entities.
 |      bids_dataset: BIDSLayout
 |          BIDS dataset from which to fetch the new image.
 |      
 |      Returns
 |      -------
 |      list [BIDSImageFile]
 |          BIDS image file matching the input specifications. Empty if there are no matches.
 |  
 |  load_image_tuple(image_tuple: tuple, dtype=<class 'numpy.float32'>)
 |      Loads the tuple and returns it in an array
 |      Parameters
 |      ----------
 |      image_tuple : tuple (BIDSImageFile,)
 |          Tuple of BIDSImageFile to be loaded and returned in an array
 |      Returns
 |      -------
 |      np.array
 |          Loaded data
 |  
 |  load_image_tuple_list(image_list: list, dtype=<class 'numpy.float32'>)
 |      Loads each image in the tuple and returns in a single array; different tuples in the list are assumed to be
 |      batches. The returned array will be of shape (len(image_list), len(image_tuple), *image.shape
 |      Parameters
 |      ----------
 |      image_list : list [tuple]
 |          List of tuples containing BIDSImageFile
 |      
 |      Returns
 |      -------
 |      np.array
 |          Loaded data.
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)


# Pair every T1w image (data) with its lesion mask (target).
# data_entities / target_entities are documented as a list of dicts, one dict per
# image in a sample, so the single entity dict of each is wrapped in a list. The
# derivatives-name lists then line up with them entry for entry.
# An empty value ('') means "must match across the sample, any value allowed".
training_set = BIDSLoader(root_dir='data/train/',
                          data_entities=[{'subject': '',
                                          'session': '',
                                          'suffix': 'T1w'}],
                          target_entities=[{'label': 'L',
                                            'desc': 'T1lesion',
                                            'suffix': 'mask'}],
                          data_derivatives_names=['ATLAS'],
                          target_derivatives_names=['ATLAS'],
                          label_names=['not lesion', 'lesion'])


# Show the first data entry and the first target entry held by the loader.
print(f'Image: {training_set.data_list[0]}')
print(f'Mask: {training_set.target_list[0]}')

Image: (<BIDSImageFile filename='/home/frcaud/ramp-board/ramp_deployment/ramp-kits/stroke_lesions/data/train/derivatives/ATLAS/sub-r001s002/ses-1/anat/sub-r001s002_ses-1_space-MNI152NLin2009aSym_T1w.nii.gz'>,)
Mask: (<BIDSImageFile filename='/home/frcaud/ramp-board/ramp_deployment/ramp-kits/stroke_lesions/data/train/derivatives/ATLAS/sub-r001s002/ses-1/anat/sub-r001s002_ses-1_space-MNI152NLin2009aSym_label-L_desc-T1lesion_mask.nii.gz'>,)


# Load one sample; per the loader documentation each array has shape (n_images, *image.shape).
# Dedicated names are used so that `data`, which holds the BIDSLayout the earlier
# cells query, is not overwritten and those cells can still be re-run.
sample_data, sample_target = training_set.load_sample(0)
print(f'data shape: {sample_data.shape}')
print(f'target shape: {sample_target.shape}')

data shape: (1, 197, 233, 189)
target shape: (1, 197, 233, 189)


# Load three samples at once; per the loader documentation the batch axis comes first.
# Dedicated names are used so that `data`, which holds the BIDSLayout the earlier
# cells query, is not overwritten and those cells can still be re-run.
batch_data, batch_target = training_set.load_batch([0,1,2])
print(f'data shape: {batch_data.shape}')
print(f'target shape: {batch_target.shape}')

data shape: (3, 1, 197, 233, 189)
target shape: (3, 1, 197, 233, 189)


from nilearn.image import load_img
from nilearn import plotting

/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/nilearn/datasets/__init__.py:93: FutureWarning: Fetchers from the nilearn.datasets module will be updated in version 0.9 to return python strings instead of bytes and Pandas dataframes instead of Numpy arrays.
  warn("Fetchers from the nilearn.datasets module will be "


def plot_overlap(path_to_t1, path_to_lesion, title='overlap', cut_coords=(0, 0, 0)):
    """Draw a lesion mask on top of its structural image.

    Parameters
    ----------
    path_to_t1
        Path of the image used as the background.
    path_to_lesion
        Path of the lesion mask drawn over the background.
    title : str
        Optional. Title shown on the figure. Default: 'overlap'.
    cut_coords
        Optional. Passed unchanged to nilearn's plot_roi to choose where the
        cuts are made. Default: (0, 0, 0), the value that used to be hard-coded.

    Returns
    -------
    None
    """
    # Only the side effect is needed: make figure 1 current with the wanted size.
    plt.figure(1, figsize=(14, 5), frameon=False, dpi=50)
    ax = plt.gca()
    plotting.plot_roi(path_to_lesion, path_to_t1, title=title,
                      axes=ax,
                      draw_cross=False,
                      annotate=False,
                      cmap='autumn',  # the lesions will be shown in red
                      cut_coords=cut_coords,
                      )
# Overlay the lesion mask of the randomly chosen subject on that subject's T1w image.
plot_overlap(path_to_t1=structural.path,
             path_to_lesion=mask.path,
             title=f'overlap, {sample}')

/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/numpy/ma/core.py:2831: UserWarning: Warning: converting a masked element to nan.
  _data = np.array(data, dtype=dtype, copy=copy,


from stroke.stroke_config import bids_loader_train, bids_loader_test  # Default loaders for starting kit
import numpy as np

def _lesion_loads(loader, n_subjects):
    """Return the summed target values of the first n_subjects entries of loader.target_list."""
    loads = np.zeros((n_subjects,))  # lesion load per subject
    for idx in range(n_subjects):
        loads[idx] = np.sum(loader.load_image_tuple(loader.target_list[idx]))
    return loads


# The same number of subjects is read from both loaders, so the subset is capped at
# 50, at a tenth of the training set, and at the size of the *test* set. (The test
# cap used to be computed from the training loader, which allowed indexing past the
# end of the test set.)
train_fraction = np.floor(len(bids_loader_train.target_list)/10)
test_fraction = np.floor(len(bids_loader_test.target_list))
subset_size = np.min([50, train_fraction, test_fraction])  # number of subjects to examine; reduce if it takes too long to load
subset_size = int(subset_size)

# Load the data, compute the lesion load, store
lesion_load_train = _lesion_loads(bids_loader_train, subset_size)
lesion_load_test = _lesion_loads(bids_loader_test, subset_size)


# Plot the distribution!
from matplotlib import pyplot as plt
# NOTE(review): a subject whose lesion load is 0 would give log10(0) = -inf here — confirm none occur.
plt.hist(np.log10(lesion_load_train), bins=10)  # Note the log scale
plt.hist(np.log10(lesion_load_test), bins=10, alpha=0.5)  # semi-transparent so both histograms stay visible
plt.xlabel('log10 Lesion Load')
plt.ylabel('Frequency of lesion load')
plt.title('Distribution of lesion load in sets')
plt.legend(['Training set', 'Testing set'])  # same order as the two hist calls above

<matplotlib.legend.Legend at 0x7f3e29c498b0>


from submissions.sample.estimator import BIDSEstimator  # Load estimator

# dir() returns every attribute name of the class; keep the ones without a leading underscore.
methods = [name for name in dir(BIDSEstimator) if not name.startswith('_')]
print(methods)

['fit', 'fit_partial', 'get_params', 'predict', 'predict_proba', 'set_params']


# Print the documentation of the sample estimator's fit_partial method.
help(BIDSEstimator.fit_partial)

Help on function fit_partial in module submissions.sample.estimator:

fit_partial(self, X, y)
    Fit the estimator using the input data (X) and target (y). Assumes that the inputs represent only a fraction
    of the data and that it will be called multiple times while using the dataset. I.e., learning rates and adaptive
    parameters should not be entirely recalculated with each call to this method. Required.
    This estimator in particular does nothing.
    Parameters
    ----------
    X : np.array
        Data of the form (n_samples, n_channels, *image.shape)
    y : np.array
        Target (labels) of the form (n_samples, n_channels, *image.shape)
    
    Returns
    -------
    None


# Print the documentation of the sample estimator's predict_proba method.
help(BIDSEstimator.predict_proba)

Help on function predict_proba in module submissions.sample.estimator:

predict_proba(self, X)
    Applies the data to the estimator to produce a prediction. The output can be continuous to represent the
    relative confidence the estimator has in the prediction. Optional.
    Typically, correct but uncertain predictions are rewarded less. Similarly, incorrect but uncertain predictions
    are punished less severely.
    This estimator always returns 1.
    Parameters
    ----------
    X : np.array
        Data of the form (n_samples, n_channels, *image.shape)
    
    Returns
    -------
    np.array
        Prediction made by the estimator.


!ramp-test --quick-test --submission sample

/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",
/home/frcaud/anaconda3/envs/ramp-stroke/lib/python3.8/site-packages/bids/config.py:39: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.
  warnings.warn("Setting 'extension_initial_dot' will be removed in pybids 0.16.",
Testing ATLAS Stroke Lesion Segmentation
Reading train and test files from ./data/ ...
Reading cv ...
Training submissions/sample ...
CV fold 0
	score  Sørensen–Dice Coefficient      time
	train                      0.012  1.257557
	valid                      0.004  0.000009
	test                       0.017  0.000004
CV fold 1
	score  Sørensen–Dice Coefficient      time
	train                      0.012  1.323619
	valid                      0.009  0.000010
	test                       0.017  0.000005
CV fold 2
	score  Sørensen–Dice Coefficient      time
	train                      0.006  1.301342
	valid                      0.018  0.000007
	test                       0.017  0.000004
CV fold 3
	score  Sørensen–Dice Coefficient      time
	train                      0.018  1.360942
	valid                      0.001  0.000010
	test                       0.017  0.000005
CV fold 4
	score  Sørensen–Dice Coefficient      time
	train                      0.012  1.287049
	valid                      0.009  0.000006
	test                       0.017  0.000005
----------------------------
Mean CV scores
----------------------------
	score Sørensen–Dice Coefficient        time
	train            0.012 ± 0.0036  1.3 ± 0.03
	valid            0.008 ± 0.0058   0.0 ± 0.0
	test                0.017 ± 0.0   0.0 ± 0.0
----------------------------
Bagged scores
----------------------------
	score  Sørensen–Dice Coefficient
	valid                          0
	test                           0


from stroke.scoring import DiceCoeff
# Print the documentation of the score type used by this challenge.
help(DiceCoeff)

Help on class DiceCoeff in module stroke.scoring:

class DiceCoeff(rampwf.score_types.base.BaseScoreType)
 |  DiceCoeff(name='Sørensen–Dice Coefficient', precision=3)
 |  
 |  Method resolution order:
 |      DiceCoeff
 |      rampwf.score_types.base.BaseScoreType
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __call__(self, y_true: tuple, y_pred: <built-in function array>)
 |      Call self as a function.
 |  
 |  __init__(self, name='Sørensen–Dice Coefficient', precision=3)
 |      Scoring class for RAMP workflows. When called, returns the Sørensen–Dice coefficient. Note that this
 |      implementation allows for continuous values in the prediction.
 |      Parameters
 |      ----------
 |      name : str
 |          Name of the score; used for creating column headers.
 |      precision : str
 |          Numerical precision.
 |  
 |  score_function(self, Y_true: <built-in function array>, Y_pred: <built-in function array>)
 |      Returns the Sørensen–Dice coefficient for the input images. If multiple samples are given, the mean score
 |      is returned.
 |      Parameters
 |      ----------
 |      true_bids : BIDSPrediction
 |          BIDSPrediction with true_bids.y_true set as an array.
 |      pred : np.array
 |          Array containing the predicted labels of an image.
 |      
 |      Returns
 |      -------
 |      dice_coefficient : float
 |          Sørensen–Dice coefficient.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  calc_score(array_0: <built-in function array>, array_1: <built-in function array>)
 |      Performs the calculation to get the Sørensen–Dice coefficient.
 |      Parameters
 |      ----------
 |      array_0 : np.array
 |          First array to score.
 |      array_1 : np.array
 |          Second array to score.
 |      
 |      Returns
 |      -------
 |      float
 |          Sørensen–Dice coefficient
 |  
 |  calc_score_parts(array_0: <built-in function array>, array_1: <built-in function array>)
 |      Computes the three parts of the Sørensen–Dice coefficient: overlap and 2 positives
 |      Parameters
 |      ----------
 |      array_0
 |      array_1
 |      
 |      Returns
 |      -------
 |      tuple
 |          Tuple containing (overlap, sum(array_0), sum(array_1)
 |  
 |  check_y_pred_dimensions(array_0: <built-in function array>, array_1: <built-in function array>)
 |      Checks that the dimensions of the inputs are consistent.
 |      Parameters
 |      ----------
 |      array_0 : np.array
 |          First array to check.
 |      array_1 : np.array
 |          Second array to check
 |      
 |      Returns
 |      -------
 |      bool
 |  
 |  unpack_data(array_0: <built-in function array>, output_shape: <built-in function array>)
 |      Unpacks boolean data packed via np.packbits into appropriate shape, discarding excess bytes
 |      Parameters
 |      ----------
 |      array_0 : np.array
 |          np.uint8 array to unpack
 |      output_shape : tuple
 |          Expected shape of output.
 |      
 |      Returns
 |      -------
 |      np.array
 |          Unpacked, reshape array
 |  
 |  ----------------------------------------------------------------------
 |  Readonly properties inherited from rampwf.score_types.base.BaseScoreType:
 |  
 |  worst
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from rampwf.score_types.base.BaseScoreType:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)


# Print the documentation of the static method that computes the coefficient itself.
help(DiceCoeff.calc_score)

Help on function calc_score in module stroke.scoring:

calc_score(array_0: <built-in function array>, array_1: <built-in function array>)
    Performs the calculation to get the Sørensen–Dice coefficient.
    Parameters
    ----------
    array_0 : np.array
        First array to score.
    array_1 : np.array
        Second array to score.
    
    Returns
    -------
    float
        Sørensen–Dice coefficient

RAMP: segmentation of brain lesions

Table of contents

1. Introduction

Clinical/research motivation

References

Objective of the challenge

Setup

Prerequisites

Data exploration

Data download and reformat

1. Archive download

2. Encryption password

3. OpenSSL + tar extraction

4. Data reformatting

Viewing the data files

Visualize data

Paired Loading

Lesion analysis

Sample prediction algorithms

Dummy solution (predict only 1s)

Submitting to RAMP

Scores used

Submission