Source code for clinicadl.split.splitter.single_split
from typing import Optional, Union

from pydantic import NonNegativeFloat, ValidationInfo, field_validator

from clinicadl.data.datasets import Dataset
from clinicadl.split.split import Split
from clinicadl.split.splitter.splitter import (
    Splitter,
    SplitterConfig,
    SubjectsSessionsSplit,
)


class SingleSplitConfig(SplitterConfig):
    """
    Configuration for simple split.

    On top of whatever ``SplitterConfig`` declares, this model carries the
    fields listed below. The two ``p_*_threshold`` fields are additionally
    constrained to the closed interval [0, 1] by ``_validate_thresholds``.
    """

    _json_name: str = "single_split_config"

    n_test: NonNegativeFloat
    stratification: list[str]
    # Both thresholds go through `_validate_thresholds` after pydantic's own
    # type validation (mode="after").
    p_categorical_threshold: NonNegativeFloat
    p_continuous_threshold: NonNegativeFloat

    @field_validator(
        "p_categorical_threshold",
        "p_continuous_threshold",
        mode="after",
    )
    @classmethod
    def _validate_thresholds(
        cls, value: Union[float, int], ctx: ValidationInfo
    ) -> float:
        """
        Ensures that the thresholds are between 0 and 1.

        Parameters
        ----------
        value : Union[float, int]
            The already type-validated threshold.
        ctx : ValidationInfo
            Validation context; only ``field_name`` is read, to build the
            error message.

        Returns
        -------
        float
            ``value``, unchanged, when it lies in [0, 1].

        Raises
        ------
        ValueError
            If ``value`` is not within [0, 1] (bounds included).
        """
        # The chained comparison is kept on purpose: a NaN fails it and is
        # therefore rejected as well.
        if 0 <= value <= 1:
            return value
        raise ValueError(f"'{ctx.field_name}' must be between 0 and 1, got {value}")

    def _check_split_dirs(self) -> None:
        """
        Checks the split directory.

        Delegates to ``_check_split_dir`` with ``self.split_dir``; neither is
        defined in this class (presumably both come from ``SplitterConfig``).
        """
        self._check_split_dir(self.split_dir)
class SingleSplit(Splitter):
    """
    To handle a single training-validation split, as opposed to
    :py:class:`~clinicadl.split.KFold` that can handle several splits.

    This object will read a split directory returned by
    :py:func:`~clinicadl.split.make_split` and can then be used to split any
    :py:class:`~clinicadl.data.datasets.Dataset` using
    :py:meth:`~SingleSplit.get_split`, provided that all the
    (participant, session) pairs in the dataset are mentioned in the split
    directory.

    Parameters
    ----------
    split_dir : PathType
        The split directory, returned by
        :py:func:`~clinicadl.split.make_split`.

    Raises
    ------
    FileNotFoundError
        If ``split_dir`` does not exist or if a required file is missing in
        this directory.
    """

    _config_type = SingleSplitConfig

    def get_split(
        self, dataset: Dataset, eval_dataset: Optional[Dataset] = None
    ) -> Split:
        """
        Splits a dataset according to the split found in the split directory.

        Parameters
        ----------
        dataset : Dataset
            The dataset to split.
        eval_dataset : Optional[Dataset], default=None
            If not ``None``, it will be understood as the dataset from which
            the validation dataset should be created, and ``dataset`` will be
            the dataset from which the training dataset will be created
            (see examples). If ``None``, both training and validation
            datasets are built from ``dataset``.

        Returns
        -------
        Split
            A :py:class:`~clinicadl.split.Split` object, with the training
            and validation datasets.

        Examples
        --------
        .. code-block::

            >>> df  # a quick look at the data
              participant_id session_id
            0        sub-000   ses-M000
            1        sub-000   ses-M003
            2        sub-010   ses-M003
            3        sub-010   ses-M012
            4        sub-100   ses-M000
            5        sub-100   ses-M012
            6        sub-999   ses-M099
            7        sub-999   ses-M999

        .. code-block::

            from clinicadl.split import SingleSplit
            from clinicadl.data.datasets import BidsDataset
            from clinicadl.transforms import TransformsHandler, extraction

            dataset = BidsDataset(
                "bids_dir",
                data=df,
                transforms=TransformsHandler(extraction=extraction.Patch(patch_size=64)),
                ...
            )
            splitter = SingleSplit("split_dir")
            split = splitter.get_split(dataset)

        .. code-block::

            >>> split.train_dataset.df
              participant_id session_id
            0        sub-000   ses-M000
            1        sub-000   ses-M003
            2        sub-100   ses-M000
            3        sub-100   ses-M012
            4        sub-999   ses-M099
            5        sub-999   ses-M999
            >>> split.val_dataset.df
              participant_id session_id
            0        sub-010   ses-M003

        Now, let's say you want to train your model on patches, but evaluate
        it on images:

        .. code-block::

            eval_dataset = BidsDataset(
                "bids_dir",
                data=df,
                transforms=TransformsHandler(),
                ...
            )
            split = splitter.get_split(dataset, eval_dataset=eval_dataset)

        .. code-block::

            >>> split.train_dataset[0].spatial_shape
            (64, 64, 64)
            >>> split.val_dataset[0].spatial_shape
            (181, 217, 181)
        """
        # All the work is delegated to `_get_split`, which is not defined in
        # this class (presumably inherited from `Splitter`).
        single_split = self._get_split(dataset, eval_dataset=eval_dataset)
        return single_split

    def _read_splits(self) -> list[SubjectsSessionsSplit]:
        """
        Load the split from the tsv files in 'split_dir'.

        Returns
        -------
        list[SubjectsSessionsSplit]
            A one-element list holding the split read from
            ``self.config.split_dir``.
        """
        only_split = self._read_split(self.config.split_dir)
        return [only_split]