# Source code for clinicadl.io.bids.reader

from __future__ import annotations

import os
import re
from pathlib import Path
from typing import Annotated, Optional

from pydantic import (
    ConfigDict,
    Field,
    StringConstraints,
    field_validator,
    model_validator,
)
from typing_extensions import Self

from clinicadl.utils.bids import BidsEntity, Session, Subject
from clinicadl.utils.config import ClinicaDLConfig, ObjectConfig
from clinicadl.utils.enum import BaseEnum
from clinicadl.utils.json import read_json
from clinicadl.utils.objects import HasConfig, equal_if_config_equal
from clinicadl.utils.typing import PathType

from .file_type import BidsFileType

# Marker appended to the ``RuntimeError`` message raised by ``Bids.get_path`` when
# no file matches. ``Bids.has_file_type`` looks for this exact text in the message
# to tell "nothing found" apart from "several files found".
NO_FILE_FOUND = "no file found"


class DatasetType(str, BaseEnum):
    """
    The values of ``DatasetType`` allowed by the BIDS specification
    (https://bids-specification.readthedocs.io/en/stable/glossary.html#datasettype-metadata).

    The members also inherit from ``str``, so they compare equal to their raw
    string value (e.g. ``DatasetType.RAW == "raw"``).
    """

    RAW = "raw"
    DERIVATIVE = "derivative"
    STUDY = "study"


class BidsConfig(ObjectConfig["Bids"]):
    """
    Config class for ``Bids``.

    It only stores the location of the BIDS-like directory, which is
    resolved right after validation.
    """

    # Location of the BIDS-like directory.
    path: Path

    @field_validator("path", mode="after")
    @classmethod
    def _resolve_path(cls, v: Path) -> Path:
        """Resolve the validated path with ``Path.resolve()``."""
        return v.resolve()

    @classmethod
    def _get_class(cls) -> type[Bids]:
        """
        Return the class this config describes, i.e. ``Bids``.

        NOTE(review): presumably called by ``ObjectConfig`` to rebuild the
        object from its config -- confirm against the base class.
        """
        return Bids


# A version string made of exactly three dot-separated groups of digits
# (e.g. "1.8.0"). Anything else (pre-release tags, two-part versions, ...)
# is rejected by the pattern.
SemVer = Annotated[str, StringConstraints(pattern=r"^\d+\.\d+\.\d+$")]


class DatasetDescription(ClinicaDLConfig):
    """
    The information stored in a BIDS' dataset description file.

    Only ``Name``, ``BIDSVersion`` and ``DatasetType`` are declared as fields
    (they can be passed either by field name or by alias). Any other key is
    accepted and kept as an extra attribute (``extra="allow"``); this is how
    the presence of ``"CAPSVersion"`` is detected.
    """

    # "Name" key of dataset_description.json.
    name: str = Field(alias="Name")
    # "BIDSVersion" key, must look like "X.Y.Z".
    bids_version: SemVer = Field(alias="BIDSVersion")
    # "DatasetType" key, one of the ``DatasetType`` values.
    dataset_type: DatasetType = Field(alias="DatasetType")

    model_config = ConfigDict(
        validate_by_name=True,
        validate_by_alias=True,
        extra="allow",
    )

    @property
    def is_caps(self) -> bool:
        """Whether the description contains the extra key ``"CAPSVersion"``."""
        return "CAPSVersion" in self.__pydantic_extra__

    @classmethod
    def from_json(cls, json_path, **kwargs) -> Self:
        """
        Build a ``DatasetDescription`` from a JSON file.

        Parameters
        ----------
        json_path
            The path to the JSON file, passed as is to ``read_json``.
        **kwargs
            Accepted but currently ignored.

        Returns
        -------
        Self
            The description built from the content of the JSON file.
        """
        return cls(**read_json(json_path))

    @model_validator(mode="after")
    def _validate_caps_version(self) -> Self:
        """
        Check that a CAPS is declared as a derivative.

        Raises
        ------
        ValueError
            If ``"CAPSVersion"`` is present but ``DatasetType`` is not
            ``"derivative"``. Pydantic reports it as a ``ValidationError``.
        """
        # An explicit raise rather than an ``assert``: assertions are removed
        # when Python runs with -O, which would silently disable this check.
        if self.is_caps and self.dataset_type != DatasetType.DERIVATIVE:
            raise ValueError(
                f"If the directory is a CAPS, DatasetType must be 'derivative' in dataset_description.json. Got: '{self.dataset_type}'"
            )

        return self


@equal_if_config_equal
class Bids(HasConfig[BidsConfig]):
    """
    A class to read :term:`BIDS` datasets or :term:`BIDS derivatives <BIDS derivative>`
    (including :term:`CAPS`).

    The directory is expected to contain the mandatory
    :bids:`dataset_description.json <modality-agnostic-files/dataset-description.html#dataset_descriptionjson>`
    file, with the key ``"DatasetType"`` (whose value can be either ``"raw"``,
    ``"derivative"`` or ``"study"``).

    Depending on the value of ``DatasetType``, the expected organisation is different:

    - ``"raw"``: default organisation, the subject-specific folders are in the root
      directory. The tensors saved by ``ClinicaDL`` will be in ``derivatives/tensors``.
    - ``"study"``: :bids:`study organisation <common-principles.html#study-dataset>`,
      the subject-specific folders are in ``sourcedata/raw``. The tensors will be saved
      in ``derivatives/tensors``.
    - ``"derivative"``:

      - if ``"CAPSVersion"`` is in ``dataset_description.json``, the directory will be
        understood as a :term:`CAPS`. The subject-specific folders are expected in
        ``subjects``, and the tensors will be saved in ``../tensors``.
      - otherwise, it is interpreted as a BIDS derivative. The subject-specific folders
        are expected in the root directory, and the tensors will be saved in
        ``../tensors``.

    Parameters
    ----------
    path : str | Path
        The path to the BIDS-like directory.

    Raises
    ------
    FileNotFoundError
        If there is no ``dataset_description.json`` in ``path``.

    Examples
    --------
    The default BIDS organisation:

    .. code-block:: bash

        bids    <- this path is passed to the Bids object
        ├── dataset_description.json    <- contains "DatasetType": "raw"
        ├── sub-...
        ...
        └── derivatives
            └── tensors    <- where tensors will be saved

    The "study" organization:

    .. code-block:: bash

        study    <- this path is passed to the Bids object
        ├── dataset_description.json    <- contains "DatasetType": "study"
        ├── derivatives
        │   └── tensors    <- where tensors will be saved
        └── sourcedata
            └── raw
                ├── sub-...

    A BIDS derivative:

    .. code-block:: bash

        ├── bids_derivative    <- this path is passed to the Bids object
        │   ├── dataset_description.json    <- contains "DatasetType": "derivative"
        │   ├── sub-...
        │   ...
        └── tensors    <- where tensors will be saved

    A CAPS:

    .. code-block:: bash

        ├── caps    <- this path is passed to the Bids object
        │   ├── dataset_description.json    <- contains "CAPSVersion"
        │   └── subjects
        │       ├── sub-...
        │       ...
        └── tensors    <- where tensors will be saved
    """

    _config_type = BidsConfig
    DATASET_DESC_FILENAME = "dataset_description.json"

    def __init__(self, path: PathType):
        self.config = self._config_type(path=path)
        # Use the path stored in the config, i.e. after validation by ``BidsConfig``.
        self.path = self.config.path
        self.dataset_desc = self._read_bids_description(self.path)

    @classmethod
    def _read_bids_description(cls, bids_dir: Path) -> DatasetDescription:
        """
        Reads the dataset description file of ``bids_dir``, which gives the
        DatasetType and tells whether the dataset is a CAPS or not.

        Raises
        ------
        FileNotFoundError
            If the dataset description file does not exist in ``bids_dir``.
        """
        data_desc_path = bids_dir / cls.DATASET_DESC_FILENAME
        if not data_desc_path.exists():
            raise FileNotFoundError(
                f"A BIDS (or a derivative) must contain a dataset_description.json. Nothing found at: {data_desc_path}"
            )

        return DatasetDescription.from_json(data_desc_path)

    @property
    def participants_dir(self) -> Path:
        """
        Where the subject-specific directories are stored.
        """
        # The CAPS check comes first: a CAPS is also a "derivative".
        if self.dataset_desc.is_caps:
            return self.path / "subjects"
        if self.dataset_desc.dataset_type == DatasetType.STUDY:
            return self.path / "sourcedata" / "raw"
        return self.path

    @property
    def tensors_dir(self) -> Path:
        """
        Where the tensors produced by ``ClinicaDL`` are saved.
        """
        if self.dataset_desc.dataset_type == DatasetType.DERIVATIVE:
            # Derivatives (CAPS included): tensors live next to the directory.
            return self.path.parent / "tensors"
        return self.path / "derivatives" / "tensors"

    def get_path(
        self,
        file_type: BidsFileType,
        participant_id: Optional[str] = None,
        session_id: Optional[str] = None,
    ) -> Path:
        """
        To get the path of a file in the BIDS-like directory.

        The specifications of the file to find are given via a
        :py:class:`~clinicadl.io.bids.BidsFileType`. The user can also give
        a participant id and a session id if the wanted file is subject- and
        session- specific.

        Parameters
        ----------
        file_type : BidsFileType
            The specifications of the file to find.
        participant_id : Optional[str], default=None
            The participant id (e.g., ``"sub-xxx"``), if the file is subject-specific.
        session_id : Optional[str], default=None
            The session id (e.g., ``"ses-xxx"``), if the file is session-specific.

        Returns
        -------
        Path
            The file matching all the requirements.

        Raises
        ------
        RuntimeError
            If no corresponding file is found, or if several corresponding files are found.
        AssertionError
            If ``session_id`` is passed without ``participant_id``.

        Examples
        --------
        .. code-block:: bash

            bids
            ├── sub-001
            │   ├── ses-M000
            │   │   ├── anat
            │   │   │   └── sub-001_ses-M000_space-MNI152NLin2009cSym_res-1x1x1_T1w.nii
            │   │   └── sub-001_ses-M000_scans.tsv
            │   └── sub-001_sessions.tsv
            ...
            └── space-MNI152NLin2009cSym_participants.tsv

        .. code-block:: python

            >>> from clinicadl.io.bids import Bids, BidsFileType
            >>> bids = Bids("bids")
            >>> bids.get_path(
                    file_type=BidsFileType(
                        suffix="T1w",
                        data_type="anat",
                        with_entities={"space": "MNI152NLin2009cSym"},
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            Path("bids/sub-001/ses-M000/anat/sub-001_ses-M000_space-MNI152NLin2009cSym_res-1x1x1_T1w.nii")
            >>> bids.get_path(
                    file_type=BidsFileType(
                        suffix="scans",
                        extension=".tsv",
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            Path("bids/sub-001/ses-M000/sub-001_ses-M000_scans.tsv")
            >>> bids.get_path(
                    file_type=BidsFileType(
                        suffix="sessions",
                        extension=".tsv",
                    ),
                    participant_id="sub-001",
                )
            Path("bids/sub-001/sub-001_sessions.tsv")
            >>> bids.get_path(
                    file_type=BidsFileType(
                        suffix="participants",
                        extension=".tsv",
                        with_entities={"space": "MNI152NLin2009cSym"},
                    ),
                )
            Path("bids/space-MNI152NLin2009cSym_participants.tsv")
        """
        dir_ = self._find_root(participant_id, session_id)

        selected_files = []
        # If ``dir_`` does not exist, os.walk yields nothing and we end up
        # in the "no file found" case below.
        for root, _, files in os.walk(dir_):
            for file in files:
                full_path = os.path.join(root, file)
                # ``match`` is given the path relative to the search root.
                rel_path = os.path.relpath(full_path, dir_)
                try:
                    is_match = file_type.match(rel_path, participant_id, session_id)
                except (ValueError, AssertionError):  # not a BIDS file
                    continue
                if is_match:
                    selected_files.append(Path(full_path))

        if len(selected_files) == 1:
            return selected_files[0]

        # The message is only built on failure, so that ``file_type`` is not
        # formatted for every successful lookup.
        error_msg = f"For ({participant_id} | {session_id}), an error occurred while trying to get {file_type}: "
        if selected_files:
            error_msg += "more than 1 file found:\n"
            # Sorted, because the order in which os.walk lists files is arbitrary.
            error_msg += "".join(
                f"\t * {found_file}\n" for found_file in sorted(selected_files)
            )
        else:
            # ``has_file_type`` relies on this marker being in the message.
            error_msg += NO_FILE_FOUND
        raise RuntimeError(error_msg)

    def has_file_type(
        self, participant_id: str, session_id: str, file_type: BidsFileType
    ) -> bool:
        """
        To check if a participant has a file type for a specified session.

        In practice, it will just check that :py:meth:`get_path` returns a file.

        Parameters
        ----------
        participant_id : str
            The participant id (e.g., ``"sub-xxx"``).
        session_id : str
            The session id (e.g., ``"ses-xxx"``).
        file_type : BidsFileType
            The :py:class:`~clinicadl.io.BidsFileType` containing the specifications
            of the file to check.

        Returns
        -------
        bool
            Whether the participant has a file type for the specified session.

        Raises
        ------
        RuntimeError
            If several corresponding files are found.

        Examples
        --------
        .. code-block:: bash

            bids
            ├── sub-001
            │   ├── ses-M000
            │   │   └── anat
            │   │       ├── sub-001_ses-M000_space-MNI152NLin2009cSym_res-1x1x1_T1w.nii
            │   │       └── sub-001_ses-M000_T1w.nii
            ...

        .. code-block:: python

            >>> from clinicadl.io.bids import Bids, BidsFileType
            >>> bids = Bids("bids")
            >>> bids.has_file_type(
                    file_type=BidsFileType(
                        suffix="T1w",
                        data_type="anat",
                        with_entities={"space": "MNI152NLin2009cSym"},
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            True
            >>> bids.has_file_type(
                    file_type=BidsFileType(
                        suffix="T1w",
                        data_type="anat",
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            RuntimeError
            >>> bids.has_file_type(
                    file_type=BidsFileType(
                        suffix="FLAIR",
                        data_type="anat",
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            False
        """
        try:
            self.get_path(file_type, participant_id, session_id)
        except RuntimeError as e:
            # ``get_path`` raises RuntimeError both when nothing is found and when
            # several files are found; only the former means "no such file".
            if NO_FILE_FOUND in str(e):
                return False
            raise
        return True

    def build_path(
        self,
        file_type: BidsFileType,
        participant_id: Optional[str] = None,
        session_id: Optional[str] = None,
    ) -> Path:
        """
        Builds the path to the file associated to the input file type and the
        potential participant and session ids.

        Nothing is read from or written to the disk: the path is only assembled.

        Parameters
        ----------
        file_type : BidsFileType
            The :py:class:`~clinicadl.io.BidsFileType` containing the specifications
            of the path to create.

            .. note::
                The entities in the ``without_entities`` attribute of the
                :py:class:`~clinicadl.io.BidsFileType` are not used here.

        participant_id : Optional[str], default=None
            The participant id (e.g., ``"sub-xxx"``), if the file must be subject-specific.
        session_id : Optional[str], default=None
            The session id (e.g., ``"ses-xxx"``), if the file must be session-specific.

        Returns
        -------
        Path
            The built path.

        Raises
        ------
        AssertionError
            If ``session_id`` is passed without ``participant_id``.

        Examples
        --------
        .. code-block:: python

            >>> from clinicadl.io.bids import Bids, BidsFileType
            >>> bids = Bids("bids")
            >>> bids.build_path(
                    file_type=BidsFileType(
                        suffix="T1w",
                        data_type="anat",
                        with_entities={"space": "MNI152NLin2009cSym", "res": "1x1x1"},
                        extension=".nii",
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            Path("bids/sub-001/ses-M000/anat/sub-001_ses-M000_space-MNI152NLin2009cSym_res-1x1x1_T1w.nii")
            >>> bids.build_path(
                    file_type=BidsFileType(
                        suffix="scans",
                        extension=".tsv",
                    ),
                    participant_id="sub-001",
                    session_id="ses-M000",
                )
            Path("bids/sub-001/ses-M000/sub-001_ses-M000_scans.tsv")
            >>> bids.build_path(
                    file_type=BidsFileType(
                        suffix="sessions",
                        extension=".tsv",
                    ),
                    participant_id="sub-001",
                )
            Path("bids/sub-001/sub-001_sessions.tsv")
            >>> bids.build_path(
                    file_type=BidsFileType(
                        suffix="participants",
                        extension=".tsv",
                        with_entities={"space": "MNI152NLin2009cSym"},
                    ),
                )
            Path("bids/space-MNI152NLin2009cSym_participants.tsv")
        """
        dir_ = self._find_root(participant_id, session_id)

        # Filename layout: <participant>_<session>_<entities...>_<suffix>
        filename_components = []
        if participant_id:
            filename_components.append(participant_id)
        if session_id:
            filename_components.append(session_id)
        if file_type.with_entities:
            filename_components.extend(
                BidsEntity.from_key_value(key, value.pattern)
                for key, value in file_type.with_entities.items()
            )
        filename_components.append(file_type.suffix.pattern)
        filename = "_".join(filename_components)

        # Without a data type, the file sits directly in the root found above.
        folder = Path(file_type.data_type.pattern) if file_type.data_type else Path(".")

        return (dir_ / folder / filename).with_suffix(file_type.extension.pattern)

    def get_participants_sessions_with(
        self,
        file_type: BidsFileType,
    ) -> set[tuple[str, str]]:
        """
        Finds all the (participant, session) pairs which have a file matching
        a specified file type.

        In practice, it will get all the (participant, session) pairs for which
        :py:meth:`has_file_type` returns ``True``.

        Parameters
        ----------
        file_type : BidsFileType
            The :py:class:`~clinicadl.io.BidsFileType` to match.

        Returns
        -------
        set[tuple[str, str]]
            The (participant, session) pairs that have the specified file type.

        Raises
        ------
        RuntimeError
            If several corresponding files are found for a (participant, session) pair.
        """
        return {
            (participant, session)
            for participant, session in self.get_all_participants_sessions()
            if self.has_file_type(participant, session, file_type)
        }

    def get_all_participants_sessions(
        self,
    ) -> set[tuple[str, str]]:
        """
        Finds all the (participant, session) pairs in the BIDS-like directory.

        Only the directories found in :py:attr:`participants_dir` whose name matches
        the participant pattern are considered, and inside them only the directories
        whose name matches the session pattern.

        Returns
        -------
        set[tuple[str, str]]
            The (participant, session) pairs.
        """
        # NOTE(review): ``match`` only anchors the beginning of the name. If
        # ``Subject.pattern`` / ``Session.pattern`` are not end-anchored, names such
        # as "sub-001_backup" would be accepted -- confirm against their definitions.
        participant_pattern = re.compile(Subject.pattern)
        session_pattern = re.compile(Session.pattern)

        participants_sessions = set()
        # ``with`` closes the scandir iterators even if an error interrupts the scan.
        with os.scandir(self.participants_dir) as participant_entries:
            for participant_entry in participant_entries:
                if not participant_entry.is_dir():
                    continue
                if not participant_pattern.match(participant_entry.name):
                    continue

                with os.scandir(participant_entry.path) as session_entries:
                    participants_sessions.update(
                        (participant_entry.name, session_entry.name)
                        for session_entry in session_entries
                        if session_entry.is_dir()
                        and session_pattern.match(session_entry.name)
                    )

        return participants_sessions

    def _find_root(
        self, participant_id: Optional[str] = None, session_id: Optional[str] = None
    ) -> Path:
        """
        Depending on the participant and session specifications, find
        the root directory to consider.

        - no participant: the BIDS-like directory itself;
        - a participant: its folder in :py:attr:`participants_dir`;
        - a participant and a session: the session folder of the participant.

        Raises
        ------
        AssertionError
            If ``session_id`` is passed without ``participant_id``.
        """
        if not participant_id:
            # Raised explicitly rather than with an ``assert`` statement, so that the
            # check is not removed when Python runs with -O. The exception type is
            # kept for backward compatibility.
            if session_id is not None:
                raise AssertionError("Cannot pass a session without a participant")
            return self.path

        # NOTE(review): ``Subject`` / ``Session`` presumably validate the ids --
        # confirm in clinicadl.utils.bids.
        root = self.participants_dir / Subject(participant_id)
        if session_id:
            root = root / Session(session_id)

        return root