diff --git a/patch-bids-tables.sql b/patch-bids-tables.sql new file mode 100644 index 000000000..a9c9ef07d --- /dev/null +++ b/patch-bids-tables.sql @@ -0,0 +1,52 @@ +CREATE TABLE `bids_dataset` ( + `ID` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, + `Path` VARCHAR(255) NOT NULL, + `InsertTime` DATETIME NOT NULL, + PRIMARY KEY (`ID`), + UNIQUE KEY `bids_dataset_path_unique` (`Path`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +CREATE TABLE `bids_file` ( + `ID` INT(10) UNSIGNED NOT NULL AUTO_INCREMENT, + `DatasetID` INT(10) UNSIGNED NOT NULL, + `Path` VARCHAR(255) NOT NULL, + `SourcePath` VARCHAR(255) NULL, + `InsertTime` DATETIME NOT NULL, + `Blake2bHash` CHAR(128) NOT NULL, + `Derivative` TINYINT(1) NOT NULL, + PRIMARY KEY (`ID`), + UNIQUE KEY `bids_file_dataset_id_path_unique` (`DatasetID`, `Path`), + KEY `bids_file_dataset_id_fk_idx` (`DatasetID`), + CONSTRAINT `bids_file_dataset_id_fk` + FOREIGN KEY (`DatasetID`) REFERENCES `bids_dataset` (`ID`) ON DELETE CASCADE +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +ALTER TABLE `files` + ADD COLUMN `BidsInfoID` INT(10) UNSIGNED NULL, + ADD KEY `files_bids_info_id_fk_idx` (`BidsInfoID`), + ADD CONSTRAINT `files_bids_info_id_fk` + FOREIGN KEY (`BidsInfoID`) REFERENCES `bids_file` (`ID`) ON DELETE SET NULL; + +ALTER TABLE `physiological_file` + ADD COLUMN `BidsInfoID` INT(10) UNSIGNED NULL, + ADD KEY `physiological_file_bids_info_id_fk_idx` (`BidsInfoID`), + ADD CONSTRAINT `physiological_file_bids_info_id_fk` + FOREIGN KEY (`BidsInfoID`) REFERENCES `bids_file` (`ID`) ON DELETE SET NULL; + +ALTER TABLE `physiological_event_file` + ADD COLUMN `BidsInfoID` INT(10) UNSIGNED NULL, + ADD KEY `physiological_event_file_bids_info_id_fk_idx` (`BidsInfoID`), + ADD CONSTRAINT `physiological_event_file_bids_info_id_fk` + FOREIGN KEY (`BidsInfoID`) REFERENCES `bids_file` (`ID`) ON DELETE SET NULL; + +ALTER TABLE `meg_ctf_head_shape_file` + ADD COLUMN `BidsInfoID` INT(10) UNSIGNED NULL, + ADD COLUMN `InsertTime` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + ADD KEY 
`meg_ctf_head_shape_file_bids_info_id_fk_idx` (`BidsInfoID`), + ADD CONSTRAINT `meg_ctf_head_shape_file_bids_info_id_fk` + FOREIGN KEY (`BidsInfoID`) REFERENCES `bids_file` (`ID`) ON DELETE SET NULL; + +ALTER TABLE `physiological_file` + DROP FOREIGN KEY `physiological_file_head_shape_file_id_fk`, + ADD CONSTRAINT `physiological_file_head_shape_file_id_fk` + FOREIGN KEY (`HeadShapeFileID`) REFERENCES `meg_ctf_head_shape_file` (`ID`) ON DELETE SET NULL; diff --git a/python/lib/db/models/bids_dataset.py b/python/lib/db/models/bids_dataset.py new file mode 100644 index 000000000..7121b253e --- /dev/null +++ b/python/lib/db/models/bids_dataset.py @@ -0,0 +1,30 @@ +from datetime import datetime +from pathlib import Path + +from sqlalchemy.orm import Mapped, mapped_column + +from lib.db.base import Base +from lib.db.decorators.string_path import StringPath + + +class DbBidsDataset(Base): + """ + A LORIS BIDS dataset. + """ + + __tablename__ = 'bids_dataset' + + id: Mapped[int] = mapped_column('ID', primary_key=True, autoincrement=True) + """ + The ID of this BIDS dataset. + """ + + path: Mapped[Path] = mapped_column('Path', StringPath, unique=True) + """ + The path of this BIDS dataset, relative to the LORIS data directory. + """ + + insert_time: Mapped[datetime] = mapped_column('InsertTime', default=datetime.now) + """ + The time at which this BIDS dataset was created in LORIS. 
+ """ diff --git a/python/lib/db/models/bids_file.py b/python/lib/db/models/bids_file.py new file mode 100644 index 000000000..6c0530039 --- /dev/null +++ b/python/lib/db/models/bids_file.py @@ -0,0 +1,61 @@ +from datetime import datetime +from pathlib import Path + +from sqlalchemy import ForeignKey, UniqueConstraint +from sqlalchemy.orm import Mapped, mapped_column, relationship + +import lib.db.models.bids_dataset as db_bids_dataset +from lib.db.base import Base +from lib.db.decorators.int_bool import IntBool +from lib.db.decorators.string_path import StringPath + + +class DbBidsFile(Base): + """ + A file within a LORIS BIDS dataset. + """ + + __tablename__ = 'bids_file' + + # A path is relative to its BIDS dataset, so it is only unique within that dataset. + __table_args__ = (UniqueConstraint('DatasetID', 'Path'),) + + id: Mapped[int] = mapped_column('ID', primary_key=True, autoincrement=True) + """ + The ID of this BIDS file. + """ + + dataset_id: Mapped[int] = mapped_column('DatasetID', ForeignKey('bids_dataset.ID', ondelete='CASCADE')) + """ + The ID of the BIDS dataset to which this file belongs. + """ + + path: Mapped[Path] = mapped_column('Path', StringPath) + """ + The path of this file relative to its LORIS BIDS dataset. + """ + + source_path: Mapped[Path | None] = mapped_column('SourcePath', StringPath) + """ + The source path of this file relative to the BIDS dataset from which it was imported. + """ + + insert_time: Mapped[datetime] = mapped_column('InsertTime', default=datetime.now) + """ + The time at which this BIDS file was created in LORIS. + """ + + blake2b_hash: Mapped[str] = mapped_column('Blake2bHash') + """ + The BLAKE2b hash of this file. + """ + + derivative: Mapped[bool] = mapped_column('Derivative', IntBool) + """ + Whether this file is a BIDS derivative. + """ + + dataset: Mapped['db_bids_dataset.DbBidsDataset'] = relationship('DbBidsDataset') + """ + The BIDS dataset to which this file belongs. 
+ """ diff --git a/python/lib/db/models/file.py b/python/lib/db/models/file.py index ceb5599eb..04ae55011 100644 --- a/python/lib/db/models/file.py +++ b/python/lib/db/models/file.py @@ -4,6 +4,7 @@ from sqlalchemy import ForeignKey from sqlalchemy.orm import Mapped, mapped_column, relationship +import lib.db.models.bids_file as db_bids_file import lib.db.models.dicom_archive as db_dicom_archive import lib.db.models.file_parameter as db_file_parameter import lib.db.models.session as db_session @@ -40,6 +41,16 @@ class DbFile(Base): acquisition_order_per_modality : Mapped[int | None] = mapped_column('AcqOrderPerModality') acquisition_date : Mapped[date | None] = mapped_column('AcquisitionDate') + bids_info_id: Mapped[int | None] = mapped_column('BidsInfoID', ForeignKey('bids_file.ID', ondelete='SET NULL')) + """ + The ID of the BIDS information of this file, if any. + """ + + bids_info: Mapped['db_bids_file.DbBidsFile | None'] = relationship('DbBidsFile') + """ + The BIDS information of this file, if any. + """ + session: Mapped['db_session.DbSession'] = relationship('DbSession', back_populates='files') """ The session to which this file belongs. diff --git a/python/lib/db/models/meg_ctf_head_shape_file.py b/python/lib/db/models/meg_ctf_head_shape_file.py index 186d34504..7aca80499 100644 --- a/python/lib/db/models/meg_ctf_head_shape_file.py +++ b/python/lib/db/models/meg_ctf_head_shape_file.py @@ -1,7 +1,10 @@ +from datetime import datetime from pathlib import Path +from sqlalchemy import ForeignKey from sqlalchemy.orm import Mapped, mapped_column, relationship +import lib.db.models.bids_file as db_bids_file import lib.db.models.meg_ctf_head_shape_point as db_meg_ctf_head_shape_point from lib.db.base import Base from lib.db.decorators.string_path import StringPath @@ -20,11 +23,21 @@ class DbMegCtfHeadShapeFile(Base): ID of the head shape file. 
""" + bids_info_id: Mapped[int | None] = mapped_column('BidsInfoID', ForeignKey('bids_file.ID', ondelete='SET NULL')) + """ + The ID of the BIDS information of this head shape file, if any. + """ + path: Mapped[Path] = mapped_column('Path', StringPath) """ Path of the head shape file relative to the LORIS data directory. """ + insert_time: Mapped[datetime] = mapped_column('InsertTime', default=datetime.now) + """ + The time at which this head shape file was created in LORIS. + """ + blake2b_hash: Mapped[str] = mapped_column('Blake2bHash') """ Blake2B hash of the head shape file, which may be used to check that the on-disk file data @@ -35,3 +48,8 @@ class DbMegCtfHeadShapeFile(Base): """ 3D points present in the head shape file. """ + + bids_info: Mapped['db_bids_file.DbBidsFile | None'] = relationship('DbBidsFile') + """ + The BIDS information of this head shape file, if any. + """ diff --git a/python/lib/db/models/physio_event_file.py b/python/lib/db/models/physio_event_file.py index cc7b4c602..ca82ddfd4 100644 --- a/python/lib/db/models/physio_event_file.py +++ b/python/lib/db/models/physio_event_file.py @@ -4,6 +4,7 @@ from sqlalchemy import ForeignKey from sqlalchemy.orm import Mapped, mapped_column, relationship +import lib.db.models.bids_file as db_bids_file import lib.db.models.imaging_file_type as db_imaging_file_type import lib.db.models.physio_event_parameter as db_physio_event_parameter import lib.db.models.physio_file as db_physio_file @@ -24,8 +25,18 @@ class DbPhysioEventFile(Base): last_update : Mapped[datetime] = mapped_column('LastUpdate', default=datetime.now) last_written : Mapped[datetime] = mapped_column('LastWritten', default=datetime.now) + bids_info_id: Mapped[int | None] = mapped_column('BidsInfoID', ForeignKey('bids_file.ID', ondelete='SET NULL')) + """ + The ID of the BIDS information of this event file, if any. 
+ """ + physio_file : Mapped['db_physio_file.DbPhysioFile | None'] = relationship('DbPhysioFile') project : Mapped['db_project.DbProject | None'] = relationship('DbProject') imaging_file_type : Mapped['db_imaging_file_type.DbImagingFileType | None'] = relationship('DbImagingFileType') task_events : Mapped[list['db_physio_task_event.DbPhysioTaskEvent']] = relationship('DbPhysioTaskEvent', back_populates='event_file') event_parameters : Mapped[list['db_physio_event_parameter.DbPhysioEventParameter']] = relationship('DbPhysioEventParameter', back_populates='event_file') + + bids_info: Mapped['db_bids_file.DbBidsFile | None'] = relationship('DbBidsFile') + """ + The BIDS information of this event file, if any. + """ diff --git a/python/lib/db/models/physio_file.py b/python/lib/db/models/physio_file.py index 03e014929..ac41e91d9 100644 --- a/python/lib/db/models/physio_file.py +++ b/python/lib/db/models/physio_file.py @@ -4,6 +4,7 @@ from sqlalchemy import ForeignKey from sqlalchemy.orm import Mapped, mapped_column, relationship +import lib.db.models.bids_file as db_bids_file import lib.db.models.meg_ctf_head_shape_file as db_meg_ctf_head_shape_file import lib.db.models.physio_channel as db_physio_channel import lib.db.models.physio_event_archive as db_physio_event_archive @@ -33,7 +34,12 @@ class DbPhysioFile(Base): index : Mapped[int | None] = mapped_column('Index') parent_id : Mapped[int | None] = mapped_column('ParentID') - head_shape_file_id: Mapped[int | None] = mapped_column('HeadShapeFileID', ForeignKey('meg_ctf_head_shape_file.ID')) + bids_info_id: Mapped[int | None] = mapped_column('BidsInfoID', ForeignKey('bids_file.ID', ondelete='SET NULL')) + """ + The ID of the BIDS information of this file, if any. + """ + + head_shape_file_id: Mapped[int | None] = mapped_column('HeadShapeFileID', ForeignKey('meg_ctf_head_shape_file.ID', ondelete='SET NULL')) """ ID of the head shape file associated to this file, which is only present for MEG CTF files. 
""" @@ -48,6 +54,11 @@ class DbPhysioFile(Base): event_files : Mapped[list['db_physio_event_file.DbPhysioEventFile']] = relationship('DbPhysioEventFile', back_populates='physio_file') task_events : Mapped[list['db_physio_task_event.DbPhysioTaskEvent']] = relationship('DbPhysioTaskEvent', back_populates='physio_file') + bids_info: Mapped['db_bids_file.DbBidsFile | None'] = relationship('DbBidsFile') + """ + The BIDS information of this file, if any. + """ + head_shape_file: Mapped['db_meg_ctf_head_shape_file.DbMegCtfHeadShapeFile | None'] = relationship('DbMegCtfHeadShapeFile') """ The head shape file associated to this file, which is only present for MEG CTF files. diff --git a/python/lib/db/queries/bids_dataset.py b/python/lib/db/queries/bids_dataset.py new file mode 100644 index 000000000..72c0637ff --- /dev/null +++ b/python/lib/db/queries/bids_dataset.py @@ -0,0 +1,16 @@ +from pathlib import Path + +from sqlalchemy import select +from sqlalchemy.orm import Session as Database + +from lib.db.models.bids_dataset import DbBidsDataset + + +def try_get_bids_dataset_with_path(db: Database, path: Path) -> DbBidsDataset | None: + """ + Get a BIDS dataset from the database using its path, or return `None` if no dataset is found. + """ + + return db.execute(select(DbBidsDataset) + .where(DbBidsDataset.path == path) + ).scalar_one_or_none() diff --git a/python/lib/db/queries/bids_file.py b/python/lib/db/queries/bids_file.py new file mode 100644 index 000000000..3026c0175 --- /dev/null +++ b/python/lib/db/queries/bids_file.py @@ -0,0 +1,18 @@ +from pathlib import Path + +from sqlalchemy import select +from sqlalchemy.orm import Session as Database + +from lib.db.models.bids_file import DbBidsFile + + +def try_get_bids_file_with_dataset_id_path(db: Database, dataset_id: int, path: Path) -> DbBidsFile | None: + """ + Get a BIDS file from the database using its dataset ID and path, or return `None` if no file is + found. 
+ """ + + return db.execute(select(DbBidsFile) + .where(DbBidsFile.dataset_id == dataset_id) + .where(DbBidsFile.path == path) + ).scalar_one_or_none() diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py index c9a4d75a4..3bcc7c59e 100644 --- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py +++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py @@ -676,6 +676,7 @@ def _register_into_files_and_parameter_file(self, nifti_rel_path): Path(nifti_rel_path), file_type, self.session, + None, self.scan_type, self.mri_scanner, self.dicom_archive, diff --git a/python/lib/imaging_lib/file.py b/python/lib/imaging_lib/file.py index ed754dd39..71628b702 100644 --- a/python/lib/imaging_lib/file.py +++ b/python/lib/imaging_lib/file.py @@ -2,6 +2,7 @@ from datetime import date, datetime from pathlib import Path +from lib.db.models.bids_file import DbBidsFile from lib.db.models.dicom_archive import DbDicomArchive from lib.db.models.file import DbFile from lib.db.models.imaging_file_type import DbImagingFileType @@ -16,6 +17,7 @@ def register_mri_file( file_path: Path, file_type: DbImagingFileType, session: DbSession, + bids_info: DbBidsFile | None, scan_type: DbMriScanType | None, scanner: DbMriScanner | None, dicom_archive: DbDicomArchive | None, @@ -46,6 +48,7 @@ def register_mri_file( echo_number = echo_number, phase_encoding_direction = phase_encoding_direction, source_file_id = None, + bids_info_id = bids_info.id if bids_info is not None else None, scan_type_id = scan_type.id if scan_type is not None else None, scanner_id = scanner.id if scanner is not None else None, dicom_archive_id = dicom_archive.id if dicom_archive is not None else None, diff --git a/python/lib/physio/events.py b/python/lib/physio/events.py index 6b4ee1cac..d560654d2 100644 --- a/python/lib/physio/events.py +++ b/python/lib/physio/events.py @@ -4,6 +4,7 @@ from 
pathlib import Path from typing import Any +from lib.db.models.bids_file import DbBidsFile from lib.db.models.physio_task_event_hed import DbPhysioTaskEventHed from lib.db.models.physio_task_event_opt import DbPhysioTaskEventOpt from lib.db.models.physio_task_event import DbPhysioTaskEvent @@ -49,7 +50,12 @@ def from_file(physio_file: DbPhysioFile) -> 'EventDictFileSource': ) -def insert_event_dict_file(env: Env, source: EventDictFileSource, event_file_path: Path) -> DbPhysioEventFile: +def insert_event_dict_file( + env: Env, + bids_info: DbBidsFile, + source: EventDictFileSource, + event_file_path: Path, +) -> DbPhysioEventFile: """ Insert an event dictionary file into the LORIS database. """ @@ -59,6 +65,7 @@ def insert_event_dict_file(env: Env, source: EventDictFileSource, event_file_pat project_id = source.project.id, file_type = 'json', file_path = event_file_path, + bids_info_id = bids_info.id, ) env.db.add(event_dict_file) @@ -202,7 +209,12 @@ def parse_and_insert_event_dict( return tag_dict -def insert_events_file(env: Env, physio_file: DbPhysioFile, event_file_path: Path) -> DbPhysioEventFile: +def insert_events_file( + env: Env, + physio_file: DbPhysioFile, + bids_info: DbBidsFile, + event_file_path: Path, +) -> DbPhysioEventFile: """ Insert an events file into the LORIS database. 
""" @@ -212,6 +224,7 @@ def insert_events_file(env: Env, physio_file: DbPhysioFile, event_file_path: Pat project_id = physio_file.session.project.id, file_type = 'tsv', file_path = event_file_path, + bids_info_id = bids_info.id, ) env.db.add(event_dict_file) diff --git a/python/lib/physio/file.py b/python/lib/physio/file.py index e1891da3f..0042211b7 100644 --- a/python/lib/physio/file.py +++ b/python/lib/physio/file.py @@ -2,6 +2,7 @@ from datetime import datetime from pathlib import Path +from lib.db.models.bids_file import DbBidsFile from lib.db.models.imaging_file_type import DbImagingFileType from lib.db.models.physio_file import DbPhysioFile from lib.db.models.physio_modality import DbPhysioModality @@ -18,6 +19,7 @@ def insert_physio_file( modality: DbPhysioModality, output_type: DbPhysioOutputType, acquisition_time: datetime | None, + bids_info: DbBidsFile | None, ) -> DbPhysioFile: """ Insert a physiological file into the database. @@ -31,6 +33,7 @@ def insert_physio_file( output_type_id = output_type.id, acquisition_time = acquisition_time, inserted_by_user = getpass.getuser(), + bids_info_id = bids_info.id if bids_info is not None else None, ) env.db.add(file) diff --git a/python/loris_bids_importer/src/loris_bids_importer/acquisitions.py b/python/loris_bids_importer/src/loris_bids_importer/acquisitions.py index 41a663772..4d4f511e6 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/acquisitions.py +++ b/python/loris_bids_importer/src/loris_bids_importer/acquisitions.py @@ -5,16 +5,16 @@ from lib.logging import log, log_error from loris_bids_utils.info import BidsAcquisitionInfo -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.importer import BidsImporter T = TypeVar('T') def import_bids_acquisitions( env: Env, - import_env: BidsImportEnv, + importer: BidsImporter, acquisitions: list[tuple[T, BidsAcquisitionInfo]], - importer: Callable[[T, BidsAcquisitionInfo], None] + import_function: Callable[[T, 
BidsAcquisitionInfo], None] ): """ Run an import function on a list of BIDS acquisitions, logging the overall import progress, @@ -28,9 +28,9 @@ def import_bids_acquisitions( ) try: - importer(acquisition, bids_info) + import_function(acquisition, bids_info) log(env, f"Successfully imported acquisition '{bids_info.name}'.") - import_env.imported_acquisitions_count += 1 + importer.imported_acquisitions_count += 1 except Exception as exception: log_error( env, @@ -40,4 +40,4 @@ def import_bids_acquisitions( "Skipping." ) ) - import_env.failed_acquisitions_count += 1 + importer.failed_acquisitions_count += 1 diff --git a/python/loris_bids_importer/src/loris_bids_importer/args.py b/python/loris_bids_importer/src/loris_bids_importer/args.py deleted file mode 100644 index b4d8f549d..000000000 --- a/python/loris_bids_importer/src/loris_bids_importer/args.py +++ /dev/null @@ -1,14 +0,0 @@ -from dataclasses import dataclass -from pathlib import Path -from typing import Literal - - -@dataclass -class Args: - source_bids_path: Path - type: Literal[None, 'raw', 'derivative'] - bids_validation: bool - create_candidate: bool - create_session: bool - copy: bool - verbose: bool diff --git a/python/loris_bids_importer/src/loris_bids_importer/channels.py b/python/loris_bids_importer/src/loris_bids_importer/channels.py index f75ce456a..e03593a6f 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/channels.py +++ b/python/loris_bids_importer/src/loris_bids_importer/channels.py @@ -14,12 +14,13 @@ from loris_utils.error import group_errors, group_errors_tuple from loris_bids_importer.copy_files import get_loris_bids_file_path -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.dataset import get_or_create_loris_bids_file +from loris_bids_importer.importer import BidsImporter def insert_bids_channels_file( env: Env, - import_env: BidsImportEnv, + importer: BidsImporter, physio_file: DbPhysioFile, session: DbSession, acquisition: BidsAcquisitionInfo, 
@@ -30,7 +31,7 @@ def insert_bids_channels_file( """ loris_channels_file_path = get_loris_bids_file_path( - import_env, session, acquisition.data_type, channels_file.path + importer, session, acquisition.data_type, channels_file.path ) blake2_hash = compute_file_blake2b_hash(channels_file.path) @@ -48,6 +49,7 @@ def insert_bids_channels_file( ), ) + get_or_create_loris_bids_file(env, importer, channels_file.path, loris_channels_file_path) insert_physio_file_parameter(env, physio_file, 'channel_file_blake2b_hash', blake2_hash) env.db.flush() diff --git a/python/loris_bids_importer/src/loris_bids_importer/copy_files.py b/python/loris_bids_importer/src/loris_bids_importer/copy_files.py index ac9f068b5..fda010ac9 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/copy_files.py +++ b/python/loris_bids_importer/src/loris_bids_importer/copy_files.py @@ -2,50 +2,30 @@ import shutil from pathlib import Path -from lib.config import get_data_dir_path_config from lib.db.models.session import DbSession from lib.env import Env -from loris_bids_utils.files.dataset_description import BidsDatasetDescriptionJsonFile from loris_bids_utils.files.participants import BidsParticipantsTsvFile from loris_bids_utils.files.scans import BidsScansTsvFile -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.dataset import get_or_create_loris_bids_file +from loris_bids_importer.importer import BidsImporter -def get_loris_bids_dataset_path(env: Env, dataset_description: BidsDatasetDescriptionJsonFile) -> Path: - """ - Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if - it does not exist yet. - """ - - # Sanitize the dataset metadata to have a usable name for the directory. 
- dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.data['Name']) - dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.data['BIDSVersion']) - - data_dir_path = get_data_dir_path_config(env) - loris_bids_path = data_dir_path / 'bids_imports' / f'{dataset_name}_BIDSVersion_{dataset_version}' - - if not loris_bids_path.exists(): - loris_bids_path.mkdir() - - return loris_bids_path - - -def get_loris_bids_root_file_path(import_env: BidsImportEnv, file_path: Path) -> Path: +def get_loris_bids_root_file_path(importer: BidsImporter, file_path: Path) -> Path: """ Get the path of a BIDS file relative to the LORIS data directory, maintaining the same relative path in the LORIS BIDS dataset as within the source BIDS dataset. """ # In the import is run in no-copy mode, return the original file path. - if import_env.loris_bids_path is None: - return file_path.relative_to(import_env.data_dir_path) + if not importer.args.copy: + return file_path.relative_to(importer.data_dir_path) - return import_env.loris_bids_path / file_path.relative_to(import_env.source_bids_path) + return importer.loris_bids_dataset.path / file_path.relative_to(importer.args.source_bids_path) def get_loris_bids_file_path( - import_env: BidsImportEnv, + importer: BidsImporter, session: DbSession, data_type: str, file_path: Path, @@ -56,20 +36,20 @@ def get_loris_bids_file_path( """ # In the import is run in no-copy mode, return the original file path. - if import_env.loris_bids_path is None: - return file_path.relative_to(import_env.data_dir_path) + if not importer.args.copy: + return file_path.relative_to(importer.data_dir_path) # If the file is a derivative, the path is unpredictable, so return a copy of that path in the # LORIS BIDS dataset. 
if derivative: - return import_env.loris_bids_path / file_path.relative_to(import_env.source_bids_path) + return importer.loris_bids_dataset.path / file_path.relative_to(importer.args.source_bids_path) # Otherwise, normalize the subject and session directory names using the LORIS session # information. loris_file_name = get_loris_bids_file_name(file_path.name, session) return ( - import_env.loris_bids_path + importer.loris_bids_dataset.path / f'sub-{session.candidate.psc_id}' / f'ses-{session.visit_label}' / data_type @@ -91,34 +71,34 @@ def get_loris_bids_file_name(file_name: str, session: DbSession) -> str: return f'sub-{session.candidate.psc_id}_ses-{session.visit_label}_{file_name}' -def get_loris_scans_path(import_env: BidsImportEnv, scans_file: BidsScansTsvFile, session: DbSession) -> Path: +def get_loris_scans_path(importer: BidsImporter, scans_file: BidsScansTsvFile, session: DbSession) -> Path: """ Get the path of a `scans.tsv` file in LORIS, relative to the LORIS data directory. """ # In the import is run in no-copy mode, return the original file path. - if import_env.loris_bids_path is None: - return scans_file.path.relative_to(import_env.data_dir_path) + if not importer.args.copy: + return scans_file.path.relative_to(importer.data_dir_path) loris_file_name = get_loris_bids_file_name(scans_file.path.name, session) return ( - import_env.loris_bids_path + importer.loris_bids_dataset.path / f'sub-{session.candidate.psc_id}' / f'ses-{session.visit_label}' / loris_file_name ) -def copy_loris_bids_file(import_env: BidsImportEnv, file_path: Path, loris_file_path: Path): +def copy_loris_bids_file(importer: BidsImporter, file_path: Path, loris_file_path: Path): """ Copy a BIDS file to the LORIS data directory, unless the no-copy mode is enabled. """ # Do not copy the file in no-copy mode. 
- if import_env.loris_bids_path is None: + if not importer.args.copy: return - full_loris_file_path = import_env.data_dir_path / loris_file_path + full_loris_file_path = importer.data_dir_path / loris_file_path if full_loris_file_path.exists(): raise Exception(f"File '{loris_file_path}' already exists in the LORIS data directory.") @@ -130,31 +110,29 @@ def copy_loris_bids_file(import_env: BidsImportEnv, file_path: Path, loris_file_ shutil.copytree(file_path, full_loris_file_path) -def copy_bids_static_files(import_env: BidsImportEnv): +def copy_bids_static_files(env: Env, importer: BidsImporter): """ Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. """ - # Do not copy files in no-copy mode. - if import_env.loris_bids_path is None: - return - for file_name in ['README', 'dataset_description.json']: - source_file_path = import_env.source_bids_path / file_name + source_file_path = importer.args.source_bids_path / file_name if not source_file_path.is_file(): continue - loris_file_path = import_env.loris_bids_path / file_name + loris_file_path = importer.loris_bids_dataset.path / file_name # Do not copy the file if it is already present during an incremental import. - if (import_env.data_dir_path / loris_file_path).is_file(): + if importer.args.copy and (importer.data_dir_path / loris_file_path).is_file(): continue - copy_loris_bids_file(import_env, source_file_path, loris_file_path) + get_or_create_loris_bids_file(env, importer, source_file_path, loris_file_path) + copy_loris_bids_file(importer, source_file_path, loris_file_path) def copy_bids_participants_file( - import_env: BidsImportEnv, + env: Env, + importer: BidsImporter, participants_file: BidsParticipantsTsvFile, loris_participants_path: Path, ): @@ -163,11 +141,13 @@ def copy_bids_participants_file( necessary. """ + get_or_create_loris_bids_file(env, importer, participants_file.path, loris_participants_path) + # Do not copy the file in no-copy mode. 
- if import_env.loris_bids_path is None: + if not importer.args.copy: return - participants_path = import_env.data_dir_path / loris_participants_path + participants_path = importer.data_dir_path / loris_participants_path if participants_path.exists(): participants_file.merge(BidsParticipantsTsvFile(participants_path)) @@ -175,16 +155,18 @@ def copy_bids_participants_file( participants_file.write(participants_path) -def copy_bids_scans_file(import_env: BidsImportEnv, scans_file: BidsScansTsvFile, loris_scans_path: Path): +def copy_bids_scans_file(env: Env, importer: BidsImporter, scans_file: BidsScansTsvFile, loris_scans_path: Path): """ Copy some `scans.tsv` rows into a LORIS `scans.tsv` file, creating it if necessary. """ + get_or_create_loris_bids_file(env, importer, scans_file.path, loris_scans_path) + # Do not copy the file in no-copy mode. - if import_env.loris_bids_path is None: + if not importer.args.copy: return - scans_path = import_env.data_dir_path / loris_scans_path + scans_path = importer.data_dir_path / loris_scans_path if scans_path.exists(): scans_file.merge(BidsScansTsvFile(scans_path)) diff --git a/python/loris_bids_importer/src/loris_bids_importer/dataset.py b/python/loris_bids_importer/src/loris_bids_importer/dataset.py new file mode 100644 index 000000000..9419cb5ab --- /dev/null +++ b/python/loris_bids_importer/src/loris_bids_importer/dataset.py @@ -0,0 +1,142 @@ +import re +from datetime import datetime +from pathlib import Path + +from lib.config import get_data_dir_path_config +from lib.db.models.bids_dataset import DbBidsDataset +from lib.db.models.bids_file import DbBidsFile +from lib.db.queries.bids_dataset import try_get_bids_dataset_with_path +from lib.db.queries.bids_file import try_get_bids_file_with_dataset_id_path +from lib.env import Env +from lib.logging import log_error_exit +from loris_bids_utils.reader import BidsDatasetReader +from loris_utils.crypto import compute_file_blake2b_hash + +from loris_bids_importer.importer import 
BidsImporter, BidsImporterArgs
+
+
+def make_bids_importer(env: Env, args: BidsImporterArgs, bids: BidsDatasetReader) -> BidsImporter:
+    """
+    Make the LORIS BIDS importer object from the CLI arguments and the parsed input BIDS dataset.
+    """
+
+    data_dir_path = get_data_dir_path_config(env)
+
+    if args.copy:
+        loris_bids_path = get_copy_dataset_path(env, bids, data_dir_path)
+    else:
+        loris_bids_path = get_no_copy_dataset_path(env, bids, data_dir_path)
+
+    loris_bids_dataset = get_or_create_loris_bids_dataset(env, loris_bids_path)
+
+    return BidsImporter(
+        args = args,
+        data_dir_path = data_dir_path,
+        loris_bids_dataset = loris_bids_dataset
+    )
+
+
+def get_copy_dataset_path(env: Env, bids: BidsDatasetReader, data_dir_path: Path) -> Path:
+    """
+    Get the LORIS BIDS dataset path relative to the LORIS data directory in the copy mode, creating
+    the directory (and any missing parent directory) if it does not exist yet.
+    """
+
+    try:
+        dataset_description = bids.dataset_description_file
+    except Exception as error:
+        log_error_exit(env, str(error))
+
+    if dataset_description is None:
+        log_error_exit(
+            env,
+            "No file 'dataset_description.json' found in the input BIDS dataset.",
+        )
+
+    # Sanitize the dataset metadata to have a usable name for the directory.
+    dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.data['Name'])
+    dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.data['BIDSVersion'])
+
+    loris_bids_path = Path('bids_imports') / f'{dataset_name}_BIDSVersion_{dataset_version}'
+
+    # Create the dataset directory and any missing parent ('bids_imports') if they do not exist yet.
+    (data_dir_path / loris_bids_path).mkdir(parents=True, exist_ok=True)
+
+    return loris_bids_path
+
+
+def get_no_copy_dataset_path(env: Env, bids: BidsDatasetReader, data_dir_path: Path) -> Path:
+    """
+    Get the LORIS BIDS dataset path relative to the LORIS data directory in the no-copy mode.
+    """
+
+    if not bids.path.is_relative_to(data_dir_path):
+        log_error_exit(
+            env,
+            "The source BIDS dataset should be inside the LORIS data directory in no-copy mode.",
+        )
+
+    return bids.path.relative_to(data_dir_path)
+
+
+def get_or_create_loris_bids_dataset(env: Env, bids_path: Path) -> DbBidsDataset:
+    """
+    Get a BIDS dataset from the database using its LORIS data-directory-relative path,
+    or create it (and flush it to the database session) if it does not already exist.
+    """
+
+    bids_dataset = try_get_bids_dataset_with_path(env.db, bids_path)
+    if bids_dataset is not None:
+        return bids_dataset
+
+    bids_dataset = DbBidsDataset(
+        path = bids_path,
+        insert_time = datetime.now(),
+    )
+
+    env.db.add(bids_dataset)
+    env.db.flush()
+
+    return bids_dataset
+
+
+def get_or_create_loris_bids_file(
+    env: Env,
+    importer: BidsImporter,
+    source_file_path: Path,
+    loris_file_path: Path,
+) -> DbBidsFile:
+    """
+    Create the LORIS database record for a BIDS file, or update its source path and hash if it exists.
+    """
+
+    # The LORIS file path is relative to the LORIS data directory; it needs to be made relative to
+    # its LORIS BIDS dataset instead.
+    bids_file_path = loris_file_path.relative_to(importer.loris_bids_dataset.path)
+
+    source_bids_file_path = source_file_path.relative_to(importer.args.source_bids_path)
+
+    derivative = source_bids_file_path.parts[0] == 'derivatives'
+
+    blake2b_hash = compute_file_blake2b_hash(source_file_path)
+
+    bids_file = try_get_bids_file_with_dataset_id_path(env.db, importer.loris_bids_dataset.id, bids_file_path)
+
+    if bids_file is None:
+        bids_file = DbBidsFile(
+            dataset_id = importer.loris_bids_dataset.id,
+            path = bids_file_path,
+            source_path = source_bids_file_path,
+            insert_time = datetime.now(),
+            blake2b_hash = blake2b_hash,
+            derivative = derivative,
+        )
+
+        env.db.add(bids_file)
+    else:
+        bids_file.source_path = source_bids_file_path
+        bids_file.blake2b_hash = blake2b_hash
+
+    env.db.flush()
+
+    return bids_file
diff --git a/python/loris_bids_importer/src/loris_bids_importer/eeg/main.py b/python/loris_bids_importer/src/loris_bids_importer/eeg/main.py
index 0ea7714fe..0a35b3161 100644
--- a/python/loris_bids_importer/src/loris_bids_importer/eeg/main.py
+++ b/python/loris_bids_importer/src/loris_bids_importer/eeg/main.py
@@ -29,10 +29,11 @@
 from loris_bids_importer.archive import import_physio_event_archive, import_physio_file_archive
 from loris_bids_importer.channels import insert_bids_channels_file
 from loris_bids_importer.copy_files import copy_loris_bids_file, get_loris_bids_file_path, get_loris_scans_path
+from loris_bids_importer.dataset import get_or_create_loris_bids_file
 from loris_bids_importer.eeg.physiological import Physiological
-from loris_bids_importer.env import BidsImportEnv
 from loris_bids_importer.events import insert_bids_event_dict_file, insert_bids_events_file
 from loris_bids_importer.file_type import get_check_bids_imaging_file_type_from_extension
+from loris_bids_importer.importer import BidsImporter
 from loris_bids_importer.physio import (
     get_check_bids_physio_file_hash,
     get_check_bids_physio_modality,
@@ -46,8 +47,8 @@ class Eeg:
     into
the database by calling the loris_bids_importer.eeg.physiological class. """ - def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: BidsDataTypeInfo, - session: DbSession, db, dataset_tag_dict, dataset_type): + def __init__(self, env: Env, importer: BidsImporter, bids_layout, bids_info: BidsDataTypeInfo, + session: DbSession, db, dataset_tag_dict): """ Constructor method for the Eeg class. @@ -60,8 +61,6 @@ def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: :param info : The BIDS import pipeline information :param dataset_tag_dict : Dict of dataset-inherited HED tags :type dataset_tag_dict : dict - :param dataset_type : raw | derivative. Type of the dataset - :type dataset_type : string """ self.env = env @@ -71,14 +70,14 @@ def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: # load the LORIS BIDS import root directory where the eeg files will # be copied - self.info = import_env - self.data_dir = self.info.data_dir_path + self.importer = importer + self.data_dir = self.importer.data_dir_path # load bids subject, visit and modality self.bids_info = bids_info # load dataset tag dict. 
Used to ensure HED tags aren't duplicated - self.dataset_tag_dict = dataset_tag_dict + self.dataset_tag_dict = dataset_tag_dict # load database handler object self.db = db @@ -99,13 +98,14 @@ def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: self.scans_file = BidsScansTsvFile(Path(scans_file_path)) # register the data into LORIS - if (dataset_type and dataset_type == 'raw'): - self.register_data(detect=False) - elif (dataset_type and dataset_type == 'derivative'): - self.register_data(derivatives=True, detect=False) - else: - self.register_data() - self.register_data(derivatives=True) + match importer.args.type: + case 'raw': + self.register_data(detect=False) + case 'derivative': + self.register_data(derivatives=True, detect=False) + case None: + self.register_data() + self.register_data(derivatives=True) env.db.commit() @@ -216,7 +216,7 @@ def register_data(self, derivatives=False, detect=True): if get_ephys_visualization_enabled_config(self.env): create_physio_channels_chunks(self.env, eeg_file) - self.info.imported_acquisitions_count += 1 + self.importer.imported_acquisitions_count += 1 def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): """ @@ -288,6 +288,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): eeg_file_data = sidecar_json.data sidecar_json_path = self.copy_file_to_loris_bids_dir(sidecar_json.path, derivatives) + get_or_create_loris_bids_file(self.env, self.importer, sidecar_json.path, sidecar_json_path) eeg_file_data['eegjson_file'] = str(sidecar_json_path) json_blake2 = compute_file_blake2b_hash(sidecar_json.path) @@ -313,7 +314,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): print(f"ERROR: {error}") sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - loris_scans_path = get_loris_scans_path(self.info, self.scans_file, self.session) + loris_scans_path = get_loris_scans_path(self.importer, self.scans_file, self.session) eeg_file_data['scans_tsv_file'] = 
loris_scans_path scans_blake2 = compute_file_blake2b_hash(self.scans_file.path) eeg_file_data['physiological_scans_tsv_file_bake2hash'] = scans_blake2 @@ -346,6 +347,13 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): eeg_file.path, derivatives ) + bids_info = get_or_create_loris_bids_file( + self.env, + self.importer, + eeg_file_path, + eeg_path, + ) + # insert the file along with its information into # physiological_file and physiological_parameter_file tables physio_file = insert_physio_file( @@ -355,13 +363,14 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): file_type, modality, output_type, - eeg_acq_time + eeg_acq_time, + bids_info, ) insert_physio_file_parameters(self.env, physio_file, eeg_file_data) self.env.db.commit() - if self.info.loris_bids_path: + if self.importer.args.copy: # If we copy the file in assembly_bids and # if the EEG file was a set file, then update the filename for the .set # and .fdt files in the .set file so it can find the proper file for @@ -437,6 +446,7 @@ def fetch_and_insert_electrode_file( # get the blake2b hash of the electrode file blake2 = compute_file_blake2b_hash(electrode_file.path) + get_or_create_loris_bids_file(self.env, self.importer, Path(electrode_file.path), electrode_path) # insert the electrode data in the database electrode_ids = physiological.insert_electrode_file( electrode_data, electrode_path, physiological_file, blake2 @@ -477,6 +487,14 @@ def fetch_and_insert_electrode_file( electrode_metadata = json.load(metadata_file) # get the blake2b hash of the json events file blake2 = compute_file_blake2b_hash(coordsystem_metadata_file.path) + + get_or_create_loris_bids_file( + self.env, + self.importer, + Path(coordsystem_metadata_file.path), + electrode_metadata_path, + ) + # insert event metadata in the database physiological.insert_electrode_metadata( electrode_metadata, @@ -533,7 +551,7 @@ def fetch_and_insert_channel_file( # Insert the channel data in the database. 
return insert_bids_channels_file( self.env, - self.info, + self.importer, physiological_file, self.session, self.bids_info, @@ -611,6 +629,7 @@ def fetch_and_insert_event_files( _, file_tag_dict = insert_bids_event_dict_file( self.env, + self.importer, EventDictFileSource.from_file(physiological_file), event_dict_file, event_metadata_path, @@ -628,6 +647,7 @@ def fetch_and_insert_event_files( # insert event data in the database insert_bids_events_file( self.env, + self.importer, physiological_file, events_data_file, event_path, @@ -658,7 +678,7 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False): """ loris_file_path = get_loris_bids_file_path( - self.info, + self.importer, self.session, self.bids_info.data_type, Path(file), @@ -669,7 +689,7 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False): # the same metadata file several times if it is shared acquisitions. A warning has been # added. # TODO: Properly handle metadata files shared across several acquisitions. 
- full_file_path = self.info.data_dir_path / loris_file_path + full_file_path = self.importer.data_dir_path / loris_file_path if full_file_path.exists(): log_warning( self.env, @@ -678,5 +698,5 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False): return loris_file_path - copy_loris_bids_file(self.info, Path(file), loris_file_path) + copy_loris_bids_file(self.importer, Path(file), loris_file_path) return loris_file_path diff --git a/python/loris_bids_importer/src/loris_bids_importer/events.py b/python/loris_bids_importer/src/loris_bids_importer/events.py index a3e962ba8..c806ee604 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/events.py +++ b/python/loris_bids_importer/src/loris_bids_importer/events.py @@ -24,12 +24,13 @@ from loris_utils.crypto import compute_file_blake2b_hash from loris_bids_importer.copy_files import copy_loris_bids_file, get_loris_bids_root_file_path -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.dataset import get_or_create_loris_bids_file +from loris_bids_importer.importer import BidsImporter def import_bids_root_event_dict_file( env: Env, - import_env: BidsImportEnv, + importer: BidsImporter, project: DbProject, bids_event_dict_file: BidsJsonFile, ) -> tuple[DbPhysioEventFile, dict[str, dict[str, list[list[TagGroupMember]]]]]: @@ -37,12 +38,13 @@ def import_bids_root_event_dict_file( Import a root-level BIDS event dictionary file and its associated HED tags into LORIS. 
""" - loris_event_dict_file_path = get_loris_bids_root_file_path(import_env, bids_event_dict_file.path) + loris_event_dict_file_path = get_loris_bids_root_file_path(importer, bids_event_dict_file.path) - copy_loris_bids_file(import_env, bids_event_dict_file.path, loris_event_dict_file_path) + copy_loris_bids_file(importer, bids_event_dict_file.path, loris_event_dict_file_path) event_dict_file, hed_tags_dict = insert_bids_event_dict_file( env, + importer, EventDictFileSource.from_dataset(project), bids_event_dict_file, loris_event_dict_file_path, @@ -55,6 +57,7 @@ def import_bids_root_event_dict_file( def insert_bids_event_dict_file( env: Env, + importer: BidsImporter, source: EventDictFileSource, bids_event_dict_file: BidsJsonFile, loris_event_dict_file_path: Path, @@ -63,7 +66,8 @@ def insert_bids_event_dict_file( Insert a BIDS event dictionary file and its associated HED tags into the LORIS database. """ - event_dict_file = insert_event_dict_file(env, source, loris_event_dict_file_path) + bids_info = get_or_create_loris_bids_file(env, importer, bids_event_dict_file.path, loris_event_dict_file_path) + event_dict_file = insert_event_dict_file(env, bids_info, source, loris_event_dict_file_path) blake2b_hash = compute_file_blake2b_hash(bids_event_dict_file.path) @@ -80,6 +84,7 @@ def insert_bids_event_dict_file( # TODO: This function contains a lot of legacy code and needs to be refactored. 
def insert_bids_events_file( env: Env, + importer: BidsImporter, physio_file: DbPhysioFile, events_file: BidsEventsTsvFile, loris_events_file_path: Path, @@ -99,7 +104,8 @@ def insert_bids_events_file( blake2_hash = compute_file_blake2b_hash(events_file.path) - event_file = insert_events_file(env, physio_file, loris_events_file_path) + bids_info = get_or_create_loris_bids_file(env, importer, events_file.path, loris_events_file_path) + event_file = insert_events_file(env, physio_file, bids_info, loris_events_file_path) # insert blake2b hash of task event file into physiological_parameter_file insert_physio_file_parameter(env, physio_file, 'event_file_blake2b_hash', blake2_hash) diff --git a/python/loris_bids_importer/src/loris_bids_importer/env.py b/python/loris_bids_importer/src/loris_bids_importer/importer.py similarity index 58% rename from python/loris_bids_importer/src/loris_bids_importer/env.py rename to python/loris_bids_importer/src/loris_bids_importer/importer.py index d1b074e80..efa44f373 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/env.py +++ b/python/loris_bids_importer/src/loris_bids_importer/importer.py @@ -1,26 +1,44 @@ from dataclasses import dataclass from pathlib import Path +from typing import Literal + +from lib.db.models.bids_dataset import DbBidsDataset @dataclass -class BidsImportEnv: +class BidsImporterArgs: """ - Information about a specific BIDS import pipeline run. + The CLI arguments given to the BIDS importer. """ - data_dir_path: Path + source_bids_path: Path + type: Literal['raw', 'derivative', None] + bids_validation: bool + create_candidate: bool + create_session: bool + copy: bool + verbose: bool + + +@dataclass +class BidsImporter: """ - The LORIS data directory path. + Information about the current BIDS import pipeline run. """ - source_bids_path: Path + args: BidsImporterArgs + """ + The CLI arguments given to the BIDS importer. + """ + + data_dir_path: Path """ - The source BIDS directory path. 
+ The LORIS data directory path. """ - loris_bids_path: Path | None + loris_bids_dataset: DbBidsDataset """ - The LORIS BIDS directory path for this import, relative to the LORIS data directory. + The LORIS BIDS dataset populated by this import. """ imported_acquisitions_count: int = 0 diff --git a/python/loris_bids_importer/src/loris_bids_importer/main.py b/python/loris_bids_importer/src/loris_bids_importer/main.py index 538011ac2..1f350c051 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/main.py +++ b/python/loris_bids_importer/src/loris_bids_importer/main.py @@ -1,6 +1,6 @@ from typing import Any -from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config +from lib.config import get_default_bids_visit_label_config from lib.database import Database from lib.db.models.session import DbSession from lib.db.queries.candidate import try_get_candidate_with_psc_id @@ -10,31 +10,28 @@ from loris_bids_utils.mri.reader import BidsMriDataTypeReader from loris_bids_utils.reader import BidsDatasetReader, BidsDataTypeReader, BidsSessionReader -from loris_bids_importer.args import Args from loris_bids_importer.copy_files import ( copy_bids_participants_file, copy_bids_scans_file, copy_bids_static_files, - get_loris_bids_dataset_path, get_loris_bids_root_file_path, get_loris_scans_path, ) +from loris_bids_importer.dataset import make_bids_importer from loris_bids_importer.eeg.main import Eeg -from loris_bids_importer.env import BidsImportEnv from loris_bids_importer.events import import_bids_root_event_dict_file +from loris_bids_importer.importer import BidsImporter, BidsImporterArgs from loris_bids_importer.mri.main import import_bids_mri_data_type from loris_bids_importer.print import print_bids_import_summary, print_bids_info from loris_bids_importer.validation.sessions import validate_bids_sessions from loris_bids_importer.validation.subjects import validate_bids_subjects -def import_bids_dataset(env: Env, args: Args, legacy_db: 
Database): +def import_bids_dataset(env: Env, args: BidsImporterArgs, legacy_db: Database): """ Read the provided BIDS dataset and import it into LORIS. """ - data_dir_path = get_data_dir_path_config(env) - log(env, "Parsing BIDS dataset...") bids = BidsDatasetReader(args.source_bids_path, args.type == 'derivative', args.bids_validation) @@ -61,33 +58,11 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): env.db.commit() - # Get the LORIS BIDS import directory path and create the directory if needed. - - if args.copy: - try: - dataset_description = bids.dataset_description_file - except Exception as error: - log_error_exit(env, str(error)) - - if dataset_description is None: - log_error_exit( - env, - "No file 'dataset_description.json' found in the input BIDS dataset.", - ) - - loris_bids_path = get_loris_bids_dataset_path(env, dataset_description) - else: - loris_bids_path = None - - import_env = BidsImportEnv( - data_dir_path = data_dir_path, - loris_bids_path = loris_bids_path.relative_to(data_dir_path) if loris_bids_path is not None else None, - source_bids_path = args.source_bids_path, - ) + importer = make_bids_importer(env, args, bids) # Copy the static BIDS files. - copy_bids_static_files(import_env) + copy_bids_static_files(env, importer) # Get the BIDS event dictionary. @@ -97,7 +72,7 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): else: _, dataset_tag_dict = import_bids_root_event_dict_file( env, - import_env, + importer, single_project, bids.event_dict_file, ) @@ -105,23 +80,22 @@ def import_bids_dataset(env: Env, args: Args, legacy_db: Database): # Copy the `participants.tsv` file rows. 
if bids.participants_file is not None: - loris_participants_path = get_loris_bids_root_file_path(import_env, bids.participants_file.path) - copy_bids_participants_file(import_env, bids.participants_file, loris_participants_path) + loris_participants_path = get_loris_bids_root_file_path(importer, bids.participants_file.path) + copy_bids_participants_file(env, importer, bids.participants_file, loris_participants_path) # Process each session directory. for bids_session in bids.sessions: - import_bids_session(env, import_env, args, bids_session, dataset_tag_dict, legacy_db) + import_bids_session(env, importer, bids_session, dataset_tag_dict, legacy_db) # Print import summary. - print_bids_import_summary(env, import_env) + print_bids_import_summary(env, importer) def import_bids_session( env: Env, - import_env: BidsImportEnv, - args: Args, + importer: BidsImporter, bids_session: BidsSessionReader, dataset_tag_dict: dict[Any, Any], legacy_db: Database, @@ -155,8 +129,8 @@ def import_bids_session( try: # Read the scans.tsv property to raise an exception if the file is incorrect. if bids_session.scans_file is not None: - loris_scans_path = get_loris_scans_path(import_env, bids_session.scans_file, session) - copy_bids_scans_file(import_env, bids_session.scans_file, loris_scans_path) + loris_scans_path = get_loris_scans_path(importer, bids_session.scans_file, session) + copy_bids_scans_file(env, importer, bids_session.scans_file, loris_scans_path) except Exception as exception: log_warning( env, @@ -166,13 +140,12 @@ def import_bids_session( # Process each data type directory. 
for data_type in bids_session.data_types: - import_bids_data_type(env, import_env, args, session, data_type, dataset_tag_dict, legacy_db) + import_bids_data_type(env, importer, session, data_type, dataset_tag_dict, legacy_db) def import_bids_data_type( env: Env, - import_env: BidsImportEnv, - args: Args, + importer: BidsImporter, session: DbSession, data_type: BidsDataTypeReader, dataset_tag_dict: dict[Any, Any], @@ -189,15 +162,14 @@ def import_bids_data_type( match data_type: case BidsMriDataTypeReader(): - import_bids_mri_data_type(env, import_env, session, data_type) + import_bids_mri_data_type(env, importer, session, data_type) case BidsDataTypeReader(): - import_bids_eeg_data_type_files(env, import_env, args, session, data_type, dataset_tag_dict, legacy_db) + import_bids_eeg_data_type_files(env, importer, session, data_type, dataset_tag_dict, legacy_db) def import_bids_eeg_data_type_files( env: Env, - import_env: BidsImportEnv, - args: Args, + importer: BidsImporter, session: DbSession, data_type: BidsDataTypeReader, dataset_tag_dict: dict[Any, Any], @@ -210,13 +182,12 @@ def import_bids_eeg_data_type_files( try: Eeg( env = env, - import_env = import_env, + importer = importer, bids_layout = data_type.session.subject.dataset.layout, bids_info = data_type.info, db = legacy_db, session = session, dataset_tag_dict = dataset_tag_dict, - dataset_type = args.type, ) except Exception as exception: log_error( @@ -227,4 +198,4 @@ def import_bids_eeg_data_type_files( "Skipping." 
) ) - import_env.failed_acquisitions_count += 1 + importer.failed_acquisitions_count += 1 diff --git a/python/loris_bids_importer/src/loris_bids_importer/mri/main.py b/python/loris_bids_importer/src/loris_bids_importer/mri/main.py index 829b6988a..2e246c5a8 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/mri/main.py +++ b/python/loris_bids_importer/src/loris_bids_importer/mri/main.py @@ -20,8 +20,9 @@ from loris_bids_importer.acquisitions import import_bids_acquisitions from loris_bids_importer.copy_files import copy_loris_bids_file, get_loris_bids_file_path -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.dataset import get_or_create_loris_bids_file from loris_bids_importer.file_type import get_check_bids_imaging_file_type_from_extension +from loris_bids_importer.importer import BidsImporter from loris_bids_importer.mri.sidecar import add_bids_mri_sidecar_file_parameters from loris_bids_importer.scans import add_bids_scans_file_parameters @@ -44,7 +45,7 @@ def import_bids_mri_data_type( env: Env, - import_env: BidsImportEnv, + importer: BidsImporter, session: DbSession, data_type: BidsMriDataTypeReader, ): @@ -54,11 +55,11 @@ def import_bids_mri_data_type( import_bids_acquisitions( env, - import_env, + importer, data_type.acquisitions, lambda acquisition, bids_info: import_bids_mri_acquisition( env, - import_env, + importer, session, acquisition, bids_info, @@ -68,7 +69,7 @@ def import_bids_mri_data_type( def import_bids_mri_acquisition( env: Env, - import_env: BidsImportEnv, + importer: BidsImporter, session: DbSession, acquisition: MriAcquisition, bids_info: BidsAcquisitionInfo, @@ -80,14 +81,14 @@ def import_bids_mri_acquisition( # The files to copy to LORIS, with the source path on the left and the LORIS path on the right. 
files_to_copy: list[tuple[Path, Path]] = [] - loris_file_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, acquisition.nifti_path) + loris_file_path = get_loris_bids_file_path(importer, session, bids_info.data_type, acquisition.nifti_path) files_to_copy.append((acquisition.nifti_path, loris_file_path)) # Check whether the file is already registered in LORIS. loris_file = try_get_file_with_path(env.db, loris_file_path) if loris_file is not None: - import_env.ignored_acquisitions_count += 1 + importer.ignored_acquisitions_count += 1 log(env, f"File '{loris_file_path}' is already registered in LORIS. Skipping.") return @@ -125,7 +126,7 @@ def import_bids_mri_acquisition( if acquisition.sidecar_file is not None: add_bids_mri_sidecar_file_parameters(env, acquisition.sidecar_file, file_parameters) json_loris_path = get_loris_bids_file_path( - import_env, + importer, session, bids_info.data_type, acquisition.sidecar_file.path, @@ -143,22 +144,26 @@ def import_bids_mri_acquisition( for aux_file_type, aux_file_path in aux_file_paths: aux_file_hash = compute_file_blake2b_hash(aux_file_path) - aux_file_loris_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, aux_file_path) + aux_file_loris_path = get_loris_bids_file_path(importer, session, bids_info.data_type, aux_file_path) + get_or_create_loris_bids_file(env, importer, aux_file_path, aux_file_loris_path) files_to_copy.append((aux_file_path, aux_file_loris_path)) file_parameters[f'bids_{aux_file_type}'] = str(aux_file_loris_path) file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash # Copy the files on the file system. for copied_file_path, loris_copied_file_path in files_to_copy: - copy_loris_bids_file(import_env, copied_file_path, loris_copied_file_path) + copy_loris_bids_file(importer, copied_file_path, loris_copied_file_path) # Register the file and its parameters in the database. 
+ bids_file = get_or_create_loris_bids_file(env, importer, acquisition.nifti_path, loris_file_path) + file = register_mri_file( env, loris_file_path, file_type, session, + bids_file, scan_type, None, None, diff --git a/python/loris_bids_importer/src/loris_bids_importer/print.py b/python/loris_bids_importer/src/loris_bids_importer/print.py index 8d82be18d..55e069d4f 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/print.py +++ b/python/loris_bids_importer/src/loris_bids_importer/print.py @@ -2,7 +2,7 @@ from lib.logging import log from loris_bids_utils.reader import BidsDatasetReader -from loris_bids_importer.env import BidsImportEnv +from loris_bids_importer.importer import BidsImporter def print_bids_info(env: Env, bids: BidsDatasetReader): @@ -23,7 +23,7 @@ def print_bids_info(env: Env, bids: BidsDatasetReader): log(env, f"- {data_type_name}") -def print_bids_import_summary(env: Env, import_env: BidsImportEnv): +def print_bids_import_summary(env: Env, importer: BidsImporter): """ Print a summary of this BIDS import process. """ @@ -31,8 +31,8 @@ def print_bids_import_summary(env: Env, import_env: BidsImportEnv): log( env, ( - f"Processed {import_env.processed_acquisitions_count} acquisitions, including" - f" {import_env.imported_acquisitions_count} imports, {import_env.ignored_acquisitions_count} ignores, and" - f" {import_env.failed_acquisitions_count} errors." + f"Processed {importer.processed_acquisitions_count} acquisitions, including" + f" {importer.imported_acquisitions_count} imports, {importer.ignored_acquisitions_count} ignores, and" + f" {importer.failed_acquisitions_count} errors." 
), ) diff --git a/python/loris_bids_importer/src/loris_bids_importer/scripts/import_bids_dataset.py b/python/loris_bids_importer/src/loris_bids_importer/scripts/import_bids_dataset.py index 4a3cccca8..fb7262a52 100755 --- a/python/loris_bids_importer/src/loris_bids_importer/scripts/import_bids_dataset.py +++ b/python/loris_bids_importer/src/loris_bids_importer/scripts/import_bids_dataset.py @@ -7,12 +7,12 @@ from lib.logging import log_error_exit from lib.lorisgetopt import LorisGetOpt -from loris_bids_importer.args import Args +from loris_bids_importer.importer import BidsImporterArgs from loris_bids_importer.main import import_bids_dataset -def pack_args(options_dict: dict[str, Any]) -> Args: - return Args( +def pack_args(options_dict: dict[str, Any]) -> BidsImporterArgs: + return BidsImporterArgs( source_bids_path = Path(options_dict['directory']['value']), type = options_dict['type']['value'], bids_validation = not options_dict['no-bids-validation']['value'], diff --git a/revert-bids-tables.sql b/revert-bids-tables.sql new file mode 100644 index 000000000..c1897327c --- /dev/null +++ b/revert-bids-tables.sql @@ -0,0 +1,29 @@ +ALTER TABLE `physiological_file` + DROP FOREIGN KEY `physiological_file_head_shape_file_id_fk`, + ADD CONSTRAINT `physiological_file_head_shape_file_id_fk` + FOREIGN KEY (`HeadShapeFileID`) REFERENCES `meg_ctf_head_shape_file` (`ID`); + +ALTER TABLE `meg_ctf_head_shape_file` + DROP FOREIGN KEY `meg_ctf_head_shape_file_bids_info_id_fk`, + DROP KEY `meg_ctf_head_shape_file_bids_info_id_fk_idx`, + DROP COLUMN `InsertTime`, + DROP COLUMN `BidsInfoID`; + +ALTER TABLE `physiological_event_file` + DROP FOREIGN KEY `physiological_event_file_bids_info_id_fk`, + DROP KEY `physiological_event_file_bids_info_id_fk_idx`, + DROP COLUMN `BidsInfoID`; + +ALTER TABLE `physiological_file` + DROP FOREIGN KEY `physiological_file_bids_info_id_fk`, + DROP KEY `physiological_file_bids_info_id_fk_idx`, + DROP COLUMN `BidsInfoID`; + +ALTER TABLE `files` + DROP 
FOREIGN KEY `files_bids_info_id_fk`, + DROP KEY `files_bids_info_id_fk_idx`, + DROP COLUMN `BidsInfoID`; + +DROP TABLE `bids_file`; + +DROP TABLE `bids_dataset`;