Source code for neuroconv.tools.data_transfers._dandi

"""Collection of helper functions for assessing and performing automated data transfers for the DANDI archive."""

import os
from pathlib import Path
from shutil import rmtree
from tempfile import mkdtemp
from typing import Optional, Union
from warnings import warn

from pydantic import DirectoryPath
from pynwb import NWBHDF5IO


def automatic_dandi_upload(
    dandiset_id: str,
    nwb_folder_path: DirectoryPath,
    dandiset_folder_path: Optional[DirectoryPath] = None,
    version: str = "draft",
    staging: bool = False,
    cleanup: bool = False,
    number_of_jobs: Union[int, None] = None,
    number_of_threads: Union[int, None] = None,
) -> list[str]:
    """
    Fully automated upload of NWB files to a Dandiset.

    Requires an API token set as an environment variable named ``DANDI_API_KEY``.

    To set this in your bash terminal in Linux or macOS, run

        export DANDI_API_KEY=...

    or in Windows

        set DANDI_API_KEY=...

    DO NOT STORE THIS IN ANY PUBLICLY SHARED CODE.

    Parameters
    ----------
    dandiset_id : str
        Six-digit string identifier for the Dandiset the NWB files will be uploaded to.
    nwb_folder_path : folder path
        Folder containing the NWB files to be uploaded.
    dandiset_folder_path : folder path, optional
        A separate folder location within which to download the Dandiset.
        Used in cases where you do not have write permissions for the parent of the 'nwb_folder_path' directory.
        Default behavior downloads the Dandiset to a folder adjacent to the 'nwb_folder_path'.
    version : str, default: "draft"
        The version of the Dandiset to download.
        Even if no data has been uploaded yet, this step downloads an essential Dandiset metadata yaml file.
        Default is "draft", which is the latest state.
    staging : bool, default: False
        Is the Dandiset hosted on the staging server? This is mostly for testing purposes.
    cleanup : bool, default: False
        Whether to remove the dandiset folder path and nwb_folder_path after upload.
    number_of_jobs : int, optional
        The number of jobs to use in the DANDI upload process.
    number_of_threads : int, optional
        The number of threads to use in the DANDI upload process.
    """
    from dandi.download import download as dandi_download
    from dandi.organize import organize as dandi_organize
    from dandi.upload import upload as dandi_upload

    assert os.getenv("DANDI_API_KEY"), (
        "Unable to find environment variable 'DANDI_API_KEY'. "
        "Please retrieve your token from DANDI and set this environment variable."
    )

    dandiset_folder_path = (
        Path(mkdtemp(dir=nwb_folder_path.parent)) if dandiset_folder_path is None else dandiset_folder_path
    )
    dandiset_path = dandiset_folder_path / dandiset_id

    # Odd bit of logic upstream: https://github.com/dandi/dandi-cli/blob/master/dandi/cli/cmd_upload.py#L92-L96
    if number_of_threads is not None and number_of_threads > 1 and number_of_jobs is None:
        number_of_jobs = -1

    url_base = "https://gui-staging.dandiarchive.org" if staging else "https://dandiarchive.org"
    dandiset_url = f"{url_base}/dandiset/{dandiset_id}/{version}"
    dandi_download(urls=dandiset_url, output_dir=str(dandiset_folder_path), get_metadata=True, get_assets=False)
    assert dandiset_path.exists(), "DANDI download failed!"
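    # NOTE: dandi_organize below renames files to the DANDI convention, typically
    # 'sub-<subject_id>_ses-<session_id>_<suffixes>.nwb' when a session ID is available
    # and 'sub-<subject_id>_<suffixes>.nwb' when it is not; the manual renaming loop
    # further below relies on the 'sub-...' entity coming first in the stem so that
    # 'ses-...' can be spliced in at position 1.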
    # TODO: need PR on DANDI to expose number of jobs
    dandi_organize(
        paths=str(nwb_folder_path),
        dandiset_path=str(dandiset_path),
        devel_debug=True if number_of_jobs == 1 else False,
    )
    # Materialize the globbed paths before renaming files in place below
    organized_nwbfiles = list(dandiset_path.rglob("*.nwb"))

    # DANDI has yet to implement forcing of session_id inclusion in the organize step.
    # This manually enforces it when only a single session per subject is organized.
    for organized_nwbfile in organized_nwbfiles:
        if "ses" not in organized_nwbfile.stem:
            with NWBHDF5IO(path=organized_nwbfile, mode="r") as io:
                nwbfile = io.read()
                session_id = nwbfile.session_id
            dandi_stem = organized_nwbfile.stem
            dandi_stem_split = dandi_stem.split("_")
            dandi_stem_split.insert(1, f"ses-{session_id}")
            corrected_name = "_".join(dandi_stem_split) + ".nwb"
            organized_nwbfile.rename(organized_nwbfile.parent / corrected_name)
    organized_nwbfiles = [str(x) for x in dandiset_path.rglob("*.nwb")]
    # The above block can be removed once DANDI adds the feature

    assert len(list(dandiset_path.iterdir())) > 1, "DANDI organize failed!"

    dandi_instance = "dandi-staging" if staging else "dandi"
    dandi_upload(
        paths=organized_nwbfiles,
        dandi_instance=dandi_instance,
        jobs=number_of_jobs,
        jobs_per_file=number_of_threads,
    )

    # Cleanup should be confirmed manually; Windows especially can complain
    if cleanup:
        try:
            rmtree(path=dandiset_folder_path)
            rmtree(path=nwb_folder_path)
        except PermissionError:  # pragma: no cover
            warn("Unable to clean up source files and dandiset! Please manually delete them.")

    return organized_nwbfiles
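A minimal usage sketch, assuming ``DANDI_API_KEY`` is already set in the environment and that ``automatic_dandi_upload`` is re-exported from ``neuroconv.tools.data_transfers`` (the Dandiset ID and folder path below are hypothetical):

    from pathlib import Path

    from neuroconv.tools.data_transfers import automatic_dandi_upload

    # Upload all NWB files in a local folder to a (hypothetical) staging Dandiset.
    # staging=True targets the gui-staging.dandiarchive.org server, which is safe for testing;
    # cleanup=False leaves both the temporary dandiset folder and the source NWB files on disk.
    organized_files = automatic_dandi_upload(
        dandiset_id="000123",  # hypothetical six-digit Dandiset identifier
        nwb_folder_path=Path("/data/session_nwbfiles"),  # hypothetical folder of NWB files
        staging=True,
        cleanup=False,
    )
    print(organized_files)  # paths of the organized files that were uploaded

Because the Dandiset metadata is downloaded to a temporary folder adjacent to ``nwb_folder_path`` by default, the caller needs write permission in that parent directory; pass ``dandiset_folder_path`` to redirect the download otherwise.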