# Source code for habit.core.preprocessing.dcm2niix_converter

"""
DICOM to NIfTI converter using dcm2niix tool.

This module provides functionality to batch convert DICOM files to NIfTI format
using the dcm2niix tool, with integration into the HABIT preprocessing pipeline.
"""

import os
import subprocess
import shutil
from typing import Dict, Any, Optional, Union, List
from pathlib import Path
import tempfile
import SimpleITK as sitk
import json

from .base_preprocessor import BasePreprocessor
from .preprocessor_factory import PreprocessorFactory
from habit.utils.progress_utils import CustomTqdm
from habit.utils.file_system_utils import safe_mkdir


@PreprocessorFactory.register("dcm2nii")
class Dcm2niixConverter(BasePreprocessor):
    """
    Convert DICOM files to NIfTI format using dcm2niix tool.

    This preprocessor takes DICOM directories and converts them to NIfTI format
    using the dcm2niix command-line tool. It supports batch processing and
    integrates with the HABIT preprocessing pipeline.
    """

    def __init__(
        self,
        keys: Union[str, List[str]],  # Keys whose values are DICOM directory paths
        dcm2niix_path: Optional[str] = None,  # No explicit executable path by default
        filename_format: Optional[str] = None,  # No explicit output filename format by default
        adjacent_dicoms: bool = True,  # Assume DICOMs of a series share a folder by default
        compress: bool = True,  # Compress output files by default
        anonymize: bool = False,  # Do not anonymize filenames by default
        ignore_derived: bool = False,  # Do not ignore derived images by default
        crop_images: bool = False,  # Do not crop images by default
        generate_json: bool = False,  # Do not generate JSON sidecars by default
        verbose: bool = False,  # No verbose output by default
        batch_mode: bool = True,  # Batch mode enabled by default
        merge_slices: Optional[str] = "2",  # "y"/"1"=default, "2"=by series, "n"/"0"=no merge, None=unspecified
        single_file_mode: Optional[bool] = None,  # True=-s y, False=-s n, None=unspecified (recommended)
        allow_missing_keys: bool = False,  # Missing keys are an error by default
        **kwargs
    ):
        """
        Initialize the dcm2niix converter.

        Args:
            keys (Union[str, List[str]]): Keys containing DICOM directory paths to convert
            dcm2niix_path (Optional[str]): Full path to dcm2niix executable or directory containing it
            filename_format (Optional[str]): Output filename format (default: uses subject name)
            adjacent_dicoms (bool): Pass "-a y" (images of a series are in the same folder)
                instead of "-a n" (default: True)
            compress (bool): Compress output files (default: True)
            anonymize (bool): Anonymize filenames (default: False)
            ignore_derived (bool): Ignore derived images (default: False)
            crop_images (bool): Crop images (default: False)
            generate_json (bool): Generate BIDS JSON sidecar files (default: False)
            verbose (bool): Verbose output (default: False)
            batch_mode (bool): Enable batch mode (default: True)
            merge_slices (Optional[str]): Merge mode - "y"/"1"=default merge, "2"=merge by series,
                "n"/"0"=no merge, None=don't specify (default: "2")
            single_file_mode (Optional[bool]): Single file mode - True=force single file (-s y),
                False=allow multiple (-s n), None=don't specify/use default (default: None)
            allow_missing_keys (bool): Allow missing keys (default: False)
            **kwargs: Additional parameters (accepted for compatibility; not used here)

        Raises:
            RuntimeError: If the dcm2niix executable cannot be run
        """
        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)

        # Setup logging first: the environment setup below already logs.
        from habit.utils.log_utils import get_module_logger
        self.logger = get_module_logger(__name__)

        # Setup dcm2niix path and environment
        self.dcm2niix_executable = self._setup_dcm2niix_environment(dcm2niix_path)

        self.filename_format = filename_format
        self.compress = compress
        self.adjacent_dicoms = adjacent_dicoms
        self.anonymize = anonymize
        self.ignore_derived = ignore_derived
        self.crop_images = crop_images
        self.generate_json = generate_json
        self.verbose = verbose
        self.batch_mode = batch_mode
        self.merge_slices = merge_slices
        self.single_file_mode = single_file_mode

        # Verify dcm2niix is available
        self._verify_dcm2niix()

    def _setup_dcm2niix_environment(self, dcm2niix_path: Optional[str]) -> str:
        """
        Setup dcm2niix environment by adding the executable path to PATH if provided.

        Args:
            dcm2niix_path (Optional[str]): Path to dcm2niix executable or directory containing it

        Returns:
            str: Name of the dcm2niix executable to use
        """
        if not dcm2niix_path:
            # Use default executable name if no path provided
            return "dcm2niix"

        dcm2niix_path_obj = Path(dcm2niix_path)

        if dcm2niix_path_obj.is_file():
            # Path points to the executable itself
            executable_name = dcm2niix_path_obj.name
            dcm2niix_dir = dcm2niix_path_obj.parent
        elif dcm2niix_path_obj.is_dir():
            # Path points to the directory containing the executable
            executable_name = "dcm2niix"
            dcm2niix_dir = dcm2niix_path_obj
        else:
            self.logger.warning(f"Specified dcm2niix path does not exist: {dcm2niix_path}")
            return "dcm2niix"

        current_path = os.environ.get('PATH', '')
        dcm2niix_path_str = str(dcm2niix_dir)

        # Compare against individual PATH entries. A substring test on the whole
        # PATH string would wrongly treat "/opt/bin" as present when only
        # "/opt/bin2" is listed.
        existing_entries = {
            os.path.normcase(entry) for entry in current_path.split(os.pathsep) if entry
        }

        if os.path.normcase(dcm2niix_path_str) not in existing_entries:
            # Prepend so this installation wins over any other one on PATH
            if current_path:
                new_path = f"{dcm2niix_path_str}{os.pathsep}{current_path}"
            else:
                new_path = dcm2niix_path_str
            os.environ['PATH'] = new_path
            self.logger.info(f"Added dcm2niix path to environment: {dcm2niix_path_str}")
        else:
            self.logger.info(f"dcm2niix path already in environment: {dcm2niix_path_str}")

        return executable_name

    def _verify_dcm2niix(self) -> None:
        """
        Verify that dcm2niix executable is available.

        Raises:
            RuntimeError: If dcm2niix is not found or not executable
        """
        try:
            result = subprocess.run(
                [self.dcm2niix_executable, "--help"],
                capture_output=True,
                text=True,
                check=False
            )
        except OSError as err:
            # Covers FileNotFoundError (missing) and PermissionError (not executable)
            raise RuntimeError(self._get_dcm2niix_not_found_message()) from err

        # A non-zero exit is tolerated as long as the tool identified itself
        # in its output.
        tool_output = f"{result.stdout or ''}{result.stderr or ''}".lower()
        if result.returncode != 0 and "dcm2niix" not in tool_output:
            raise RuntimeError(self._get_dcm2niix_not_found_message())

        self.logger.info(f"dcm2niix executable verified: {self.dcm2niix_executable}")

    def _get_dcm2niix_not_found_message(self) -> str:
        """
        Generate helpful error message with installation instructions when dcm2niix is not found.

        Returns:
            str: Error message with installation instructions
        """
        release_url = "https://github.com/rordenlab/dcm2niix/releases/latest/download"
        lines = [
            f"dcm2niix executable not found: {self.dcm2niix_executable}",
            "",
            "Please install dcm2niix using one of the following methods:",
            "",
            "1. Using pip (recommended):",
            "   python -m pip install dcm2niix",
            "",
            "2. Using conda:",
            "   conda install -c conda-forge dcm2niix",
            "",
            "3. Using package manager:",
            "   - Debian/Ubuntu: sudo apt-get install dcm2niix",
            "   - MacOS Homebrew: brew install dcm2niix",
            "   - MacOS MacPorts: sudo port install dcm2niix",
            "",
            "4. Download pre-built binaries:",
            f"   - Linux: curl -fLO {release_url}/dcm2niix_lnx.zip",
            f"   - MacOS: curl -fLO {release_url}/macos_dcm2niix.pkg",
            f"   - Windows: curl -fLO {release_url}/dcm2niix_win.zip",
            "",
            "5. MRIcroGL includes dcm2niix:",
            "   - NITRC: https://www.nitrc.org/projects/mricrogl",
            "   - GitHub: https://github.com/rordenlab/MRIcroGL",
            "",
            "For more information, visit: https://github.com/rordenlab/dcm2niix",
            "",
        ]
        return "\n".join(lines)

    def _build_dcm2niix_command(
        self,
        input_dir: str,
        output_dir: str,
        filename_format: Optional[str] = None
    ) -> List[str]:
        """
        Build dcm2niix command with specified parameters.

        Args:
            input_dir (str): Input DICOM directory
            output_dir (str): Output directory for NIfTI files
            filename_format (Optional[str]): Output filename format

        Returns:
            List[str]: Command components for subprocess execution
        """
        cmd = [self.dcm2niix_executable]

        # Add filename format if specified
        if filename_format:
            cmd.extend(["-f", filename_format])

        # -a adjacent DICOMs (images from same series always in same folder)
        # for faster conversion (n/y, default n)
        cmd.extend(["-a", "y" if self.adjacent_dicoms else "n"])

        # Control JSON generation (BIDS sidecar)
        cmd.extend(["-b", "y" if self.generate_json else "n"])

        # Add boolean options
        if self.ignore_derived:
            cmd.extend(["-i", "y"])

        # NOTE(review): in the dcm2niix usage text "-l" appears to control
        # lossless 16-bit scaling rather than a batch mode - confirm intent.
        if self.batch_mode:
            cmd.extend(["-l", "y"])

        # Merge 2D slices into 3D volumes
        #   -m 0 or -m n: no merging
        #   -m 1 or -m y: merge 2D slices (default)
        #   -m 2: merge based on series (more aggressive)
        #   None: don't specify, use dcm2niix default
        if self.merge_slices is not None:
            cmd.extend(["-m", str(self.merge_slices)])

        # NOTE(review): in the dcm2niix usage text "-p" appears to select
        # Philips precise float scaling rather than anonymization - confirm intent.
        if self.anonymize:
            cmd.extend(["-p", "y"])

        # Single file mode
        # -s y may force 4D structure, -s n splits volumes.
        # For 3D output, it's often best to NOT specify this parameter.
        if self.single_file_mode is not None:
            cmd.extend(["-s", "y" if self.single_file_mode else "n"])

        if self.crop_images:
            cmd.extend(["-x", "y"])

        if self.verbose:
            cmd.extend(["-v", "y"])

        if self.compress:
            cmd.extend(["-z", "y"])

        # Output directory first, then the input directory as the positional argument
        cmd.extend(["-o", output_dir])
        cmd.append(input_dir)

        return cmd

    def _run_dcm2niix(self, cmd_list: List[str], subject_id: str) -> None:
        """
        Run a dcm2niix command and raise if it exits with a non-zero status.

        The command is passed as an argument list without a shell, so paths and
        filename formats containing spaces or shell metacharacters reach
        dcm2niix unchanged.

        Args:
            cmd_list (List[str]): Command components from _build_dcm2niix_command
            subject_id (str): Subject identifier used as log prefix

        Raises:
            RuntimeError: If dcm2niix exits with a non-zero status
        """
        result = subprocess.run(
            cmd_list,
            capture_output=True,
            text=True,
            check=False
        )

        if result.returncode != 0:
            self.logger.error(f"[{subject_id}] dcm2niix stderr: {result.stderr}")
            raise RuntimeError(f"dcm2niix conversion failed with exit code {result.returncode}")

        # Log output at debug level
        if result.stdout:
            self.logger.debug(f"[{subject_id}] dcm2niix output: {result.stdout}")

    def _load_converted_images(
        self,
        output_dir: Path,
        subject_id: str,
        sequence_name: Optional[str]
    ) -> Dict[str, Any]:
        """
        Load every NIfTI file found directly in output_dir as a SimpleITK image.

        Args:
            output_dir (Path): Directory dcm2niix wrote into
            subject_id (str): Subject identifier (log prefix, and stripped from
                filenames when deriving keys)
            sequence_name (Optional[str]): If given, used as the key for every file found

        Returns:
            Dict[str, Any]: "<key>" -> sitk.Image, "<key>_output_path" -> str and,
                when a JSON sidecar exists, "<key>_meta_dict" -> {"dcm2niix_json": ...}

        Raises:
            RuntimeError: If a file cannot be loaded
        """
        converted_images: Dict[str, Any] = {}

        for ext in ('.nii', '.nii.gz'):
            # Sorted so that the result is deterministic when several files
            # map to the same key (the last one wins).
            for nifti_file in sorted(output_dir.glob(f"*{ext}")):
                # Base name without the NIfTI extension, e.g. "sub_T1" for "sub_T1.nii.gz"
                base_name = nifti_file.name[:-len(ext)]

                if sequence_name:
                    key = sequence_name
                else:
                    # Try to extract sequence info from filename
                    key = base_name.replace(subject_id, '').strip('_')
                    if not key:
                        key = 'image'

                try:
                    sitk_image = sitk.ReadImage(str(nifti_file))
                    converted_images[key] = sitk_image
                    self.logger.debug(f"[{subject_id}] Loaded {nifti_file} as SimpleITK Image")

                    # Store the output file path
                    converted_images[f"{key}_output_path"] = str(nifti_file)

                    # The sidecar shares the base name ("x.json" for "x.nii.gz").
                    # with_suffix() would only replace ".gz" and look for "x.nii.json".
                    json_file = nifti_file.with_name(f"{base_name}.json")
                    if json_file.exists():
                        with open(json_file, 'r', encoding='utf-8') as f:
                            json_metadata = json.load(f)

                        # Add JSON metadata to image metadata
                        meta_dict = converted_images.setdefault(f"{key}_meta_dict", {})
                        meta_dict['dcm2niix_json'] = json_metadata
                        self.logger.debug(f"[{subject_id}] Loaded JSON metadata for {key}")

                except Exception as e:
                    self.logger.error(f"[{subject_id}] Failed to load {nifti_file} as SimpleITK Image: {e}")
                    raise RuntimeError(f"Failed to load converted NIfTI file: {e}") from e

        return converted_images

    def _convert_single_dicom_dir(
        self,
        input_dir: str,
        subject_id: str,
        sequence_name: Optional[str] = None,
        output_dir: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Convert a single DICOM directory to NIfTI format and return the loaded images.

        Args:
            input_dir (str): Input DICOM directory path
            subject_id (str): Subject identifier
            sequence_name (Optional[str]): Sequence name for filename formatting
            output_dir (Optional[str]): Output directory for converted files.
                If None, uses temporary directory

        Returns:
            Dict[str, Any]: SimpleITK Image objects keyed by sequence name, plus
                "<key>_output_path" (str) and optional "<key>_meta_dict" (dict)
                entries. When a temporary directory was used it is removed
                before returning, so "<key>_output_path" no longer exists on disk.

        Raises:
            FileNotFoundError: If input_dir does not exist
            ValueError: If input_dir is not a directory
            RuntimeError: If conversion fails
        """
        input_path = Path(input_dir)
        if not input_path.exists():
            raise FileNotFoundError(f"Input directory does not exist: {input_dir}")
        if not input_path.is_dir():
            raise ValueError(f"Input path is not a directory: {input_dir}")

        # Determine output directory
        if output_dir:
            # Use provided output directory
            subject_output_dir = Path(output_dir)
            safe_mkdir(str(subject_output_dir))
            use_temp_dir = False
            self.logger.info(f"[{subject_id}] Using output directory: {subject_output_dir}")
        else:
            # Create temporary directory for conversion
            temp_dir = tempfile.mkdtemp(prefix=f"dcm2niix_{subject_id}_")
            subject_output_dir = Path(temp_dir)
            use_temp_dir = True
            self.logger.debug(f"[{subject_id}] Using temporary directory: {subject_output_dir}")

        try:
            # Determine filename format
            filename_format = self.filename_format
            if not filename_format:
                if sequence_name:
                    filename_format = f"{subject_id}_{sequence_name}"
                else:
                    filename_format = subject_id

            cmd_list = self._build_dcm2niix_command(
                str(input_path),
                str(subject_output_dir),
                filename_format
            )

            self.logger.info(f"[{subject_id}] Converting DICOM directory: {input_dir}")
            # Joined form is for the log only; execution uses the argument list.
            self.logger.debug(f"[{subject_id}] Command: {' '.join(cmd_list)}")

            self._run_dcm2niix(cmd_list, subject_id)

            # Find converted files and load as SimpleITK Image objects
            converted_images = self._load_converted_images(
                subject_output_dir, subject_id, sequence_name
            )

            if not converted_images:
                raise RuntimeError(f"No NIfTI files were created for {input_dir}")

            self.logger.debug(f"[{subject_id}] Conversion saved to {subject_output_dir}")
            return converted_images

        except RuntimeError:
            # Re-raise RuntimeError as is
            raise
        except Exception as e:
            error_msg = f"[{subject_id}] dcm2niix conversion failed for {input_dir}: {str(e)}"
            self.logger.error(error_msg)
            raise RuntimeError(error_msg) from e
        finally:
            # Clean up temporary directory only if using temp directory
            if use_temp_dir:
                try:
                    shutil.rmtree(str(subject_output_dir))
                    self.logger.debug(f"[{subject_id}] Cleaned up temporary directory: {subject_output_dir}")
                except Exception as e:
                    self.logger.warning(f"[{subject_id}] Failed to clean up temporary directory {subject_output_dir}: {e}")

    def batch_convert_subjects(
        self,
        subjects_data: Dict[str, Dict[str, str]]
    ) -> Dict[str, Dict[str, sitk.Image]]:
        """
        Batch convert DICOM directories for multiple subjects.

        Args:
            subjects_data (Dict[str, Dict[str, str]]): Dictionary with subject IDs as keys
                and sequence dictionaries as values
                Format: {subject_id: {sequence_name: dicom_dir_path}}

        Returns:
            Dict[str, Dict[str, sitk.Image]]: Dictionary containing SimpleITK Image objects
                Format: {subject_id: {sequence_name: sitk_image}}
                Each subject dictionary also carries the "<sequence>_output_path"
                and optional "<sequence>_meta_dict" entries produced by the
                single-directory conversion. Subjects with no successful
                conversion are omitted.

        Raises:
            Exception: Re-raises the first conversion error unless
                allow_missing_keys is set
        """
        all_converted_files = {}

        # Calculate total number of conversions for progress bar
        total_conversions = sum(len(sequences) for sequences in subjects_data.values())
        progress_bar = CustomTqdm(total=total_conversions, desc="Converting DICOM to NIfTI")

        for subject_id, sequences in subjects_data.items():
            subject_converted = {}

            for sequence_name, dicom_dir in sequences.items():
                try:
                    converted_images = self._convert_single_dicom_dir(
                        dicom_dir, subject_id, sequence_name
                    )
                    subject_converted.update(converted_images)
                except Exception as e:
                    self.logger.error(f"[{subject_id}] Failed to convert {sequence_name}: {e}")
                    if not self.allow_missing_keys:
                        raise

                # Tolerated failures still count as processed
                progress_bar.update(1)

            if subject_converted:
                all_converted_files[subject_id] = subject_converted

        return all_converted_files

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process the input data to convert DICOM directories to SimpleITK Image objects.

        For each configured key whose value is a string path, the directory is
        converted and data[key] is replaced by the first resulting image.
        Conversion details are recorded under data["<key>_meta_dict"].

        Args:
            data (Dict[str, Any]): Input data dictionary containing DICOM directory paths

        Returns:
            Dict[str, Any]: Data dictionary with SimpleITK Image objects added

        Raises:
            KeyError: If a key is missing and allow_missing_keys is False
            Exception: Conversion errors are re-raised unless allow_missing_keys is set
        """
        self._check_keys(data)

        # Extract subject ID if available (try different common keys)
        subject_id = data.get('subj', data.get('subject_id', 'unknown_subject'))

        # Process each specified key
        for key in self.keys:
            if key not in data:
                if self.allow_missing_keys:
                    continue
                raise KeyError(f"Key {key} not found in data dictionary")

            value = data[key]

            # Case 1: If already a SimpleITK Image, skip (already processed)
            if isinstance(value, sitk.Image):
                self.logger.info(f"[{subject_id}] Key {key} is already a SimpleITK Image, skipping dcm2niix conversion")
                continue

            # Case 2: If not a string, skip (invalid type)
            if not isinstance(value, str):
                self.logger.warning(f"[{subject_id}] Key {key} has invalid type {type(value)}, expected str or sitk.Image")
                continue

            # Case 3: value is a string path
            dicom_path = value

            try:
                # Get output directory from data if available.
                # The pipeline will set output_dirs to intermediate directory
                # if save_intermediate is enabled.
                output_dir = None
                if 'output_dirs' in data and key in data['output_dirs']:
                    output_dir = data['output_dirs'][key]
                    self.logger.debug(f"[{subject_id}] Using output directory for {key}: {output_dir}")

                # Convert single DICOM directory
                converted_images = self._convert_single_dicom_dir(
                    dicom_path,
                    subject_id,
                    key,
                    output_dir=output_dir
                )

                meta_key = f"{key}_meta_dict"

                # Image entries are everything except metadata and output path entries
                image_names = [
                    name for name in converted_images
                    if not name.endswith('_meta_dict') and not name.endswith('_output_path')
                ]

                # Only use the first image if multiple found
                if image_names:
                    seq_name = image_names[0]
                    # Use the original key name for consistency with other preprocessors
                    data[key] = converted_images[seq_name]
                    self.logger.debug(f"[{subject_id}] Converted {key}")

                    # Store output file path if available
                    output_path_key = f"{seq_name}_output_path"
                    if output_path_key in converted_images:
                        data.setdefault(meta_key, {})["output_file_path"] = converted_images[output_path_key]

                # Store conversion metadata
                meta = data.setdefault(meta_key, {})
                meta["dcm2niix_converted"] = True
                meta["original_dicom_dir"] = dicom_path
                meta["original_path"] = dicom_path
                meta["converted_files"] = len(image_names)
                meta["conversion_params"] = {
                    'compress': self.compress,
                    'anonymize': self.anonymize,
                    'ignore_derived': self.ignore_derived,
                    'crop_images': self.crop_images,
                    'generate_json': self.generate_json
                }

                # Merge JSON metadata if available
                if meta_key in converted_images:
                    meta.update(converted_images[meta_key])

            except Exception as e:
                self.logger.error(f"[{subject_id}] Error converting DICOM directory for key {key}: {e}")
                if not self.allow_missing_keys:
                    raise

        return data
def batch_convert_dicom_directories(
    input_mapping: Dict[str, Dict[str, str]],
    dcm2niix_path: Optional[str] = None,
    **kwargs
) -> Dict[str, Dict[str, sitk.Image]]:
    """
    Convert the DICOM directories of several subjects to NIfTI in one call.

    A Dcm2niixConverter is created from the given options and its
    batch_convert_subjects method is run on the mapping.

    Args:
        input_mapping (Dict[str, Dict[str, str]]): Mapping of subjects to their DICOM directories
            Format: {subject_id: {sequence_name: dicom_dir_path}}
        dcm2niix_path (Optional[str]): Full path to dcm2niix executable or directory containing it
        **kwargs: Additional parameters for Dcm2niixConverter

    Returns:
        Dict[str, Dict[str, sitk.Image]]: Dictionary containing SimpleITK Image objects

    Example:
        >>> input_data = {
        ...     "subject_001": {
        ...         "T1": "/path/to/subject_001/T1_dicom",
        ...         "T2": "/path/to/subject_001/T2_dicom"
        ...     },
        ...     "subject_002": {
        ...         "T1": "/path/to/subject_002/T1_dicom"
        ...     }
        ... }
        >>> converted = batch_convert_dicom_directories(
        ...     input_data,
        ...     dcm2niix_path="/path/to/dcm2niix/bin"
        ... )
    """
    # The converter requires "keys", but the batch path never reads them,
    # so a placeholder is passed.
    placeholder_keys = ["dummy"]
    return Dcm2niixConverter(
        keys=placeholder_keys,
        dcm2niix_path=dcm2niix_path,
        **kwargs
    ).batch_convert_subjects(input_mapping)