diff --git a/fortex/health/readers/CT_image_reader.py b/fortex/health/readers/CT_image_reader.py index 8ad44b7c..a2de98fe 100644 --- a/fortex/health/readers/CT_image_reader.py +++ b/fortex/health/readers/CT_image_reader.py @@ -1,4 +1,4 @@ -# Copyright 2022 The Forte Authors. All Rights Reserved. +# Copyright 2023 The Forte Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -This class is designed to read CT images from given folder path. +This class is designed to read all CT images from given folder path. For more information for the dataset, visit: -https://data.mendeley.com/datasets/jctsfj2sfn/1 +https://www.kaggle.com/competitions/rsna-str-pulmonary-embolism-detection/data """ import os from typing import Iterator, List @@ -25,7 +25,7 @@ class CTimageReader(PackReader): - r""":class:`CTimageReader` is designed to read CT image files from a given folder.""" + r""":class:`CTimageReader` is designed to read CT image files from a given folder path.""" def _collect(self, image_directory) -> Iterator[List]: r"""Should be called with param ``image_directory`` which is a path to a @@ -46,25 +46,29 @@ def _parse_pack(self, file_path: str) -> Iterator[DataPack]: # Read in image data and store in DataPack img = pydicom.dcmread(file_path, **(self.configs.read_kwargs or {})) + # YBR_FULL allows reading pixels in raw format without any compression img.PhotometricInterpretation = "YBR_FULL" pixel_data = img.pixel_array - pack.add_image(image=pixel_data) + slope = img[('0028', '1053')].value + intercept = img[('0028', '1052')].value + normalized_image = pixel_data * slope + intercept + pack.add_image(image=normalized_image) pack.pack_name = file_path.split("/")[-1] yield pack @classmethod def default_configs(cls): - r"""This defines a basic configuration 
structure for image reader. + r"""This defines a basic configuration structure for CT image reader. Here: - - file_ext (str): The file extension to find the target audio files - under a specific directory path. Default value is ".jpeg". + - file_ext (str): The file extension to find the target CT image files + under a specific directory path. Default value is ".dcm". - read_kwargs (dict): A dictionary containing all the keyword - arguments for `PIL.Image` method. For details, refer to - https://pillow.readthedocs.io/en/stable/reference/Image.html + arguments for `pydicom.dcmread` method. For details, refer to + https://pydicom.github.io/pydicom/stable/ Default value is None. Returns: The default configuration of Image reader.