Skip to content

clean-annotations

Bases: FileOperation, FileRemoverMixin

An operation to remove 'orphan' annotation files.

This class identifies annotation files (like .xml or .txt) that do not have a corresponding image file in the source directory. It helps to maintain dataset integrity by cleaning up labels that are no longer needed.

Attributes:

Name Type Description
a_source Path

The directory path where annotation files are stored.

Source code in file_operations/clean_annotations.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class CleanAnnotationsOperation(FileOperation, FileRemoverMixin):
    """
    An operation to remove 'orphan' annotation files.

    An annotation file (e.g. .xml or .txt) is treated as an orphan when its
    stem does not match the stem of any file in ``self.files_for_task``.
    Orphans are deleted through ``remove_file`` (presumably provided by
    FileRemoverMixin - confirm against the mixin).

    Attributes:
        a_source (Path): The directory path where annotation files are stored.
    """

    def __init__(self, **kwargs):
        """
        Initializes the cleanup operation for annotations.

        Args:
            **kwargs (dict): Arguments forwarded unchanged to the base class
                initializer.
        """
        super().__init__(**kwargs)
        # Assign through the property so the value is validated and a None
        # setting falls back to the main source directory.
        self.a_source = self.settings.a_source

    @staticmethod
    def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
        """
        Adds specific CLI arguments for annotation cleaning.

        Args:
            settings (AppSettings): Global configuration for default values.
            parser (argparse.ArgumentParser): The parser to which 'a_suffix'
                (one or more values) and 'a_source' arguments are added.
        """
        parser.add_argument(
            Arguments.a_suffix,
            nargs="+",
            help=HelpStrings.a_suffix,
            default=settings.a_suffix,
        )
        parser.add_argument(
            Arguments.a_source,
            help=HelpStrings.a_source,
            default=settings.a_source,
        )

    def do_task(self) -> None:
        """
        Executes the synchronization and removal process.

        It collects the stems of all files in ``self.files_for_task`` and
        compares them with the annotation files found in ``self.a_source``.
        An annotation whose stem is not among those stems is passed to
        ``remove_file``; it is counted and logged only when that call returns
        a truthy value.
        """
        # Log the resolved directory: the raw setting may be None, in which
        # case the property setter substituted the main source directory.
        self.logger.info(f"Checking for orphan annotations in {self.a_source}")
        annotation_paths = self.get_files(
            source_directory=self.a_source,
            pattern=self.settings.a_suffix,
        )

        # NOTE(review): if files_for_task is empty, every annotation found is
        # treated as an orphan - confirm this is the intended behavior.
        image_stems = {image.stem for image in self.files_for_task}
        orphans_removed = 0

        for a_path in annotation_paths:
            if a_path.stem in image_stems:
                continue
            if self.remove_file(a_path):
                orphans_removed += 1
                self.logger.info(f"Removed {a_path.stem}")

        self.logger.info(f"Removed {orphans_removed} orphan annotations")

        wait(logger=self.logger, timeout=self.sleep)

    @property
    def a_source(self) -> Path:
        """Path: Returns the directory path for annotations."""
        return self._a_source

    @a_source.setter
    def a_source(self, value: Union[Path, str, None]) -> None:
        """
        Sets the annotation source path with type validation.

        If the provided value is None, it defaults to the main source_directory.
        It converts string inputs into Path objects.

        Args:
            value (Union[Path, str, None]): The path to the annotations folder.

        Raises:
            TypeError: If the value is not a Path, string, or None.
        """
        if isinstance(value, Path):
            self._a_source = value
        elif isinstance(value, str):
            self._a_source = Path(value)
        elif value is None:
            self._a_source = self.source_directory
        else:
            msg = f"Invalid value for a_source, can be Union[Path, str, None], got {type(value)}"
            # Log before raising so the failure is recorded even if a caller
            # handles the exception.
            self.logger.error(msg)
            raise TypeError(msg)

a_source property writable

Path: Returns the directory path for annotations.

__init__(**kwargs)

Initializes the cleanup operation for annotations.

Parameters:

Name Type Description Default
**kwargs dict

Arguments from the command line or settings, specifically looking for 'a_source' and 'a_suffix'.

{}
Source code in file_operations/clean_annotations.py
25
26
27
28
29
30
31
32
33
34
def __init__(self, **kwargs):
    """
    Set up the annotation cleanup operation.

    All keyword arguments are handed to the base class initializer first;
    afterwards the annotation directory is taken from ``self.settings``.

    Args:
        **kwargs (dict): Arguments from the command line or settings,
            forwarded unchanged to the base class.
    """
    super().__init__(**kwargs)
    configured_source = self.settings.a_source
    self.a_source = configured_source

add_arguments(settings, parser) staticmethod

Adds specific CLI arguments for annotation cleaning.

Parameters:

Name Type Description Default
settings AppSettings

Global configuration for default values.

required
parser ArgumentParser

The parser to which 'a_suffix' and 'a_source' arguments are added.

required
Source code in file_operations/clean_annotations.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@staticmethod
def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
    """
    Register the CLI options used by annotation cleaning.

    Two options are added, in this order: 'a_suffix' (accepts one or more
    values) and 'a_source'. Their defaults are read from ``settings``.

    Args:
        settings (AppSettings): Global configuration supplying the defaults.
        parser (argparse.ArgumentParser): The parser that receives the
            'a_suffix' and 'a_source' arguments.
    """
    suffix_flag = Arguments.a_suffix
    suffix_options = {
        "nargs": "+",
        "help": HelpStrings.a_suffix,
        "default": settings.a_suffix,
    }
    parser.add_argument(suffix_flag, **suffix_options)

    source_flag = Arguments.a_source
    source_options = {
        "help": HelpStrings.a_source,
        "default": settings.a_source,
    }
    parser.add_argument(source_flag, **source_options)

do_task()

Executes the synchronization and removal process.

It collects all image names (stems) from the source directory and compares them with annotation files. If an annotation stem is not found in the image stems, the file is deleted using FileRemoverMixin.

Source code in file_operations/clean_annotations.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def do_task(self) -> None:
    """
    Executes the synchronization and removal process.

    It collects the stems of all files in ``self.files_for_task`` and
    compares them with the annotation files found in ``self.a_source``.
    An annotation whose stem is not among those stems is passed to
    ``remove_file``; it is counted and logged only when that call returns
    a truthy value.
    """
    # Log the directory that is actually scanned (self.a_source) rather than
    # the raw setting, so the message matches the get_files call below.
    self.logger.info(f"Checking for orphan annotations in {self.a_source}")
    annotation_paths = self.get_files(
        source_directory=self.a_source,
        pattern=self.settings.a_suffix,
    )

    # NOTE(review): if files_for_task is empty, every annotation found is
    # treated as an orphan - confirm this is the intended behavior.
    image_stems = {image.stem for image in self.files_for_task}
    orphans_removed = 0

    for a_path in annotation_paths:
        if a_path.stem in image_stems:
            continue
        if self.remove_file(a_path):
            orphans_removed += 1
            self.logger.info(f"Removed {a_path.stem}")

    self.logger.info(f"Removed {orphans_removed} orphan annotations")

    wait(logger=self.logger, timeout=self.sleep)