Bases: FileOperation, FileRemoverMixin
An operation to remove 'orphan' annotation files.
This class identifies annotation files (like .xml or .txt) that do not have
a corresponding image file in the source directory. It helps to maintain
dataset integrity by cleaning up labels that are no longer needed.
Attributes:
| Name | Type | Description |
| --- | --- | --- |
| a_source | Path | The directory path where annotation files are stored. |
Source code in file_operations/clean_annotations.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class CleanAnnotationsOperation(FileOperation, FileRemoverMixin):
    """
    An operation to remove 'orphan' annotation files.

    This class identifies annotation files (like .xml or .txt) that do not have
    a corresponding image file in the source directory. It helps to maintain
    dataset integrity by cleaning up labels that are no longer needed.

    Attributes:
        a_source (Path): The directory path where annotation files are stored.
    """

    def __init__(self, **kwargs):
        """
        Initializes the cleanup operation for annotations.

        Args:
            **kwargs (dict): Arguments from the command line or settings,
                forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)
        # Assignment goes through the ``a_source`` property setter below, which
        # validates the type and falls back to ``source_directory`` for None.
        self.a_source = self.settings.a_source

    @staticmethod
    def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
        """
        Adds specific CLI arguments for annotation cleaning.

        Args:
            settings (AppSettings): Global configuration for default values.
            parser (argparse.ArgumentParser): The parser to which 'a_suffix'
                and 'a_source' arguments are added.
        """
        # One or more annotation suffixes may be supplied (nargs="+").
        parser.add_argument(
            Arguments.a_suffix,
            nargs="+",
            help=HelpStrings.a_suffix,
            default=settings.a_suffix,
        )
        parser.add_argument(
            Arguments.a_source,
            help=HelpStrings.a_source,
            default=settings.a_source,
        )

    def do_task(self) -> None:
        """
        Executes the synchronization and removal process.

        It collects the stems of all entries in ``self.files_for_task`` and
        compares them with the annotation files found under ``a_source``. If an
        annotation stem is not among the collected stems, the file is passed to
        ``remove_file``; removals that report success are counted and logged.
        """
        # Log the resolved directory that is actually scanned: the raw setting
        # may be None, in which case the setter substituted source_directory.
        self.logger.info(f"Checking for orphan annotations in {self.a_source}")

        annotation_paths = self.get_files(
            source_directory=self.a_source,
            pattern=self.settings.a_suffix,
        )
        # Build the lookup set once so each membership test is O(1).
        image_stems = {image.stem for image in self.files_for_task}

        orphans_removed = 0
        for a_path in annotation_paths:
            # NOTE(review): per-file log is assumed to belong to the successful
            # removal branch -- confirm against the original indentation.
            if a_path.stem not in image_stems and self.remove_file(a_path):
                orphans_removed += 1
                self.logger.info(f"Removed {a_path.stem}")

        self.logger.info(f"Removed {orphans_removed} orphan annotations")
        wait(logger=self.logger, timeout=self.sleep)

    @property
    def a_source(self) -> Path:
        """Path: Returns the directory path for annotations."""
        return self._a_source

    @a_source.setter
    def a_source(self, value: Union[Path, str, None]) -> None:
        """
        Sets the annotation source path with type validation.

        If the provided value is None, it defaults to the main source_directory.
        It converts string inputs into Path objects.

        Args:
            value (Union[Path, str, None]): The path to the annotations folder.

        Raises:
            TypeError: If the value is not a Path, string, or None.
        """
        if isinstance(value, Path):
            self._a_source = value
        elif isinstance(value, str):
            self._a_source = Path(value)
        elif value is None:
            # No dedicated annotation folder configured: reuse the main source.
            self._a_source = self.source_directory
        else:
            msg = f"Invalid value for a_source, can be Union[Path, str, None], got {type(value)}"
            self.logger.error(msg)
            raise TypeError(msg)
|
a_source
property
writable
Path: Returns the directory path for annotations.
__init__(**kwargs)
Initializes the cleanup operation for annotations.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `**kwargs` | dict | Arguments from the command line or settings, specifically looking for 'a_source' and 'a_suffix'. | {} |
Source code in file_operations/clean_annotations.py
25
26
27
28
29
30
31
32
33
def __init__(self, **kwargs):
    """
    Set up the annotation cleanup operation.

    Args:
        **kwargs (dict): Command-line or settings arguments, passed through
            unchanged to the parent initializer.
    """
    super().__init__(**kwargs)
    # Read the configured annotation directory from the settings and store it
    # via the ``a_source`` attribute of this operation.
    configured_source = self.settings.a_source
    self.a_source = configured_source
|
add_arguments(settings, parser)
staticmethod
Adds specific CLI arguments for annotation cleaning.
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| settings | AppSettings | Global configuration for default values. | required |
| parser | ArgumentParser | The parser to which 'a_suffix' and 'a_source' arguments are added. | required |
Source code in file_operations/clean_annotations.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
@staticmethod
def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
    """
    Register the CLI options used by annotation cleaning.

    Args:
        settings (AppSettings): Global configuration supplying the default
            value of each option.
        parser (argparse.ArgumentParser): Parser that receives the 'a_suffix'
            and 'a_source' arguments.
    """
    # The suffix option accepts one or more values (nargs="+").
    suffix_options = {
        "nargs": "+",
        "help": HelpStrings.a_suffix,
        "default": settings.a_suffix,
    }
    source_options = {
        "help": HelpStrings.a_source,
        "default": settings.a_source,
    }

    # Registration order is kept: suffix first, then source.
    parser.add_argument(Arguments.a_suffix, **suffix_options)
    parser.add_argument(Arguments.a_source, **source_options)
|
do_task()
Executes the synchronization and removal process.
It collects all image names (stems) from the source directory and
compares them with annotation files. If an annotation stem is not
found in the image stems, the file is deleted using FileRemoverMixin.
Source code in file_operations/clean_annotations.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def do_task(self) -> None:
    """
    Executes the synchronization and removal process.

    It collects the stems of all entries in ``self.files_for_task`` and
    compares them with the annotation files found under ``self.a_source``.
    If an annotation stem is not among the collected stems, the file is
    passed to ``remove_file``; removals that report success are counted
    and logged.
    """
    # Log the resolved directory that is actually scanned; the raw setting
    # (self.settings.a_source) may be None when no folder was configured.
    self.logger.info(f"Checking for orphan annotations in {self.a_source}")

    annotation_paths = self.get_files(
        source_directory=self.a_source,
        pattern=self.settings.a_suffix,
    )
    # Build the lookup set once so each membership test is O(1).
    image_stems = {image.stem for image in self.files_for_task}

    orphans_removed = 0
    for a_path in annotation_paths:
        # NOTE(review): per-file log is assumed to belong to the successful
        # removal branch -- confirm against the original indentation.
        if a_path.stem not in image_stems and self.remove_file(a_path):
            orphans_removed += 1
            self.logger.info(f"Removed {a_path.stem}")

    self.logger.info(f"Removed {orphans_removed} orphan annotations")
    wait(logger=self.logger, timeout=self.sleep)
|