Skip to content

VOC to YOLO converter

Bases: BaseConverter

A high-performance converter for dataset annotations from Pascal VOC (.xml) to YOLO (.txt).

This class implements a two-phase parallel processing pipeline: 1. Discovery Phase: Scans all files to build a consistent class mapping. 2. Execution Phase: Performs the actual coordinate normalization and saves the files.

Attributes:

Name Type Description
CLASSES_FILE str

The name of the output file containing the list of YOLO classes.

tolerance int

Number of decimal places used when writing coordinates to the resulting YOLO files.

objects list

A list of unique class names found during the discovery phase.

class_mapping Dict[str, int]

A dictionary mapping class names to their YOLO IDs.

Source code in tools/annotation_converter/converter/voc_yolo_converter.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
class VocYOLOConverter(BaseConverter):
    """
    A high-performance converter for dataset annotations from Pascal VOC (.xml) to YOLO (.txt).

    This class implements a two-phase parallel processing pipeline:
    1. Discovery Phase: Scans all files to build a consistent class mapping.
    2. Execution Phase: Performs the actual coordinate normalization and saves the files.

    Attributes:
        CLASSES_FILE (str): The name of the output file containing the list of YOLO classes.
        tolerance (int): Number of decimal places of the coordinates in the resulting YOLO files.
        objects (list): A sorted list of unique class names found during the discovery phase.
        class_mapping (Dict[str, int]): A dictionary mapping class names to their YOLO IDs
            (the index of the name in ``objects``); filled in by ``convert``.
    """
    CLASSES_FILE = "classes.txt"

    def __init__(self, source_format, dest_format, tolerance: int = 6, **kwargs):
        """
        Initializes the VOC to YOLO converter.

        Args:
            source_format (str): The extension of source files (e.g., '.xml').
            dest_format (str): The extension of destination files (e.g., '.txt').
            tolerance (int): Number of decimal places for coordinates. Defaults to 6.
            **kwargs (dict): Additional parameters passed to the BaseConverter.

        Raises:
            TypeError: If ``tolerance`` cannot be converted to an integer.
        """
        super().__init__(source_format, dest_format, **kwargs)

        # Goes through the ``tolerance`` property setter, which may log via
        # self.logger, so it must run after the base initializer.
        self.tolerance = tolerance
        self.objects: list = list()
        self.class_mapping: Dict[str, int] = dict()

    @staticmethod
    def _get_classes_worker(annotation_paths: Path, reader: BaseReader) -> Set[str]:
        """
        Multiprocessing worker for the Discovery Phase.

        Reads a single annotation file and extracts all unique object names.
        Objects without a usable string name are skipped individually, so one
        malformed object does not hide the remaining classes of the file.

        Args:
           annotation_paths (Path): Path to the XML annotation file.
           reader (BaseReader): The reader instance used to parse XML data.

        Returns:
           Set[str]: A set of unique class names found in the file. Empty if the
           file cannot be read or has no usable objects.
        """
        try:
            data = reader.read(annotation_paths)
            annotation = data.get("annotation", {})
            objects = annotation.get("object", list())
        except Exception:
            # Best-effort discovery: an unreadable or oddly shaped file
            # contributes no classes instead of aborting the whole scan.
            return set()

        # A single <object> is returned as a mapping rather than a list.
        if not isinstance(objects, list):
            objects = [objects]

        names: Set[str] = set()
        for obj in objects:
            try:
                name = obj["name"]
            except (KeyError, TypeError):
                continue
            # Only real strings are accepted: a None name (empty <name/>) would
            # make the later sorted() over mixed None/str values raise.
            if isinstance(name, str):
                names.add(name)
        return names

    @staticmethod
    def _convert_worker(
            file_path: Path,
            destination_path: Path,
            reader: BaseReader,
            writer: BaseWriter,
            class_mapping: Dict[str, int],
            tolerance: int,
            suffix: str
    ) -> bool:
        """
        Multiprocessing worker for the Execution Phase.

        Performs the core logic: reads XML, calculates YOLO-normalized coordinates
        (center_x, center_y, width, height), and saves the resulting text file.
        Box corners are clamped to the image bounds before normalization so the
        emitted box never extends outside the [0, 1] range.

        Args:
           file_path (Path): Path to the source XML file.
           destination_path (Path): Directory where the output file will be saved.
           reader (BaseReader): Reader instance for XML parsing.
           writer (BaseWriter): Writer instance for saving YOLO data.
           class_mapping (Dict[str, int]): Map of class names to their integer IDs.
           tolerance (int): Precision for rounding coordinates.
           suffix (str): The file extension for the output file.

        Returns:
           bool: True if the file was successfully processed and saved, False if
           it could not be read or has no valid positive image size.
        """
        try:
            data = reader.read(file_path)
            annotation = data.get("annotation")
        except Exception:
            # Mirror the discovery worker: one unreadable file is reported as
            # "not converted" instead of aborting the whole batch.
            return False

        if annotation is None:
            return False

        try:
            img_width = int(annotation["size"]["width"])
            img_height = int(annotation["size"]["height"])
        except (KeyError, ValueError, TypeError):
            return False

        # Normalization divides by the image size, so it must be positive.
        if img_width <= 0 or img_height <= 0:
            return False

        annotated_objects = annotation.get("object", list())

        # reader using xmltodict that returns a dict if there is just one object, if more - returns a list
        if not isinstance(annotated_objects, list):
            annotated_objects = [annotated_objects]

        converted_objects: List[str] = list()

        for obj in annotated_objects:
            try:
                name = obj["name"]

                # Names unknown to the discovery phase are skipped.
                if name not in class_mapping:
                    continue
                class_id = class_mapping[name]

                bbox = obj["bndbox"]
                # Clamp the corners to the image first; clipping the center and
                # the size independently would leave boxes that stick out of
                # the image after normalization.
                xmin = min(max(float(bbox["xmin"]), 0.0), img_width)
                ymin = min(max(float(bbox["ymin"]), 0.0), img_height)
                xmax = min(max(float(bbox["xmax"]), 0.0), img_width)
                ymax = min(max(float(bbox["ymax"]), 0.0), img_height)

                # Inverted corners collapse to a zero-sized box.
                width = max(xmax - xmin, 0.0) / img_width
                height = max(ymax - ymin, 0.0) / img_height
                x_center = (xmin + xmax) / 2 / img_width
                y_center = (ymin + ymax) / 2 / img_height

                row = (f"{class_id} "
                       f"{x_center:.{tolerance}f} "
                       f"{y_center:.{tolerance}f} "
                       f"{width:.{tolerance}f} "
                       f"{height:.{tolerance}f}")
                converted_objects.append(row)

            except (KeyError, ValueError, TypeError):
                # A malformed object is dropped; the rest of the file is kept.
                continue

        converted_path = destination_path / f"{file_path.stem}{suffix}"
        writer.write(converted_objects, converted_path)
        return True

    def convert(self, file_paths: Tuple[Path, ...], target_path: Path, n_jobs: int = 1) -> None:
        """
        Orchestrates the batch conversion process using multiple processes.

        Phase 1: Scans all files in parallel to create a unified 'classes.txt'.
        Phase 2: Converts coordinates and saves files in parallel.

        After the call, ``self.objects`` holds the sorted class names and
        ``self.class_mapping`` the name -> ID mapping that was used.

        Args:
            file_paths (Tuple[Path, ...]): Collection of source annotation files.
            target_path (Path): Directory path for the converted output.
            n_jobs (int): Number of parallel workers to use. Defaults to 1.
        """
        count_to_convert = len(file_paths)

        # The classes file is written below even for an empty input, so the
        # target directory has to exist in every case.
        target_path.mkdir(parents=True, exist_ok=True)

        self.logger.info(f"Start converting {count_to_convert} annotations with {n_jobs} workers...")

        classes_func = partial(self._get_classes_worker, reader=self.reader)

        # One pool serves both phases; phase 2 starts only after every phase-1
        # result has been collected, because it needs the complete mapping.
        with ProcessPoolExecutor(max_workers=n_jobs) as executor:
            classes = list(executor.map(classes_func, file_paths))

            # Sorting makes the class IDs independent of file order.
            self.objects = sorted(set().union(*classes))
            class_mapping = {name: i for i, name in enumerate(self.objects)}
            self.class_mapping = class_mapping
            self.logger.info(f"Unified class mapping created: {len(self.objects)} classes")

            worker_func = partial(
                self._convert_worker,
                destination_path=target_path,
                reader=self.reader,
                writer=self.writer,
                class_mapping=class_mapping,
                tolerance=self.tolerance,
                suffix=self.dest_suffix
            )

            self.logger.info(f"converting {count_to_convert} annotations with {n_jobs} workers...")
            # Each worker returns a bool; summing counts the successes.
            converted_count = sum(executor.map(worker_func, file_paths))

        self.logger.info(f"Converted {converted_count}/{count_to_convert} annotations and saved in {target_path}")

        self.writer.write(self.objects, target_path / self.CLASSES_FILE)
        self.logger.info(f"Saved {self.CLASSES_FILE} in {target_path}")

    @property
    def tolerance(self) -> int:
        """int: The number of decimal places for YOLO coordinates."""
        return self._tolerance

    @tolerance.setter
    def tolerance(self, value: Union[int, float, str]) -> None:
        """
        Sets the coordinate precision. Handles conversion from float or string if needed.

        Args:
            value Union[int, float, str]: The precision value.

        Raises:
            TypeError: If the value cannot be converted to an integer.
        """
        if isinstance(value, int):
            self._tolerance = value
            return

        try:
            self._tolerance = int(float(value))
        except (TypeError, ValueError, OverflowError) as e:
            # float() raises ValueError for non-numeric strings and int() raises
            # OverflowError for infinities; all are reported as the documented
            # TypeError so callers have a single exception to handle.
            msg = f"Can`t convert {value} to int from type {type(value)})\n{e}"
            self.logger.warning(msg)
            raise TypeError(msg) from e

tolerance property writable

int: The number of decimal places for YOLO coordinates.

__init__(source_format, dest_format, tolerance=6, **kwargs)

Initializes the VOC to YOLO converter.

Parameters:

Name Type Description Default
source_format str

The extension of source files (e.g., '.xml').

required
dest_format str

The extension of destination files (e.g., '.txt').

required
tolerance int

Number of decimal places for coordinates. Defaults to 6.

6
**kwargs dict

Additional parameters passed to the BaseConverter.

{}
Source code in tools/annotation_converter/converter/voc_yolo_converter.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def __init__(self, source_format, dest_format, tolerance: int = 6, **kwargs):
    """
    Set up a VOC to YOLO converter instance.

    Args:
        source_format (str): Extension of the source files (e.g., '.xml').
        dest_format (str): Extension of the destination files (e.g., '.txt').
        tolerance (int): How many decimal places coordinates are written with.
            Defaults to 6.
        **kwargs (dict): Extra parameters forwarded to the BaseConverter.
    """
    # The base class receives both formats plus any extra keyword arguments.
    super().__init__(source_format, dest_format, **kwargs)

    self.tolerance = tolerance

    # Both start out empty.
    self.objects: list = []
    self.class_mapping: Dict[str, int] = {}

convert(file_paths, target_path, n_jobs=1)

Orchestrates the batch conversion process using multiple processes.

Phase 1: Scans all files in parallel to create a unified 'classes.txt'. Phase 2: Converts coordinates and saves files in parallel.

Parameters:

Name Type Description Default
file_paths Tuple[Path, ...]

Collection of source annotation files.

required
target_path Path

Directory path for the converted output.

required
n_jobs int

Number of parallel workers to use. Defaults to 1.

1
Source code in tools/annotation_converter/converter/voc_yolo_converter.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def convert(self, file_paths: Tuple[Path, ...], target_path: Path, n_jobs: int = 1) -> None:
    """
    Orchestrates the batch conversion process using multiple processes.

    Phase 1: Scans all files in parallel to create a unified 'classes.txt'.
    Phase 2: Converts coordinates and saves files in parallel.

    After the call, ``self.objects`` holds the sorted class names and
    ``self.class_mapping`` the name -> ID mapping that was used.

    Args:
        file_paths (Tuple[Path, ...]): Collection of source annotation files.
        target_path (Path): Directory path for the converted output.
        n_jobs (int): Number of parallel workers to use. Defaults to 1.
    """
    count_to_convert = len(file_paths)

    # The classes file is written below even for an empty input, so the
    # target directory has to exist in every case.
    target_path.mkdir(parents=True, exist_ok=True)

    self.logger.info(f"Start converting {count_to_convert} annotations with {n_jobs} workers...")

    classes_func = partial(self._get_classes_worker, reader=self.reader)

    # One pool serves both phases; phase 2 starts only after every phase-1
    # result has been collected, because it needs the complete mapping.
    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
        classes = list(executor.map(classes_func, file_paths))

        # Sorting makes the class IDs independent of file order.
        self.objects = sorted(set().union(*classes))
        class_mapping = {name: i for i, name in enumerate(self.objects)}
        self.class_mapping = class_mapping
        self.logger.info(f"Unified class mapping created: {len(self.objects)} classes")

        worker_func = partial(
            self._convert_worker,
            destination_path=target_path,
            reader=self.reader,
            writer=self.writer,
            class_mapping=class_mapping,
            tolerance=self.tolerance,
            suffix=self.dest_suffix
        )

        self.logger.info(f"converting {count_to_convert} annotations with {n_jobs} workers...")
        # Each worker returns a bool; summing counts the successes.
        converted_count = sum(executor.map(worker_func, file_paths))

    self.logger.info(f"Converted {converted_count}/{count_to_convert} annotations and saved in {target_path}")

    self.writer.write(self.objects, target_path / self.CLASSES_FILE)
    self.logger.info(f"Saved {self.CLASSES_FILE} in {target_path}")