Skip to content

Base Reporter

Bases: ABC

Abstract base class for dataset reporting and visualization.

This class provides a shared interface and utility methods for creating technical reports. It handles console output formatting, shared logging, and calculation of dataset health metrics such as 'sweet spots' (statistical ranges free of outliers).

Source code in tools/stats/dataset_reporter/base_reporter.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class BaseDatasetReporter(ABC):
    """
    Abstract base class for dataset reporting and visualization.

    Concrete reporters implement `show_console_report` and
    `generate_visual_report`. This base class contributes the shared pieces:
    logger setup, creation of the report directory, a validated `report_path`
    property, and `_render_section`, which turns one schema section into
    formatted text lines (including IQR-based 'sweet spot' limits).

    Attributes:
        line: Horizontal separator (75 '=' characters) for console output.
    """
    line: str = "=" * 75

    def __init__(self, settings: AppSettings):
        """
        Initializes the reporter with global settings and logging.

        Creates the report directory (including parents) if it does not exist
        and configures a logger named after the concrete class.

        Args:
            settings (AppSettings): Global configuration containing paths,
                log levels, and report schemas.

        Raises:
            TypeError: If `settings.report_path` cannot be converted to a Path.
        """
        self.settings: AppSettings = settings
        self.log_path: Path = settings.log_path
        log_level = settings.log_level
        self.schema = self.settings.img_dataset_report_schema
        # Goes through the validating setter below; the logger does not exist
        # yet at this point, which the setter accounts for.
        self.report_path = self.settings.report_path
        self.report_path.mkdir(parents=True, exist_ok=True)

        self.logger = LoggerConfigurator.setup(
            name=self.__class__.__name__,
            log_level=log_level,
            # File logging is only enabled when a log directory is configured.
            log_path=(
                Path(self.log_path) / f"{self.__class__.__name__}.log"
                if self.log_path
                else None
            ),
        )

    @abstractmethod
    def show_console_report(self, df: pd.DataFrame, target_format: str) -> None:
        """
        Prints a detailed technical report to the console.

        Args:
            df (pd.DataFrame): The feature matrix of the dataset.
            target_format (str): Annotation format identifier (e.g., 'yolo').
        """
        pass

    @abstractmethod
    def generate_visual_report(self, df: pd.DataFrame, destination: Union[Path, str, PdfPages], features: List[str]) -> None:
        """
        Generates visual analytics (plots, heatmaps, manifolds).

        Args:
            df (pd.DataFrame): The feature matrix of the dataset.
            destination (Union[Path, str, PdfPages]): Output target for the visual assets.
            features (List[str]): Numeric columns used for visual correlation and manifold analysis.
        """
        pass

    def _render_section(self, df: Union[pd.DataFrame, pd.Series], section: dict, total_objects: int) -> List[str]:
        """
        Formats a specific data category (numeric or binary) for the report text.

        For a 'numeric' section each column gets one line with median, mean,
        std, min, max, the number of flagged outliers (sum of the companion
        `outlier_<column>` column) and the 'sweet spot' range
        [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], with the lower limit clipped at 0.
        For a 'binary' section each column gets its sum and that sum as a
        percentage of `total_objects`.

        NOTE(review): the annotation allows a Series, but the code reads
        `df.columns`, which a Series does not provide -- confirm callers
        always pass a DataFrame.

        Args:
            df (Union[pd.DataFrame, pd.Series]): Data slice for a specific class or dataset.
            section (dict): Configuration with the keys 'title', 'columns' and 'type'.
            total_objects (int): Denominator for percentages in binary sections.
                When it is 0 the share is reported as 0.0% instead of failing.

        Returns:
            List[str]: Formatted lines, starting with the section header.
                Empty when none of the configured columns exist in `df`.
                Header only when the section type is unknown.
        """
        title = section["title"]
        # Silently drop configured columns that this data slice does not have.
        cols = [col for col in section["columns"] if col in df.columns]

        if not cols:
            self.logger.warning(f"No valid columns found for section '{title}' in the DataFrame.")
            return []

        lines = [f"\n [{title}]"]

        if section["type"] == "numeric":
            stats = df[cols].describe().T
            for col in cols:
                row = stats.loc[col]

                outlier_col = f"outlier_{col}"
                if outlier_col in df.columns:
                    outliers_text = f"{int(df[outlier_col].sum()):<4}"
                else:
                    # A missing flag column must not abort the whole report.
                    self.logger.warning(
                        f"Outlier column '{outlier_col}' not found for section '{title}'."
                    )
                    outliers_text = f"{'n/a':<4}"

                iqr = row["75%"] - row["25%"]
                # Lower limit is clipped at 0 (no negative sweet-spot bound).
                min_limit = np.clip(row["25%"] - 1.5 * iqr, a_min=0, a_max=None)
                max_limit = row["75%"] + 1.5 * iqr

                lines.append(
                    f"  - {col:<25}:"
                    f" med {row['50%']:>10.2f} |"
                    f" avg {row['mean']:>10.2f}, std {row['std']:<10.2f} |"
                    f" min {row['min']:>10.2f}, max {row['max']:>10.2f}  |"
                    f" outliers: {outliers_text} |"
                    f"{min_limit:10.2f} < sweet spot < {max_limit:10.2f} "
                )
        elif section["type"] == "binary":
            sums = df[cols].sum()
            for col in cols:
                count = int(sums[col])
                # Guard against an empty slice: avoid ZeroDivisionError.
                share = (count / total_objects) * 100 if total_objects else 0.0
                lines.append(f"  - {col:<25}: {count:>10} ({share:>6.1f}%)")
        else:
            self.logger.warning(f"Unknown section type '{section['type']}' for section '{title}'. Skipping.")

        return lines

    @property
    def report_path(self) -> Path:
        """Path: The directory where generated reports are stored."""
        return self._report_path

    @report_path.setter
    def report_path(self, value: Union[Path, str]):
        """
        Sets and validates the reporting output directory.

        Args:
            value (Union[Path, str]): Path to the report folder. Anything that
                is not already a Path is passed to the Path constructor.

        Raises:
            TypeError: If the value cannot be converted to a Path.
        """
        if not isinstance(value, Path):
            try:
                value = Path(value)
            except TypeError as err:
                msg = f"Invalid type for report_path: expected Path or str, got {type(value)}"
                # __init__ assigns report_path before the logger is created, so
                # the logger may be absent here; never mask the TypeError with
                # an AttributeError.
                logger = getattr(self, "logger", None)
                if logger is not None:
                    logger.error(msg)
                raise TypeError(msg) from err
        self._report_path = value

report_path property writable

Path: The directory where generated reports are stored.

__init__(settings)

Initializes the reporter with global settings and logging.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| settings | AppSettings | Global configuration containing paths, log levels, and report schemas. | required |

Source code in tools/stats/dataset_reporter/base_reporter.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def __init__(self, settings: AppSettings):
    """
    Set up a reporter from the application-wide settings.

    Stores the settings, log directory and report schema, makes sure the
    report directory exists, and configures a logger named after the
    concrete reporter class.

    Args:
        settings (AppSettings): Global configuration containing paths,
            log levels, and report schemas.
    """
    reporter_name = self.__class__.__name__

    self.settings: AppSettings = settings
    self.log_path: Path = settings.log_path
    log_level = settings.log_level
    self.schema = self.settings.img_dataset_report_schema

    # Assign the report directory, then create it (parents included).
    self.report_path = self.settings.report_path
    self.report_path.mkdir(parents=True, exist_ok=True)

    # A log file is only used when a log directory is configured.
    if self.log_path:
        log_file = Path(self.log_path) / f"{reporter_name}.log"
    else:
        log_file = None

    self.logger = LoggerConfigurator.setup(
        name=reporter_name,
        log_level=log_level,
        log_path=log_file,
    )

generate_visual_report(df, destination, features) abstractmethod

Generates visual analytics (plots, heatmaps, manifolds).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df | DataFrame | The feature matrix of the dataset. | required |
| destination | Union[Path, str, PdfPages] | Output target for the visual assets. | required |
| features | List[str] | Numeric columns used for visual correlation and manifold analysis. | required |

Source code in tools/stats/dataset_reporter/base_reporter.py
56
57
58
59
60
61
62
63
64
65
66
@abstractmethod
def generate_visual_report(
    self,
    df: pd.DataFrame,
    destination: Union[Path, str, PdfPages],
    features: List[str],
) -> None:
    """
    Produce the visual part of the report (plots, heatmaps, manifolds).

    Abstract hook: the base implementation does nothing and returns None.

    Args:
        df (pd.DataFrame): The feature matrix of the dataset.
        destination (Union[Path, str, PdfPages]): Output target for the visual assets.
        features (List[str]): Numeric columns used for visual correlation and manifold analysis.
    """
    return None

show_console_report(df, target_format) abstractmethod

Prints a detailed technical report to the console.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| df | DataFrame | The feature matrix of the dataset. | required |
| target_format | str | Annotation format identifier (e.g., 'yolo'). | required |

Source code in tools/stats/dataset_reporter/base_reporter.py
45
46
47
48
49
50
51
52
53
54
@abstractmethod
def show_console_report(
    self,
    df: pd.DataFrame,
    target_format: str,
) -> None:
    """
    Write the detailed technical report for a dataset to the console.

    Abstract hook: the base implementation does nothing and returns None.

    Args:
        df (pd.DataFrame): The feature matrix of the dataset.
        target_format (str): Annotation format identifier (e.g., 'yolo').
    """
    return None