"""
Visualization module for Clusterium.
This module provides functions for visualizing clustering results and evaluation
metrics.
"""
from __future__ import annotations
import os
from collections import Counter
from typing import TYPE_CHECKING
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colormaps
from clusx.errors import VisualizationError
from clusx.logging import get_logger
if TYPE_CHECKING:
from typing import Any
from matplotlib.axes import Axes
# Module-level logger created through the project's logging helper.
logger = get_logger(__name__)
# Small-dataset threshold. It is the default ``min_dataset_size`` of
# ``safe_plot`` and is also used by ``plot_powerlaw_fit`` and
# ``visualize_evaluation_dashboard`` when deciding whether to show
# small-dataset notices.
MIN_DATASET_SIZE = 10
"""
Minimum dataset size for which visualizations are considered safe.
Note
----
Visualizations may not be meaningful or could be misleading when applied to datasets
smaller than this threshold.
"""
# Select matplotlib's "default" style sheet when this module is imported.
# NOTE(review): this runs at import time; presumably it affects every figure
# created afterwards in the same process -- confirm that is intended.
plt.style.use("default")
def get_model_colors(model_names: list[str]) -> dict[str, Any]:
    """Generate consistent, distinct colors for models.

    Colormap selection depends on how many models have to be told apart:

    - While the models fit into the qualitative 'Set1' palette (it provides
      9 colors), every model gets its own 'Set1' entry.
    - For more models, 'tab20' is used. It provides 20 colors; beyond that the
      palette is cycled and recycled colors get a reduced alpha (0.7) so they
      remain distinguishable from the first pass.

    Parameters
    ----------
    model_names : list[str]
        List of model names to generate colors for.

    Returns
    -------
    dict
        Dictionary mapping model names to RGBA color tuples. If a name occurs
        more than once, the color assigned to its last occurrence wins.

    Notes
    -----
    Palette entries are looked up by integer index. Sampling the colormap with
    a float such as ``i / 9`` maps both ``i == 8`` and ``i == 9`` onto the last
    'Set1' entry, which made two out of ten models share one color.
    """
    num_models = len(model_names)

    set1 = colormaps["Set1"]
    # Only use Set1 while it has a dedicated entry for every model; the size
    # is read from the colormap instead of being hard-coded.
    if num_models <= set1.N:
        return {name: set1(index) for index, name in enumerate(model_names)}

    tab20 = colormaps["tab20"]
    colors = []
    for index in range(num_models):
        # Cycle through the palette; an integer argument selects the entry
        # directly, without float rounding.
        red, green, blue, _ = tab20(index % tab20.N)
        # Lower alpha marks colors that are being reused.
        alpha = 1.0 if index < tab20.N else 0.7
        colors.append((red, green, blue, alpha))
    return dict(zip(model_names, colors))
def safe_plot(title: str | None = None, min_dataset_size: int = MIN_DATASET_SIZE):
    """
    Build a decorator that shields a plotting function with error handling.

    Parameters
    ----------
    title : str or None
        Title for the plot. If None, a title is derived from the decorated
        function's name ("plot_" removed, underscores turned into spaces,
        title-cased).
    min_dataset_size : int
        Threshold handed to :func:`is_small_dataset` when an error occurs.
        Default is :const:`MIN_DATASET_SIZE`.

    Returns
    -------
    collections.abc.Callable
        Decorator producing a wrapper with the signature
        ``wrapper(reports, ax, *args, **kwargs)``.

    Examples
    --------
    >>> @safe_plot(title="My Custom Plot")
    ... def plot_my_visualization(reports, ax):
    ...     # Your plotting code here, no try/except needed
    ...     ax.plot(data)
    ...
    >>> plot_my_visualization(reports, ax)

    Notes
    -----
    - The decorated function must accept 'reports' and 'ax' as its first two
      arguments.
    - The title is set on the axes before the decorated function runs.
    - If the decorated function raises, the error is logged, an error message
      is rendered on the axes via :func:`render_error_message`, and the
      wrapper returns None.
    """
    from functools import wraps

    def decorator(plot_func):
        @wraps(plot_func)
        def wrapper(reports, ax: Axes, *args, **kwargs):
            # The derived name is always computed, even if an explicit title
            # ends up being used.
            derived_title = (
                plot_func.__name__.replace("plot_", "")
                .replace("_", " ")
                .strip()
                .title()
            )
            plot_title = derived_title if title is None else title
            ax.set_title(plot_title)

            try:
                outcome = plot_func(reports, ax, *args, **kwargs)
            except Exception as exc:  # pylint: disable=broad-except
                logger.error("Error plotting %s: %s", plot_title, exc)
                too_small = is_small_dataset(reports, min_dataset_size)
                render_error_message(
                    ax, plot_title, exc, too_small, min_dataset_size
                )
                return None
            return outcome

        return wrapper

    return decorator
def is_small_dataset(reports: dict[str, dict[str, Any]], min_size: int) -> bool:
    """
    Decide whether the reports describe a dataset that counts as small.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports.
    min_size : int
        Minimum number of texts threshold.

    Returns
    -------
    bool
        True if the dataset is considered small, False otherwise.

    Notes
    -----
    The dataset is considered small when any of the following holds:

    1. ``reports`` is empty or not a dictionary.
    2. No report carries ``report["cluster_stats"]["num_texts"]``.
    3. Some report has a ``num_texts`` below ``min_size`` (reports are scanned
       in order and the scan stops at the first such report).
    """
    if not (reports and isinstance(reports, dict)):
        return True

    def _text_counts():
        # Lazily yield the text count of every report that provides one.
        for entry in reports.values():
            if "cluster_stats" in entry and "num_texts" in entry["cluster_stats"]:
                yield entry["cluster_stats"]["num_texts"]

    found_count = False
    for num_texts in _text_counts():
        found_count = True
        if num_texts < min_size:
            return True

    # Without any count information the dataset is treated as small.
    return not found_count
def render_error_message(
    ax: Axes, plot_title: str, error, small_dataset: bool, min_size: int
):
    """
    Display an error message on the plot.

    Parameters
    ----------
    ax : Axes
        Matplotlib axes to display the error message on.
    plot_title : str
        Title of the plot.
    error : Exception
        The exception that was raised. Its string form is shown (truncated to
        50 characters) unless ``small_dataset`` is True.
    small_dataset : bool
        Whether the dataset is considered small. If True, a small-dataset
        notice is shown instead of the error text.
    min_size : int
        Minimum dataset size threshold, quoted in the small-dataset notice.

    Returns
    -------
    None
        This function modifies the provided axes in-place.

    Notes
    -----
    The text is positioned in axes coordinates (``ax.transAxes``), so it stays
    centered even if the failing plot function had already drawn data and
    thereby changed the axis limits or scales.
    """
    if small_dataset:
        message = f"Cannot generate {plot_title} for small datasets"
        details = f"(Requires at least {min_size} data points)"
    else:
        message = f"Error plotting {plot_title}"
        error_msg = str(error)
        details = error_msg[:50] + ("..." if len(error_msg) > 50 else "")

    # Anchor both lines to the axes box rather than to data coordinates.
    placement = {"ha": "center", "va": "center", "transform": ax.transAxes}
    ax.text(0.5, 0.5, message, fontsize=11, **placement)
    ax.text(0.5, 0.4, details, fontsize=9, color="gray", **placement)
    ax.set_title(f"{plot_title} (Error)")

    # Reset scales if they might have been changed
    if hasattr(ax, "set_xscale") and hasattr(ax, "set_yscale"):
        try:
            ax.set_xscale("linear")
            ax.set_yscale("linear")
        except Exception:  # pylint: disable=broad-except
            # Deliberately best-effort: this already runs while reporting an
            # error, so a failing scale reset must not raise a second one.
            pass
@safe_plot(title="Cluster Size Distribution (Log-Log Scale)")
def plot_cluster_size_distribution(reports, ax: Axes):
    """
    Plot cluster size distributions for each model.

    For every model the sizes in ``report["cluster_stats"]["cluster_sizes"]``
    are turned into a "cluster size -> number of clusters of that size"
    distribution and drawn on log-log axes.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports.
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None
        The function modifies the provided axes in-place.

    Notes
    -----
    - Models without cluster size data are skipped with a warning.
    - A report without a ``"parameters"`` entry is still plotted; its label
      shows ``N/A`` for alpha and sigma.
    - The legend is only created when at least one line was drawn; otherwise a
      "no data" message is shown on linear axes.
    """
    # Generate colors for models
    model_colors = get_model_colors(list(reports.keys()))

    for model_name, report in reports.items():
        has_sizes = (
            "cluster_stats" in report
            and "cluster_sizes" in report["cluster_stats"]
        )
        if not has_sizes:
            logger.warning("Skipping %s: No cluster size distribution data", model_name)
            continue

        # Clustering parameters are only used for the label, so a missing
        # "parameters" entry must not abort the whole plot.
        parameters = report.get("parameters") or {}
        alpha = parameters.get("alpha", "N/A")
        sigma = parameters.get("sigma", "N/A")
        label = f"{model_name} (α={alpha}, σ={sigma})"

        # Count how many clusters share each size.
        size_frequency = Counter(report["cluster_stats"]["cluster_sizes"].values())

        # Non-positive sizes cannot be drawn on a log scale.
        valid_sizes = [size for size in sorted(size_frequency) if size > 0]
        valid_frequencies = [size_frequency[size] for size in valid_sizes]

        if not valid_sizes:
            logger.warning("No valid cluster sizes for %s", model_name)
            continue

        ax.loglog(
            valid_sizes,
            valid_frequencies,
            marker="o",
            linestyle="--",
            label=label,
            color=model_colors.get(model_name),
            alpha=0.7,
        )

    ax.set_xlabel("Cluster Size")
    ax.set_ylabel("Number of Clusters")
    ax.grid(True, which="both", ls="-", alpha=0.2)

    if ax.get_lines():
        ax.legend()
        return

    # Nothing was plotted: show a message instead of an empty legend.
    ax.text(
        0.5,
        0.5,
        "No cluster count data available",
        ha="center",
        va="center",
        fontsize=12,
        transform=ax.transAxes,
    )
    ax.set_xscale("linear")
    ax.set_yscale("linear")
@safe_plot(title="Number of Clusters")
def plot_cluster_counts(reports, ax: Axes):
    """
    Draw a bar chart with the number of clusters found by each model.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports. Only
        reports providing ``report["cluster_stats"]["num_clusters"]`` are
        included.
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None

    Raises
    ------
    VisualizationError
        If no report provides a cluster count.
    """
    palette = get_model_colors(list(reports.keys()))

    # One (model, count, color) row per report that has a cluster count.
    rows = [
        (name, report["cluster_stats"]["num_clusters"], palette.get(name))
        for name, report in reports.items()
        if "cluster_stats" in report and "num_clusters" in report["cluster_stats"]
    ]
    if not rows:
        raise VisualizationError("No cluster count data available")

    models = [row[0] for row in rows]
    counts = [row[1] for row in rows]
    colors = [row[2] for row in rows]

    ax.bar(models, counts, color=colors)
    ax.set_xlabel("Model")
    ax.set_ylabel("Count")

    # Write each count slightly above its bar.
    for position, count in enumerate(counts):
        ax.text(position, count + 0.5, str(count), ha="center")
@safe_plot(title="Similarity Comparison")
def plot_similarity_metrics(reports, ax: Axes):
    """
    Plot intra- and inter-cluster similarity for each model as grouped bars.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports. The values
        are read from ``report["metrics"]["similarity"]`` (keys
        ``intra_cluster_similarity`` and ``inter_cluster_similarity``, each
        defaulting to 0 when absent).
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None

    Raises
    ------
    VisualizationError
        If no report contains (non-empty) similarity metrics.

    Notes
    -----
    The upper y-limit is the largest value plus 25% padding, capped at 1.0 and
    at least 0.1. The lower y-limit is 0 unless a value is negative; in that
    case it is extended (with the same padding, floored at -1.0) so that
    negative bars are not cut off.
    """
    # Generate colors for models
    model_colors = get_model_colors(list(reports.keys()))

    # Prepare data for grouped bar chart
    model_names = []
    intra_values = []
    inter_values = []
    colors = []

    for model_name, report in reports.items():
        has_metrics = "metrics" in report and "similarity" in report["metrics"]
        if not has_metrics:
            logger.warning("No similarity metrics for %s", model_name)
            continue

        similarity_metrics = report["metrics"]["similarity"]
        if not similarity_metrics:
            logger.warning("Empty similarity metrics for %s", model_name)
            continue

        model_names.append(model_name)
        intra_values.append(similarity_metrics.get("intra_cluster_similarity", 0))
        inter_values.append(similarity_metrics.get("inter_cluster_similarity", 0))
        colors.append(model_colors.get(model_name))

    if not model_names:
        raise VisualizationError("Similarity metrics not available")

    # Two groups on the x-axis: intra and inter
    x = np.arange(2)
    # The models of one group share 80% of the group's slot.
    width = 0.8 / len(model_names)

    for i, (model, intra, inter, color) in enumerate(
        zip(model_names, intra_values, inter_values, colors)
    ):
        # Center the bars of all models around the group position.
        offset = i * width - (len(model_names) - 1) * width / 2
        # Only the first bar gets a label so each model appears once in the
        # legend.
        ax.bar(x[0] + offset, intra, width, label=model, color=color)
        ax.bar(x[1] + offset, inter, width, color=color, alpha=1.0)

    all_values = intra_values + inter_values
    max_value = max(all_values)
    min_value = min(all_values)

    # 25% padding above the highest bar, capped at 1.0, never below 0.1.
    y_max = min(1.0, max_value * 1.25) if max_value > 0 else 0.1
    y_max = max(y_max, 0.1)
    # Keep the zero baseline unless negative similarities have to be shown.
    y_min = max(-1.0, min_value * 1.25) if min_value < 0 else 0

    ax.set_ylabel("Cosine Similarity")
    ax.set_xticks(x)
    ax.set_xticklabels(["Intra-cluster", "Inter-cluster"])
    ax.set_ylim(y_min, y_max)
    if y_min < 0:
        # Make the baseline visible when bars extend below it.
        ax.axhline(0, color="black", linewidth=0.8)
    ax.legend()
def _get_valid_powerlaw_data(
report, model_name
): # pylint: disable=too-many-return-statements
"""
Extract and validate power law data from a report.
Parameters
----------
report : dict
Evaluation report for a model.
model_name : str
Name of the model.
Returns
-------
tuple or None
If valid, returns (alpha, sigma, xmin, valid_sizes, valid_frequencies).
Returns None if the data is intentionally invalid (e.g., small dataset case).
Raises
------
VisualizationError
If the powerlaw metrics are not available or contain invalid values.
"""
has_metrics = "metrics" in report and "powerlaw" in report["metrics"]
if not has_metrics:
raise VisualizationError(f"No powerlaw metrics for {model_name}")
powerlaw_metrics = report["metrics"]["powerlaw"]
if not powerlaw_metrics:
raise VisualizationError(f"Empty powerlaw metrics for {model_name}")
# Get parameters
alpha = powerlaw_metrics.get("alpha", None)
sigma = powerlaw_metrics.get("sigma_error", None)
xmin = powerlaw_metrics.get("xmin", None)
# Check if parameters are intentionally None (small dataset case)
if alpha is None and xmin is None:
# This is an expected case for small datasets, not an error
return None
# Check for NaN values
if (
np.isnan(alpha)
if alpha is not None
else False or np.isnan(xmin) if xmin is not None else False
):
raise VisualizationError(
f"Invalid powerlaw parameters for {model_name}: alpha={alpha}, xmin={xmin}"
)
# Get cluster size distribution
if "cluster_stats" not in report or "cluster_sizes" not in report["cluster_stats"]:
raise VisualizationError(f"No cluster size distribution for {model_name}")
cluster_sizes = report["cluster_stats"]["cluster_sizes"]
# Convert to frequency distribution
size_frequency = Counter(cluster_sizes.values())
# Convert to lists for plotting
sizes = sorted(size_frequency.keys())
frequencies = [size_frequency[size] for size in sizes]
# Filter out zeros for log scale
valid_indices = [
i for i, freq in enumerate(frequencies) if freq > 0 and sizes[i] > 0
]
valid_sizes = [sizes[i] for i in valid_indices]
valid_frequencies = [frequencies[i] for i in valid_indices]
if not valid_sizes:
raise VisualizationError(f"No valid sizes for powerlaw fit for {model_name}")
return alpha, sigma, xmin, valid_sizes, valid_frequencies
def _generate_powerlaw_fit_line(
valid_sizes, valid_frequencies, alpha, xmin, xmin_index, color, model_name
):
"""
Generate and plot a power-law fit line.
Parameters
----------
valid_sizes : list
List of valid cluster sizes.
valid_frequencies : list
List of frequencies for each size.
alpha : float
Power-law exponent.
xmin : float
Minimum value for which power-law holds.
xmin_index : int
Index of xmin in valid_sizes.
color : str or tuple
Color to use for the plot.
model_name : str
Name of the model for the label.
Returns
-------
tuple
(success, line_data) where success is a boolean and line_data is a tuple
containing (x, y, color, label) or None if unsuccessful.
"""
try:
x = np.logspace(np.log10(xmin), np.log10(max(valid_sizes)), 50)
y = [
item ** (-alpha) * valid_frequencies[xmin_index] * (xmin**alpha)
for item in x
]
return True, (x, y, color, f"{model_name} (α={alpha:.2f})")
except Exception as e: # pylint: disable=broad-except
raise VisualizationError(
f"Error generating power-law fit for {model_name}: {e}"
) from e
def _display_no_powerlaw_message(ax: Axes, small_dataset: bool):
"""
Display a message when power-law analysis is not available.
Args:
ax: Matplotlib axes to plot on
small_dataset: Whether this is a small dataset
"""
if small_dataset:
message = "Power-law analysis requires more data points"
ax.text(0.5, 0.5, message, ha="center", va="center", fontsize=11)
ax.text(
0.5,
0.4,
"Each cluster size needs multiple occurrences",
ha="center",
va="center",
fontsize=9,
)
else:
ax.text(0.5, 0.5, "No power-law fit data available", ha="center", va="center")
ax.set_xscale("linear")
ax.set_yscale("linear")
@safe_plot(title="Power-law Fit")
def plot_powerlaw_fit(reports, ax: Axes):
    """
    Plot empirical cluster size distributions together with power-law fits.

    For each model with usable power-law data, the size/frequency points are
    drawn on log-log axes, followed by a fit line anchored at ``xmin`` (or at
    the closest available cluster size if ``xmin`` itself does not occur).

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports.
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None
        The function plots directly on the provided axes. If no fit could be
        drawn for any model, an explanatory message is shown instead.
    """
    # A single report below the threshold marks the whole run as "small";
    # reports without a text count are ignored for this check.
    small_dataset = any(
        report["cluster_stats"]["num_texts"] < MIN_DATASET_SIZE
        for report in reports.values()
        if "cluster_stats" in report and "num_texts" in report["cluster_stats"]
    )

    palette = get_model_colors(list(reports.keys()))
    fit_drawn = False

    for model_name, report in reports.items():
        validated = _get_valid_powerlaw_data(report, model_name)
        if validated is None:
            # Expected for small datasets: nothing to draw for this model.
            continue
        alpha, _, xmin, valid_sizes, valid_frequencies = validated
        color = palette.get(model_name)

        # Empirical distribution
        ax.loglog(
            valid_sizes,
            valid_frequencies,
            "o",
            color=color,
            alpha=0.5,
            label=f"{model_name} (data)",
        )

        if xmin in valid_sizes:
            xmin_index = valid_sizes.index(xmin)
        else:
            logger.warning(
                "xmin=%s not in valid sizes for %s, using closest value",
                xmin,
                model_name,
            )
            # First size with the smallest distance to the reported xmin.
            xmin_index, xmin = min(
                enumerate(valid_sizes),
                # pylint: disable=cell-var-from-loop
                key=lambda pair: abs(pair[1] - xmin),
            )

        success, fit_data = _generate_powerlaw_fit_line(
            valid_sizes, valid_frequencies, alpha, xmin, xmin_index, color, model_name
        )
        if not success or fit_data is None:
            continue

        x, y, color, label = fit_data
        ax.loglog(x, y, "-", color=color, label=label)
        fit_drawn = True

    if fit_drawn:
        ax.set_xlabel("Cluster Size")
        ax.set_ylabel("Probability Density")
        ax.grid(True, which="both", ls="-", alpha=0.2)
        ax.legend()
    else:
        _display_no_powerlaw_message(ax, small_dataset)
@safe_plot(title="Outlier Score Distribution")
def plot_outliers(reports, ax: Axes):
    """
    Plot a histogram of outlier scores for each model.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports. Scores are
        taken from the values of ``report["metrics"]["outliers"]``.
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None

    Raises
    ------
    VisualizationError
        If no report contains any outlier scores.
    """
    palette = get_model_colors(list(reports.keys()))
    histograms_drawn = 0

    for model_name, report in reports.items():
        if "metrics" in report and "outliers" in report["metrics"]:
            outlier_scores = report["metrics"]["outliers"]
        else:
            outlier_scores = None

        # Missing and empty score collections are reported the same way.
        if not outlier_scores:
            logger.warning("No outlier metrics for %s", model_name)
            continue

        ax.hist(
            list(outlier_scores.values()),
            bins=20,
            alpha=1.0,
            label=model_name,
            color=palette.get(model_name),
        )
        histograms_drawn += 1

    if histograms_drawn == 0:
        raise VisualizationError("No outlier data available")

    ax.set_xlabel("Outlier Score")
    ax.set_ylabel("Frequency")
    ax.legend()
def _extract_silhouette_data(reports):
"""Extract and categorize silhouette scores from reports."""
models = []
scores = []
error_models = []
zero_score_models = []
for model_name, report in reports.items():
# Check if we have metrics and silhouette score
if "metrics" in report and "silhouette_score" in report["metrics"]:
score = report["metrics"]["silhouette_score"]
if score == 0.0:
# A score of exactly 0.0 often indicates calculation issues
zero_score_models.append(model_name)
elif score != -1: # Check for special error value
models.append(model_name)
scores.append(score)
else:
error_models.append(model_name)
return models, scores, error_models, zero_score_models
def _show_silhouette_message(ax: Axes, error_models, zero_score_models):
"""Display appropriate message when no valid silhouette scores are available.
Parameters
----------
ax : Axes
Matplotlib axes to display the message on.
error_models : list
List of models with errors.
zero_score_models : list
List of models with zero scores.
Returns
-------
None
"""
if error_models or zero_score_models:
# Create a more informative message about why scores couldn't be calculated
message = "Silhouette scores could not be properly calculated\n"
if zero_score_models:
message += "Models with score=0: " + ", ".join(zero_score_models) + "\n"
if error_models:
message += "Models with errors: " + ", ".join(error_models) + "\n"
message += "Reason: Clusters with <2 samples or calculation issues"
ax.text(
0.5,
0.5,
message,
ha="center",
va="center",
fontsize=11,
wrap=True,
bbox={"facecolor": "lightyellow", "alpha": 0.5, "pad": 5},
)
else:
ax.text(
0.5,
0.5,
"No silhouette scores available",
ha="center",
va="center",
fontsize=12,
)
def _add_silhouette_note(ax: Axes, error_models, zero_score_models):
"""Add a note about models with errors or zero scores.
Parameters
----------
ax : Axes
Matplotlib axes to add the note to.
error_models : list
List of models with errors.
zero_score_models : list
List of models with zero scores.
Returns
-------
None
"""
note_lines = []
if zero_score_models:
note_lines.append(f"Models with score=0: {', '.join(zero_score_models)}")
if error_models:
note_lines.append(f"Models with errors: {', '.join(error_models)}")
note_lines.append("Reason: Clusters with <2 samples or calculation issues")
note = "\n".join(note_lines)
ax.text(
0.5,
-0.15,
note,
ha="center",
fontsize=9,
transform=ax.transAxes,
bbox={"facecolor": "lightyellow", "alpha": 0.5, "pad": 5},
)
@safe_plot(title="Silhouette Score Comparison")
def plot_silhouette_scores(reports, ax: Axes):
    """Draw a bar chart comparing the silhouette scores of the models.

    Parameters
    ----------
    reports : dict
        Dictionary mapping model names to their evaluation reports.
    ax : Axes
        Matplotlib axes to plot on.

    Returns
    -------
    None

    Notes
    -----
    Models whose score is ``0.0`` or the error value ``-1`` are not drawn as
    bars. If no model has a usable score, only an explanatory message is
    shown; otherwise the excluded models are listed in a note below the chart.
    """
    models, scores, error_models, zero_score_models = _extract_silhouette_data(reports)
    has_excluded_models = bool(error_models or zero_score_models)

    if not models:
        # Nothing to draw: explain why instead.
        _show_silhouette_message(ax, error_models, zero_score_models)
        return

    palette = get_model_colors(models)
    bar_colors = [palette[name] for name in models]

    ax.bar(models, scores, color=bar_colors)
    ax.set_xlabel("Clustering Model")
    ax.set_ylabel("Silhouette Score")
    ax.set_ylim(-1, 1)  # Silhouette score range

    # Print each score just above its bar position.
    for position, score in enumerate(scores):
        ax.text(position, score + 0.05, f"{score:.4f}", ha="center")

    if has_excluded_models:
        _add_silhouette_note(ax, error_models, zero_score_models)
def visualize_evaluation_dashboard(
    reports: dict[str, dict[str, Any]],
    output_dir: str,
    filename: str = "evaluation_dashboard.png",
    show_plot: bool = False,
) -> str:
    """Generate a comprehensive dashboard visualization of evaluation metrics.

    This creates a 3x2 grid of plots, filled row by row:

    1. Cluster size distribution (log-log scale)
    2. Number of clusters comparison
    3. Similarity metrics comparison
    4. Power-law fit visualization
    5. Outlier distribution
    6. Silhouette score comparison

    Parameters
    ----------
    reports : dict[str, dict[str, Any]]
        Dictionary mapping model names to their evaluation reports.
    output_dir : str
        Directory to save the visualization. It is created if it does not
        exist.
    filename : str
        Name of the output file.
        Default is ``evaluation_dashboard.png``
    show_plot : bool
        Whether to display the plot interactively.
        Default is ``False``.

    Returns
    -------
    str
        Path to the saved visualization file.

    Notes
    -----
    - If :func:`is_small_dataset` reports a small dataset, a warning text is
      added at the bottom of the figure.
    - The figure is always closed before returning, including when plotting
      or saving raises.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)

    # Create figure with 3x2 grid
    fig, axes = plt.subplots(3, 2, figsize=(15, 18))
    try:
        # Panel layout (row, column):
        #
        #   [0,0] Cluster Size Distribution | [0,1] Number of Clusters
        #   [1,0] Similarity Metrics        | [1,1] Power-law Fit
        #   [2,0] Outlier Distribution      | [2,1] Silhouette Scores
        panels = (
            (plot_cluster_size_distribution, axes[0, 0]),
            (plot_cluster_counts, axes[0, 1]),
            (plot_similarity_metrics, axes[1, 0]),
            (plot_powerlaw_fit, axes[1, 1]),
            (plot_outliers, axes[2, 0]),
            (plot_silhouette_scores, axes[2, 1]),
        )
        for plot_panel, panel_ax in panels:
            plot_panel(reports, panel_ax)

        # Operate on ``fig`` explicitly instead of pyplot's "current figure".
        fig.tight_layout()

        if is_small_dataset(reports, MIN_DATASET_SIZE):
            # Make room at the bottom for the warning text.
            fig.subplots_adjust(bottom=0.08)
            warning_text = (
                f"Small dataset (fewer than {MIN_DATASET_SIZE} data points).\n"
                "Some visualizations may not be available or may not accurately "
                "represent the data patterns."
            )
            fig.text(0.055, 0.035, warning_text, ha="left", va="bottom", fontsize=11)

        fig.savefig(output_path, dpi=300, bbox_inches="tight")
        logger.debug("Evaluation dashboard saved to %s", output_path)

        if show_plot:
            plt.show()
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

    return output_path