Visualization
Visualization Functions¶
In order to use functions in this module, import visualize as follows:
import ludwig
from ludwig import visualize
ludwig.visualize.learning_curves
¶
learning_curves(train_stats_per_model: list[dict], output_feature_name: str | None = None, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', callbacks: list[Callback] | None = None, **kwargs) -> None
Show how model metrics change over training and validation data epochs.
For each model and for each output feature and metric of the model, it produces a line plot showing how that metric changed over the course of the epochs of training on the training and validation sets.
| PARAMETER | DESCRIPTION |
|---|---|
train_stats_per_model
|
List containing dictionary of training statistics per model.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization. If None, use all output features.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots — 'pdf' or 'png'.
TYPE:
|
callbacks
|
A list of `ludwig.callbacks.Callback` objects that provide hooks into the Ludwig pipeline.
TYPE:
|
Source code in ludwig/visualize/training.py
@DeveloperAPI
def learning_curves(
    train_stats_per_model: "list[dict]",
    output_feature_name: "str | None" = None,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    callbacks: "list[Callback] | None" = None,
    **kwargs,
) -> None:
    """Show how model metrics change over training and validation data epochs.

    For each model, each output feature, and each tracked metric, draws a line
    plot of the metric's value over the course of training on the training and
    validation sets.

    Args:
        train_stats_per_model: List containing dictionary of training statistics per model.
        output_feature_name: Name of the output feature to use for the visualization.
            If None, use all output features.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots will
            be displayed in a window.
        file_format: File format of output plots — 'pdf' or 'png'.
        callbacks: A list of `ludwig.callbacks.Callback` objects that provide hooks
            into the Ludwig pipeline.
    """
    template = "learning_curves_{}_{}." + file_format
    template_path = generate_filename_template_path(output_directory, template)

    stats_list = convert_to_list(train_stats_per_model)
    names_list = convert_to_list(model_names)
    feature_names = _validate_output_feature_name_from_train_stats(output_feature_name, stats_list)

    for feature_name in feature_names:
        for metric in [LOSS, ACCURACY, HITS_AT_K, EDIT_DISTANCE]:
            # Only plot metrics that were actually tracked for this feature
            # (checked against the first model's training statistics).
            if metric not in stats_list[0].training[feature_name]:
                continue

            filename = template_path.format(feature_name, metric) if template_path else None

            training_series = [stats.training[feature_name][metric] for stats in stats_list]
            # A model without validation stats (or without this feature in them)
            # contributes None so the plot can skip its validation line.
            validation_series = [
                stats.validation[feature_name][metric]
                if stats.validation and feature_name in stats.validation
                else None
                for stats in stats_list
            ]

            eval_freq = stats_list[0].evaluation_frequency
            visualization_utils.learning_curves_plot(
                training_series,
                validation_series,
                metric,
                x_label=eval_freq.period,
                x_step=eval_freq.frequency,
                algorithm_names=names_list,
                title=f"Learning Curves {feature_name}",
                filename=filename,
                callbacks=callbacks,
            )
ludwig.visualize.compare_performance
¶
compare_performance(test_stats_per_model: list[dict], output_feature_name: str | None = None, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Produces model comparison barplot visualization for each overall metric.
For each model (in the aligned lists of test_statistics and model_names) it produces bars in a bar plot, one for each overall metric available in the test_statistics file for the specified output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
test_stats_per_model
|
Dictionary containing evaluation performance statistics.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization.
If None, use all output features.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots - 'pdf' or 'png'.
TYPE:
|
Example usage:
model_a = LudwigModel(config)
model_a.train(dataset)
a_evaluation_stats, _, _ = model_a.evaluate(eval_set)
model_b = LudwigModel.load("path/to/model/")
b_evaluation_stats, _, _ = model_b.evaluate(eval_set)
compare_performance([a_evaluation_stats, b_evaluation_stats], model_names=["A", "B"])
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_performance(
    test_stats_per_model: "list[dict]",
    output_feature_name: "str | None" = None,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Produces model comparison barplot visualization for each overall metric.

    For each model (in the aligned lists of test_statistics and model_names)
    it produces bars in a bar plot, one for each overall metric available
    in the test_statistics file for the specified output_feature_name.

    Args:
        test_stats_per_model: Dictionary containing evaluation performance statistics.
        output_feature_name: Name of the output feature to use for the visualization.
            If `None`, use all output features.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.

    Example usage:

    ```python
    model_a = LudwigModel(config)
    model_a.train(dataset)
    a_evaluation_stats, _, _ = model_a.evaluate(eval_set)
    model_b = LudwigModel.load("path/to/model/")
    b_evaluation_stats, _, _ = model_b.evaluate(eval_set)
    compare_performance([a_evaluation_stats, b_evaluation_stats], model_names=["A", "B"])
    ```
    """
    # Keys in the per-feature statistics that are not scalar overall metrics
    # and therefore cannot be drawn as bars.
    non_metric_keys = {
        "overall_stats",
        "confusion_matrix",
        "per_class_stats",
        "predictions",
        "probabilities",
        "roc_curve",
        "precision_recall_curve",
        LOSS,
    }
    filename_template = "compare_performance_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    stats_list = convert_to_list(test_stats_per_model)
    names_list = convert_to_list(model_names)
    feature_names = _validate_output_feature_name_from_test_stats(output_feature_name, stats_list)

    for feature_name in feature_names:
        # Restrict to the metric names present for every model.
        name_sets = [set(stats[feature_name].keys()) for stats in stats_list]
        shared_names = name_sets[0]
        for name_set in name_sets:
            shared_names = shared_names.intersection(name_set)
        shared_names = shared_names - non_metric_keys

        metrics_dict = {name: [] for name in shared_names}
        for stats in stats_list:
            for name in shared_names:
                metrics_dict[name].append(stats[feature_name][name])

        # are there any metrics to compare?
        if not metrics_dict:
            continue

        bar_groups = []
        bar_labels = []
        min_val = float("inf")
        max_val = float("-inf")
        for name, vals in metrics_dict.items():
            if len(vals) > 0:
                bar_groups.append(vals)
                bar_labels.append(name)
                min_val = min(min_val, min(vals))
                max_val = max(max_val, max(vals))

        filename = None
        if filename_template_path:
            filename = filename_template_path.format(feature_name)
            os.makedirs(output_directory, exist_ok=True)
        visualization_utils.compare_classifiers_plot(
            bar_groups,
            bar_labels,
            names_list,
            # Use an adaptive y-axis when any metric falls outside [0, 1].
            adaptive=min_val < 0 or max_val > 1,
            title=f"Performance comparison on {feature_name}",
            filename=filename,
        )
ludwig.visualize.compare_classifiers_performance_from_prob
¶
compare_classifiers_performance_from_prob(probabilities_per_model: list[ndarray], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int = 0, top_n_classes: list[int] | int = 3, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Produces model comparison barplot visualization from probabilities.
For each model it produces bars in a bar plot, one for each overall metric
computed on the fly from the probabilities of predictions for the specified
model_names.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
Path to experiment probabilities file.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
top_n_classes
|
List containing the number of classes to plot.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_performance_from_prob(
    probabilities_per_model: "list[np.ndarray]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int = 0,
    top_n_classes: "list[int] | int" = 3,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Produces model comparison barplot visualization from probabilities.

    For each model it produces bars in a bar plot, one for each overall metric
    (accuracy, hits@k and MRR) computed on the fly from the probabilities of
    predictions for the specified `model_names`.

    Args:
        probabilities_per_model: Path to experiment probabilities file.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        top_n_classes: List containing the number of classes to plot.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)
    top_n_classes_list = convert_to_list(top_n_classes)
    # Only the first entry of top_n_classes is used as k for hits@k.
    k = top_n_classes_list[0]
    model_names_list = convert_to_list(model_names)
    if labels_limit > 0:
        # Clamp "rare" labels (encoded above labels_limit) to the limit value.
        ground_truth[ground_truth > labels_limit] = labels_limit
    probs = probabilities_per_model
    accuracies = []
    hits_at_ks = []
    mrrs = []
    for _i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of all rare classes into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit
        # Replace probabilities with class indices sorted by ascending probability,
        # so the most likely class ends up in the last column.
        prob = np.argsort(prob, axis=1)
        top1 = prob[:, -1]
        topk = prob[:, -k:]
        accuracies.append((ground_truth == top1).sum() / len(ground_truth))
        hits_at_k = 0
        for j in range(len(ground_truth)):
            # np.isin yields a boolean; summing it counts the hits.
            hits_at_k += np.isin(ground_truth[j], topk[j])
        hits_at_ks.append(hits_at_k.item() / len(ground_truth))
        mrr = 0
        for j in range(len(ground_truth)):
            ground_truth_pos_in_probs = prob[j] == ground_truth[j]
            if np.any(ground_truth_pos_in_probs):
                # Columns are sorted ascending, so the reciprocal rank of the true
                # label is 1 / (n_cols - position_in_sorted_row).
                mrr += 1 / -(np.argwhere(ground_truth_pos_in_probs).item() - prob.shape[1])
        mrrs.append(mrr / len(ground_truth))
    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "compare_classifiers_performance_from_prob." + file_format)
    visualization_utils.compare_classifiers_plot(
        [accuracies, hits_at_ks, mrrs], [ACCURACY, HITS_AT_K, "mrr"], model_names_list, filename=filename
    )
ludwig.visualize.compare_classifiers_performance_from_pred
¶
compare_classifiers_performance_from_pred(predictions_per_model: list[ndarray], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Produces model comparison barplot visualization from predictions.
For each model it produces bars in a bar plot, one for each overall metric
computed on the fly from the predictions for the specified
model_names.
| PARAMETER | DESCRIPTION |
|---|---|
predictions_per_model
|
Path to experiment predictions file.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Name of the output feature to visualize.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_performance_from_pred(
    predictions_per_model: "list[np.ndarray]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Produces model comparison barplot visualization from predictions.

    For each model it produces bars in a bar plot, one for each overall metric
    computed on the fly from the predictions for the specified
    `model_names`.

    Args:
        predictions_per_model: Path to experiment predictions file.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Name of the output feature to visualize.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)

    predictions_per_model = [np.ndarray.flatten(np.array(pred)) for pred in predictions_per_model]
    if labels_limit > 0:
        # Clamp "rare" labels (encoded above labels_limit) to the limit value.
        ground_truth[ground_truth > labels_limit] = labels_limit

    names_list = convert_to_list(model_names)

    preds = predictions_per_model
    try:
        # Predictions loaded from raw files are strings; translate them to
        # numeric labels via the feature's str2idx mapping.
        preds = [[metadata[output_feature_name]["str2idx"][val] for val in pred] for pred in preds]
    except (TypeError, KeyError):
        # If predictions are coming from npy file there is no need to convert to
        # numeric labels using metadata
        pass

    accuracies, precisions, recalls, f1s = [], [], [], []
    for pred in preds:
        accuracies.append(sklearn.metrics.accuracy_score(ground_truth, pred))
        precisions.append(sklearn.metrics.precision_score(ground_truth, pred, average="macro"))
        recalls.append(sklearn.metrics.recall_score(ground_truth, pred, average="macro"))
        f1s.append(sklearn.metrics.f1_score(ground_truth, pred, average="macro"))

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "compare_classifiers_performance_from_pred." + file_format)

    visualization_utils.compare_classifiers_plot(
        [accuracies, precisions, recalls, f1s],
        [ACCURACY, "precision", "recall", "f1"],
        names_list,
        filename=filename,
    )
ludwig.visualize.compare_classifiers_performance_subset
¶
compare_classifiers_performance_subset(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, top_n_classes: list[int], labels_limit: int, subset: str, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Produces model comparison barplot visualization from train subset.
For each model it produces bars in a bar plot, one for each overall metric
computed on the fly from the probabilities predictions for the
specified model_names, considering only a subset of the full training set.
The way the subset is obtained is using the top_n_classes and
subset parameters.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
top_n_classes
|
List containing the number of classes to plot.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
subset
|
String specifying type of subset filtering. Valid values are 'ground_truth' or 'predictions'.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_performance_subset(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    top_n_classes: "list[int]",
    labels_limit: int,
    subset: str,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Produces model comparison barplot visualization from train subset.

    For each model it produces bars in a bar plot, one for each overall metric
    computed on the fly from the probabilities predictions for the
    specified `model_names`, considering only a subset of the full training set.
    The way the subset is obtained is using the `top_n_classes` and
    `subset` parameters.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        top_n_classes: List containing the number of classes to plot.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        subset: String specifying type of subset filtering. Valid values are
            `ground_truth` or `predictions`.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)
    top_n_classes_list = convert_to_list(top_n_classes)
    # Only the first entry of top_n_classes is used as k.
    k = top_n_classes_list[0]
    model_names_list = convert_to_list(model_names)
    if labels_limit > 0:
        # Clamp "rare" labels (encoded above labels_limit) to the limit value.
        ground_truth[ground_truth > labels_limit] = labels_limit
    subset_indices = ground_truth > 0
    gt_subset = ground_truth
    if subset == "ground_truth":
        # Keep only examples whose true label is among the first k classes.
        subset_indices = ground_truth < k
        gt_subset = ground_truth[subset_indices]
        logger.info(f"Subset is {len(gt_subset) / len(ground_truth) * 100:.2f}% of the data")
    probs = probabilities_per_model
    accuracies = []
    hits_at_ks = []
    for i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of all rare classes into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit
        if subset == PREDICTIONS:
            # Keep only examples this model predicts to be in the first k classes;
            # the subset therefore differs per model.
            subset_indices = np.argmax(prob, axis=1) < k
            gt_subset = ground_truth[subset_indices]
            fraction = len(gt_subset) / len(ground_truth) * 100
            model_label = model_names_list[i] if model_names_list and i < len(model_names_list) else i
            logger.info(f"Subset for model_name {model_label} is {fraction:.2f}% of the data")
            if model_names_list and i < len(model_names_list):
                # Annotate the label copy used for plotting with the subset size.
                # Fix: operate on model_names_list (the list passed to the plot),
                # not on the raw model_names argument, which may be None or a str
                # and would otherwise be mutated in place.
                model_names_list[i] = f"{model_label} ({fraction:.2f}%)"
        prob_subset = prob[subset_indices]
        # Class indices sorted by ascending probability: last column is top-1.
        prob_subset = np.argsort(prob_subset, axis=1)
        top1_subset = prob_subset[:, -1]
        top3_subset = prob_subset[:, -3:]
        accuracies.append(np.sum(gt_subset == top1_subset) / len(gt_subset))
        hits_at_k = 0
        for j in range(len(gt_subset)):
            # Fix: index the example row `j`, not the model index `i` — the
            # original compared every example against a single fixed row.
            hits_at_k += np.isin(gt_subset[j], top3_subset[j, :])
        hits_at_ks.append(hits_at_k.item() / len(gt_subset))
    title = None
    if subset == "ground_truth":
        title = "Classifier performance on first {} class{} ({:.2f}%)".format(
            k, "es" if k > 1 else "", len(gt_subset) / len(ground_truth) * 100
        )
    elif subset == PREDICTIONS:
        title = "Classifier performance on first {} class{}".format(k, "es" if k > 1 else "")
    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "compare_classifiers_performance_subset." + file_format)
    visualization_utils.compare_classifiers_plot(
        [accuracies, hits_at_ks], [ACCURACY, HITS_AT_K], model_names_list, title=title, filename=filename
    )
ludwig.visualize.compare_classifiers_performance_changing_k
¶
compare_classifiers_performance_changing_k(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, top_k: int, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Produce lineplot that show Hits@K metric while k goes from 1 to top_k.
For each model it produces a line plot that shows the Hits@K metric
(that counts a prediction as correct if the model produces it among the
first k) while changing k from 1 to top_k for the specified
output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
top_k
|
Number of elements in the ranklist to consider.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_performance_changing_k(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    top_k: int,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Produce lineplot that show Hits@K metric while k goes from 1 to `top_k`.

    For each model it produces a line plot that shows the Hits@K metric
    (that counts a prediction as correct if the model produces it among the
    first k) while changing k from 1 to top_k for the specified
    `output_feature_name`.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        top_k: Number of elements in the ranklist to consider.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)
    k = top_k
    if labels_limit > 0:
        # Clamp "rare" labels (encoded above labels_limit) to the limit value.
        ground_truth[ground_truth > labels_limit] = labels_limit
    probs = probabilities_per_model
    hits_at_ks = []
    model_names_list = convert_to_list(model_names)
    for _i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of all rare classes into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit
        # Class indices sorted by ascending probability: the last j+1 columns
        # of each row are that example's top-(j+1) predicted classes.
        prob = np.argsort(prob, axis=1)
        hits_at_k = [0.0] * k
        for g in range(len(ground_truth)):
            for j in range(k):
                # np.isin yields a boolean; adding it counts a hit when the true
                # label appears among the top j+1 predictions.
                hits_at_k[j] += np.isin(ground_truth[g], prob[g, -j - 1 :])
        hits_at_ks.append(np.array(hits_at_k) / len(ground_truth))
    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "compare_classifiers_performance_changing_k." + file_format)
    visualization_utils.compare_classifiers_line_plot(
        np.arange(1, k + 1),
        hits_at_ks,
        "hits@k",
        model_names_list,
        title="Classifier comparison (hits@k)",
        filename=filename,
    )
ludwig.visualize.compare_classifiers_multiclass_multimetric
¶
compare_classifiers_multiclass_multimetric(test_stats_per_model: list[dict], metadata: dict, output_feature_name: str, top_n_classes: list[int], model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Show the precision, recall and F1 of the model for the specified output_feature_name.
For each model it produces four plots that show the precision, recall and F1 of the model on several classes for the specified output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
test_stats_per_model
|
List containing dictionary of evaluation performance statistics.
TYPE:
|
metadata
|
Intermediate preprocess structure created during training containing the mappings of the input dataset.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization.
If
TYPE:
|
top_n_classes
|
List containing the number of classes to plot.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_multiclass_multimetric(
    test_stats_per_model: "list[dict]",
    metadata: dict,
    output_feature_name: str,
    top_n_classes: "list[int]",
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Show the precision, recall and F1 of the model for the specified output_feature_name.

    For each model it produces four plots that show the precision,
    recall and F1 of the model on several classes for the specified output_feature_name.

    Args:
        test_stats_per_model: List containing dictionary of evaluation performance statistics.
        metadata: Intermediate preprocess structure created during training containing
            the mappings of the input dataset.
        output_feature_name: Name of the output feature to use for the visualization.
            If `None`, use all output features.
        top_n_classes: List containing the number of classes to plot.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
    """
    # Template placeholders: model name, output feature name, plot variant (topK/bestK/worstK/sorted).
    filename_template = "compare_classifiers_multiclass_multimetric_{}_{}_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    test_stats_per_model_list = convert_to_list(test_stats_per_model)
    model_names_list = convert_to_list(model_names)
    output_feature_names = _validate_output_feature_name_from_test_stats(output_feature_name, test_stats_per_model_list)
    for i, test_statistics in enumerate(test_stats_per_model_list):
        for output_feature_name in output_feature_names:
            # Fall back to an empty label when no model name was provided for index i.
            model_name_name = model_names_list[i] if model_names_list is not None and i < len(model_names_list) else ""
            if "per_class_stats" not in test_statistics[output_feature_name]:
                logger.warning(
                    f"The output_feature_name {output_feature_name} in test statistics does not contain "
                    + "per_class_stats, skipping it."
                )
                # NOTE(review): `break` abandons all remaining output features for this
                # model, while the warning says "skipping it" — `continue` may have been
                # intended. Confirm before changing.
                break
            per_class_stats = test_statistics[output_feature_name]["per_class_stats"]
            precisions = []
            recalls = []
            f1_scores = []
            labels = []
            # Iterate classes in ascending encoded-index order (str2idx gives the encoding).
            for _, class_name in sorted(
                ((metadata[output_feature_name]["str2idx"][key], key) for key in per_class_stats),
                key=lambda tup: tup[0],
            ):
                class_stats = per_class_stats[class_name]
                precisions.append(class_stats["precision"])
                recalls.append(class_stats["recall"])
                f1_scores.append(class_stats["f1_score"])
                labels.append(class_name)
            # One "top k" plot per requested k, covering the first k classes by encoded index.
            for k in top_n_classes:
                # k <= 0 means "all classes"; otherwise clamp to the number of classes available.
                k = min(k, len(precisions)) if k > 0 else len(precisions)
                ps = precisions[0:k]
                rs = recalls[0:k]
                fs = f1_scores[0:k]
                ls = labels[0:k]
                filename = None
                if filename_template_path:
                    os.makedirs(output_directory, exist_ok=True)
                    filename = filename_template_path.format(model_name_name, output_feature_name, f"top{k}")
                visualization_utils.compare_classifiers_multiclass_multimetric_plot(
                    [ps, rs, fs],
                    ["precision", "recall", "f1 score"],
                    labels=ls,
                    title=f"{model_name_name} Multiclass Precision / Recall / F1 Score top {k} {output_feature_name}",
                    filename=filename,
                )
            # NaNs (e.g. undefined precision for unseen classes) are mapped to 0 so sorting works.
            p_np = np.nan_to_num(np.array(precisions, dtype=np.float32))
            r_np = np.nan_to_num(np.array(recalls, dtype=np.float32))
            f1_np = np.nan_to_num(np.array(f1_scores, dtype=np.float32))
            # NOTE(review): labels is a list of class-name strings; nan_to_num on a
            # non-float array is a no-op here — presumably only np.array was intended.
            labels_np = np.nan_to_num(np.array(labels))
            sorted_indices = f1_np.argsort()
            # NOTE: `k` deliberately carries over its last value from the top_n_classes
            # loop above — best/worst plots below use that final k.
            higher_f1s = sorted_indices[-k:][::-1]
            filename = None
            if filename_template_path:
                os.makedirs(output_directory, exist_ok=True)
                filename = filename_template_path.format(model_name_name, output_feature_name, f"best{k}")
            # Plot the k classes with the highest F1 scores.
            visualization_utils.compare_classifiers_multiclass_multimetric_plot(
                [p_np[higher_f1s], r_np[higher_f1s], f1_np[higher_f1s]],
                ["precision", "recall", "f1 score"],
                labels=labels_np[higher_f1s].tolist(),
                title=f"{model_name_name} Multiclass Precision / Recall / "
                f"F1 Score best {k} classes {output_feature_name}",
                filename=filename,
            )
            lower_f1s = sorted_indices[:k]
            filename = None
            if filename_template_path:
                filename = filename_template_path.format(model_name_name, output_feature_name, f"worst{k}")
            # Plot the k classes with the lowest F1 scores.
            visualization_utils.compare_classifiers_multiclass_multimetric_plot(
                [p_np[lower_f1s], r_np[lower_f1s], f1_np[lower_f1s]],
                ["precision", "recall", "f1 score"],
                labels=labels_np[lower_f1s].tolist(),
                title=(
                    f"{model_name_name} Multiclass Precision / Recall / F1 Score worst "
                    + f"{k} classes {output_feature_name}"
                ),
                filename=filename,
            )
            filename = None
            if filename_template_path:
                filename = filename_template_path.format(model_name_name, output_feature_name, "sorted")
            # Plot all classes, ordered by descending F1 score.
            visualization_utils.compare_classifiers_multiclass_multimetric_plot(
                [p_np[sorted_indices[::-1]], r_np[sorted_indices[::-1]], f1_np[sorted_indices[::-1]]],
                ["precision", "recall", "f1 score"],
                labels=labels_np[sorted_indices[::-1]].tolist(),
                title=f"{model_name_name} Multiclass Precision / Recall / F1 Score {output_feature_name} sorted",
                filename=filename,
            )
            # Summary logging of best/worst classes and F1 > 0 / F1 == 0 counts.
            # NOTE(review): the messages say "5 classes" but higher_f1s/lower_f1s hold k entries.
            logger.info("\n")
            logger.info(model_name_name)
            tmp_str = f"{output_feature_name} best 5 classes: "
            tmp_str += "{}"
            logger.info(tmp_str.format(higher_f1s))
            logger.info(f1_np[higher_f1s])
            tmp_str = f"{output_feature_name} worst 5 classes: "
            tmp_str += "{}"
            logger.info(tmp_str.format(lower_f1s))
            logger.info(f1_np[lower_f1s])
            tmp_str = f"{output_feature_name} number of classes with f1 score > 0: "
            tmp_str += "{}"
            logger.info(tmp_str.format(np.sum(f1_np > 0)))
            tmp_str = f"{output_feature_name} number of classes with f1 score = 0: "
            tmp_str += "{}"
            logger.info(tmp_str.format(np.sum(f1_np == 0)))
ludwig.visualize.compare_classifiers_predictions
¶
compare_classifiers_predictions(predictions_per_model: list[list], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show two models comparison of their output_feature_name predictions.
| PARAMETER | DESCRIPTION |
|---|---|
predictions_per_model
|
List containing the model predictions for the specified output_feature_name.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def compare_classifiers_predictions(
    predictions_per_model: "list[list]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show two models comparison of their output_feature_name predictions.

    Args:
        predictions_per_model: List containing the model predictions for the
            specified output_feature_name.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)

    model_names_list = convert_to_list(model_names)
    # Use the normalized list for the length checks. Checking len(model_names)
    # directly was buggy: a bare string such as "my_model" has len > 1 but
    # normalizes to a single-element list, so indexing [1] raised IndexError.
    name_c1 = model_names_list[0] if model_names_list is not None and len(model_names_list) > 0 else "c1"
    name_c2 = model_names_list[1] if model_names_list is not None and len(model_names_list) > 1 else "c2"

    pred_c1 = predictions_per_model[0]
    pred_c2 = predictions_per_model[1]
    if labels_limit > 0:
        # Collapse every "rare" encoded label into the single bucket `labels_limit`.
        ground_truth[ground_truth > labels_limit] = labels_limit
        pred_c1[pred_c1 > labels_limit] = labels_limit
        pred_c2[pred_c2 > labels_limit] = labels_limit

    # Renamed from `all`, which shadowed the builtin (resolves the old TODO).
    total = len(ground_truth)
    if total == 0:
        logger.error("No labels in the ground truth")
        return

    # Tally how the two models agree/disagree with the ground truth and each other.
    both_right = 0
    both_wrong_same = 0
    both_wrong_different = 0
    c1_right_c2_wrong = 0
    c1_wrong_c2_right = 0
    for i in range(total):
        if ground_truth[i] == pred_c1[i] and ground_truth[i] == pred_c2[i]:
            both_right += 1
        elif ground_truth[i] != pred_c1[i] and ground_truth[i] != pred_c2[i]:
            if pred_c1[i] == pred_c2[i]:
                both_wrong_same += 1
            else:
                both_wrong_different += 1
        elif ground_truth[i] == pred_c1[i] and ground_truth[i] != pred_c2[i]:
            c1_right_c2_wrong += 1
        elif ground_truth[i] != pred_c1[i] and ground_truth[i] == pred_c2[i]:
            c1_wrong_c2_right += 1

    one_right = c1_right_c2_wrong + c1_wrong_c2_right
    both_wrong = both_wrong_same + both_wrong_different

    logger.info(f"Test datapoints: {total}")
    logger.info(f"Both right: {both_right} {100 * both_right / total:.2f}%")
    logger.info(f"One right: {one_right} {100 * one_right / total:.2f}%")
    logger.info(
        f"  {name_c1} right / {name_c2} wrong: {c1_right_c2_wrong} {100 * c1_right_c2_wrong / total:.2f}% {100 * c1_right_c2_wrong / one_right if one_right > 0 else 0:.2f}%"
    )
    logger.info(
        f"  {name_c1} wrong / {name_c2} right: {c1_wrong_c2_right} {100 * c1_wrong_c2_right / total:.2f}% {100 * c1_wrong_c2_right / one_right if one_right > 0 else 0:.2f}%"
    )
    logger.info(f"Both wrong: {both_wrong} {100 * both_wrong / total:.2f}%")
    logger.info(
        f"  same prediction: {both_wrong_same} {100 * both_wrong_same / total:.2f}% {100 * both_wrong_same / both_wrong if both_wrong > 0 else 0:.2f}%"
    )
    logger.info(
        f"  different prediction: {both_wrong_different} {100 * both_wrong_different / total:.2f}% {100 * both_wrong_different / both_wrong if both_wrong > 0 else 0:.2f}%"
    )

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, f"compare_classifiers_predictions_{name_c1}_{name_c2}.{file_format}")

    # Outer ring: agreement summary. Inner ring: the detailed breakdown,
    # nested into the outer slices by the [0, 1, 1, 2, 2] mapping.
    visualization_utils.donut(
        [both_right, one_right, both_wrong],
        ["both right", "one right", "both wrong"],
        [both_right, c1_right_c2_wrong, c1_wrong_c2_right, both_wrong_same, both_wrong_different],
        [
            "both right",
            f"{name_c1} right / {name_c2} wrong",
            f"{name_c1} wrong / {name_c2} right",
            "same prediction",
            "different prediction",
        ],
        [0, 1, 1, 2, 2],
        title=f"{name_c1} vs {name_c2}",
        tight_layout=kwargs.pop("tight_layout", True),
        filename=filename,
    )
ludwig.visualize.confidence_thresholding_2thresholds_2d
¶
confidence_thresholding_2thresholds_2d(probabilities_per_model: list[array], ground_truths: list[array] | list[Series], metadata, threshold_output_feature_names: list[str], labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Show confidence threshold data vs accuracy for two output feature names.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truths
|
Containing ground truth data.
TYPE:
|
metadata
|
Feature metadata dictionary.
|
threshold_output_feature_names
|
List containing two output feature names for visualization.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def confidence_thresholding_2thresholds_2d(
    probabilities_per_model: "list[np.array]",
    ground_truths: "list[np.array] | list[pd.Series]",
    metadata,
    threshold_output_feature_names: "list[str]",
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Show confidence threshold data vs accuracy for two output feature names.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truths: Containing ground truth data.
        metadata: Feature metadata dictionary.
        threshold_output_feature_names: List containing two output feature names
            for visualization.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
    """
    # Bail out silently if inputs are not exactly two probability arrays / feature names.
    try:
        validate_conf_thresholds_and_probabilities_2d_3d(probabilities_per_model, threshold_output_feature_names)
    except RuntimeError:
        return
    probs = probabilities_per_model
    model_names_list = convert_to_list(model_names)
    filename_template = "confidence_thresholding_2thresholds_2d_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    if not isinstance(ground_truths[0], np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[threshold_output_feature_names[0]]
        vfunc = np.vectorize(_encode_categorical_feature)
        gt_1 = vfunc(ground_truths[0], feature_metadata["str2idx"])
        feature_metadata = metadata[threshold_output_feature_names[1]]
        gt_2 = vfunc(ground_truths[1], feature_metadata["str2idx"])
    else:
        gt_1 = ground_truths[0]
        gt_2 = ground_truths[1]
    if labels_limit > 0:
        # Collapse "rare" labels into the single bucket `labels_limit`.
        gt_1[gt_1 > labels_limit] = labels_limit
        gt_2[gt_2 > labels_limit] = labels_limit
    # Thresholds 0.00, 0.05, ..., 1.00; reused as the fixed coverage grid for interpolation.
    thresholds = [t / 100 for t in range(0, 101, 5)]
    fixed_step_coverage = thresholds
    name_t1 = f"{threshold_output_feature_names[0]} threshold"
    name_t2 = f"{threshold_output_feature_names[1]} threshold"
    accuracies = []
    dataset_kept = []
    interps = []
    table = [[name_t1, name_t2, "coverage", ACCURACY]]
    # Fold rare-class probability columns into the bucket column for each model.
    # NOTE: prob_limit is a view, so this also mutates the caller's arrays in place.
    if labels_limit > 0 and probs[0].shape[1] > labels_limit + 1:
        prob_limit = probs[0][:, : labels_limit + 1]
        prob_limit[:, labels_limit] = probs[0][:, labels_limit:].sum(1)
        probs[0] = prob_limit
    if labels_limit > 0 and probs[1].shape[1] > labels_limit + 1:
        prob_limit = probs[1][:, : labels_limit + 1]
        prob_limit[:, labels_limit] = probs[1][:, labels_limit:].sum(1)
        probs[1] = prob_limit
    # Per-example confidence and predicted class for each of the two outputs.
    max_prob_1 = np.max(probs[0], axis=1)
    predictions_1 = np.argmax(probs[0], axis=1)
    max_prob_2 = np.max(probs[1], axis=1)
    predictions_2 = np.argmax(probs[1], axis=1)
    # Sweep the 2D grid of (threshold_1, threshold_2) pairs.
    for threshold_1 in thresholds:
        threshold_1 = threshold_1 if threshold_1 < 1 else 0.999
        curr_accuracies = []
        curr_dataset_kept = []
        for threshold_2 in thresholds:
            threshold_2 = threshold_2 if threshold_2 < 1 else 0.999
            # Keep only examples where both outputs meet their confidence thresholds.
            filtered_indices = np.logical_and(max_prob_1 >= threshold_1, max_prob_2 >= threshold_2)
            filtered_gt_1 = gt_1[filtered_indices]
            filtered_predictions_1 = predictions_1[filtered_indices]
            filtered_gt_2 = gt_2[filtered_indices]
            filtered_predictions_2 = predictions_2[filtered_indices]
            coverage = len(filtered_gt_1) / len(gt_1)
            # Accuracy counts an example correct only if BOTH outputs are predicted correctly.
            # NOTE(review): divides by len(filtered_gt_1), which can be 0 at high thresholds
            # (numpy then yields nan/inf with a warning rather than raising).
            accuracy = (
                np.logical_and(filtered_gt_1 == filtered_predictions_1, filtered_gt_2 == filtered_predictions_2)
            ).sum() / len(filtered_gt_1)
            curr_accuracies.append(accuracy)
            curr_dataset_kept.append(coverage)
            table.append([threshold_1, threshold_2, coverage, accuracy])
        accuracies.append(curr_accuracies)
        dataset_kept.append(curr_dataset_kept)
        # Resample this row's accuracy-vs-coverage curve onto the fixed coverage grid.
        # Coverage decreases as threshold_2 grows, so reverse to get ascending x for np.interp.
        interps.append(
            np.interp(
                fixed_step_coverage, list(reversed(curr_dataset_kept)), list(reversed(curr_accuracies)), left=1, right=0
            )
        )
    logger.info("CSV table")
    for row in table:
        logger.info(",".join([str(e) for e in row]))
    # ===========#
    # Multiline #
    # ===========#
    filename = None
    if filename_template_path:
        os.makedirs(output_directory, exist_ok=True)
        filename = filename_template_path.format("multiline")
    visualization_utils.confidence_filtering_data_vs_acc_multiline_plot(
        accuracies, dataset_kept, model_names_list, title="Coverage vs Accuracy, two thresholds", filename=filename
    )
    # ==========#
    # Max line #
    # ==========#
    filename = None
    if filename_template_path:
        filename = filename_template_path.format("maxline")
    # Best achievable accuracy at each coverage level across all threshold_1 rows.
    max_accuracies = np.amax(np.array(interps), 0)
    visualization_utils.confidence_filtering_data_vs_acc_plot(
        [max_accuracies],
        [thresholds],
        model_names_list,
        title="Coverage vs Accuracy, two thresholds",
        filename=filename,
    )
    # ==========================#
    # Max line with thresholds #
    # ==========================#
    acc_matrix = np.array(accuracies)
    cov_matrix = np.array(dataset_kept)
    t1_maxes = [1]
    t2_maxes = [1]
    # For each coverage bin, find the (threshold_1, threshold_2) pair that
    # maximizes accuracy among grid cells whose coverage falls in the bin.
    for i in range(len(fixed_step_coverage) - 1):
        lower = fixed_step_coverage[i]
        upper = fixed_step_coverage[i + 1]
        indices = np.logical_and(cov_matrix >= lower, cov_matrix < upper)
        # Mask out cells outside the bin so argmax only considers in-bin accuracies.
        selected_acc = acc_matrix.copy()
        selected_acc[np.logical_not(indices)] = -1
        threshold_indices = np.unravel_index(np.argmax(selected_acc, axis=None), selected_acc.shape)
        t1_maxes.append(thresholds[threshold_indices[0]])
        t2_maxes.append(thresholds[threshold_indices[1]])
    model_name = model_names_list[0] if model_names_list is not None and len(model_names_list) > 0 else ""
    filename = None
    if filename_template_path:
        os.makedirs(output_directory, exist_ok=True)
        filename = filename_template_path.format("maxline_with_thresholds")
    visualization_utils.confidence_filtering_data_vs_acc_plot(
        [max_accuracies, t1_maxes, t2_maxes],
        [fixed_step_coverage, fixed_step_coverage, fixed_step_coverage],
        model_names=[model_name + " accuracy", name_t1, name_t2],
        dotted=[False, True, True],
        y_label="",
        title="Coverage vs Accuracy & Threshold",
        filename=filename,
    )
ludwig.visualize.confidence_thresholding_2thresholds_3d
¶
confidence_thresholding_2thresholds_3d(probabilities_per_model: list[array], ground_truths: list[array] | list[Series], metadata, threshold_output_feature_names: list[str], labels_limit: int, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Show 3d confidence threshold data vs accuracy for two output feature names.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truths
|
Containing ground truth data.
TYPE:
|
metadata
|
Feature metadata dictionary.
|
threshold_output_feature_names
|
List containing two output feature names for visualization.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def confidence_thresholding_2thresholds_3d(
    probabilities_per_model: "list[np.array]",
    ground_truths: "list[np.array] | list[pd.Series]",
    metadata,
    threshold_output_feature_names: "list[str]",
    labels_limit: int,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Show 3d confidence threshold data vs accuracy for two output feature names.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truths: Containing ground truth data.
        metadata: Feature metadata dictionary.
        threshold_output_feature_names: List containing two output feature names
            for visualization.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
    """
    # Bail out silently if inputs are not exactly two probability arrays / feature names.
    try:
        validate_conf_thresholds_and_probabilities_2d_3d(probabilities_per_model, threshold_output_feature_names)
    except RuntimeError:
        return
    probs = probabilities_per_model
    if not isinstance(ground_truths[0], np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[threshold_output_feature_names[0]]
        vfunc = np.vectorize(_encode_categorical_feature)
        gt_1 = vfunc(ground_truths[0], feature_metadata["str2idx"])
        feature_metadata = metadata[threshold_output_feature_names[1]]
        gt_2 = vfunc(ground_truths[1], feature_metadata["str2idx"])
    else:
        gt_1 = ground_truths[0]
        gt_2 = ground_truths[1]
    if labels_limit > 0:
        # Collapse "rare" labels into the single bucket `labels_limit`.
        gt_1[gt_1 > labels_limit] = labels_limit
        gt_2[gt_2 > labels_limit] = labels_limit
    # Thresholds 0.00, 0.05, ..., 1.00 on both axes of the 3D surface.
    thresholds = [t / 100 for t in range(0, 101, 5)]
    accuracies = []
    dataset_kept = []
    # Fold rare-class probability columns into the bucket column for each model.
    # NOTE: prob_limit is a view, so this also mutates the caller's arrays in place.
    if labels_limit > 0 and probs[0].shape[1] > labels_limit + 1:
        prob_limit = probs[0][:, : labels_limit + 1]
        prob_limit[:, labels_limit] = probs[0][:, labels_limit:].sum(1)
        probs[0] = prob_limit
    if labels_limit > 0 and probs[1].shape[1] > labels_limit + 1:
        prob_limit = probs[1][:, : labels_limit + 1]
        prob_limit[:, labels_limit] = probs[1][:, labels_limit:].sum(1)
        probs[1] = prob_limit
    # Per-example confidence and predicted class for each of the two outputs.
    max_prob_1 = np.max(probs[0], axis=1)
    predictions_1 = np.argmax(probs[0], axis=1)
    max_prob_2 = np.max(probs[1], axis=1)
    predictions_2 = np.argmax(probs[1], axis=1)
    for threshold_1 in thresholds:
        threshold_1 = threshold_1 if threshold_1 < 1 else 0.999
        curr_accuracies = []
        curr_dataset_kept = []
        for threshold_2 in thresholds:
            threshold_2 = threshold_2 if threshold_2 < 1 else 0.999
            # Keep only examples where both outputs meet their confidence thresholds.
            filtered_indices = np.logical_and(max_prob_1 >= threshold_1, max_prob_2 >= threshold_2)
            filtered_gt_1 = gt_1[filtered_indices]
            filtered_predictions_1 = predictions_1[filtered_indices]
            filtered_gt_2 = gt_2[filtered_indices]
            filtered_predictions_2 = predictions_2[filtered_indices]
            # Accuracy counts an example correct only if BOTH outputs are predicted correctly.
            # NOTE(review): divides by len(filtered_gt_1), which can be 0 at high thresholds
            # (numpy then yields nan/inf with a warning rather than raising).
            accuracy = (
                np.logical_and(filtered_gt_1 == filtered_predictions_1, filtered_gt_2 == filtered_predictions_2)
            ).sum() / len(filtered_gt_1)
            curr_accuracies.append(accuracy)
            curr_dataset_kept.append(len(filtered_gt_1) / len(gt_1))
        accuracies.append(curr_accuracies)
        dataset_kept.append(curr_dataset_kept)
    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "confidence_thresholding_2thresholds_3d." + file_format)
    visualization_utils.confidence_filtering_3d_plot(
        np.array(thresholds),
        np.array(thresholds),
        np.array(accuracies),
        np.array(dataset_kept),
        threshold_output_feature_names,
        title="Confidence_Thresholding, two thresholds",
        filename=filename,
    )
ludwig.visualize.confidence_thresholding
¶
confidence_thresholding(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show models' accuracy and data coverage while increasing the threshold.
For each model it produces a pair of lines indicating the accuracy of the model and the data coverage while increasing a threshold (x axis) on the probabilities of predictions for the specified output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def confidence_thresholding(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show models accuracy and data coverage while increasing the threshold.

    For each model it produces a pair of lines indicating the accuracy of
    the model and the data coverage while increasing a threshold (x axis) on
    the probabilities of predictions for the specified output_feature_name.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # Raw (non-encoded) values: map them to integer class ids first.
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)
    if labels_limit > 0:
        # Collapse every "rare" encoded label into the single bucket `labels_limit`.
        ground_truth[ground_truth > labels_limit] = labels_limit

    model_names_list = convert_to_list(model_names)
    # Confidence thresholds 0.00, 0.05, ..., 1.00.
    thresholds = [step / 100 for step in range(0, 101, 5)]

    accuracies = []
    dataset_kept = []
    for prob in probabilities_per_model:
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold all rare-class probability columns into the bucket column.
            limited = prob[:, : labels_limit + 1]
            limited[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = limited

        confidences = np.max(prob, axis=1)
        predicted = np.argmax(prob, axis=1)

        model_accuracies = []
        model_coverage = []
        for raw_threshold in thresholds:
            # Cap at 0.999 so the top threshold never filters by ">= 1.0".
            threshold = raw_threshold if raw_threshold < 1 else 0.999
            keep_mask = confidences >= threshold
            kept_gt = ground_truth[keep_mask]
            kept_pred = predicted[keep_mask]
            model_accuracies.append((kept_gt == kept_pred).sum() / len(kept_gt))
            model_coverage.append(len(kept_gt) / len(ground_truth))

        accuracies.append(model_accuracies)
        dataset_kept.append(model_coverage)

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "confidence_thresholding." + file_format)

    visualization_utils.confidence_filtering_plot(
        thresholds, accuracies, dataset_kept, model_names_list, title="Confidence_Thresholding", filename=filename
    )
ludwig.visualize.confidence_thresholding_data_vs_acc
¶
confidence_thresholding_data_vs_acc(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show models comparison of confidence threshold data vs accuracy.
For each model it produces a line indicating the accuracy of the model and the data coverage while increasing a threshold on the probabilities of predictions for the specified output_feature_name. The difference with confidence_thresholding is that it uses two axes instead of three, not visualizing the threshold and having coverage as x axis instead of the threshold.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def confidence_thresholding_data_vs_acc(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show models comparison of confidence threshold data vs accuracy.

    For each model it produces a line indicating the accuracy of the model
    and the data coverage while increasing a threshold on the probabilities
    of predictions for the specified output_feature_name. The difference with
    confidence_thresholding is that it uses two axes instead of three,
    not visualizing the threshold and having coverage as x axis instead of
    the threshold.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # Translate raw label values into their integer encodings.
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)
    if labels_limit > 0:
        # Map every "rare" encoded label onto the single bucket `labels_limit`.
        ground_truth[ground_truth > labels_limit] = labels_limit

    model_names_list = convert_to_list(model_names)
    # Confidence thresholds 0.00, 0.05, ..., 1.00.
    thresholds = [pct / 100 for pct in range(0, 101, 5)]
    n_examples = len(ground_truth)

    accuracies, dataset_kept = [], []
    for prob in probabilities_per_model:
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold every rare-class column into the bucket at index labels_limit.
            folded = prob[:, : labels_limit + 1]
            folded[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = folded

        top_prob = np.max(prob, axis=1)
        top_class = np.argmax(prob, axis=1)

        acc_curve = []
        coverage_curve = []
        for t in thresholds:
            # 0.999 stands in for 1.0 so the last bin is not empty by construction.
            t = t if t < 1 else 0.999
            mask = top_prob >= t
            gt_kept = ground_truth[mask]
            pred_kept = top_class[mask]
            acc_curve.append((gt_kept == pred_kept).sum() / len(gt_kept))
            coverage_curve.append(len(gt_kept) / n_examples)

        accuracies.append(acc_curve)
        dataset_kept.append(coverage_curve)

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "confidence_thresholding_data_vs_acc." + file_format)

    visualization_utils.confidence_filtering_data_vs_acc_plot(
        accuracies,
        dataset_kept,
        model_names_list,
        title="Confidence_Thresholding (Data vs Accuracy)",
        filename=filename,
    )
ludwig.visualize.confidence_thresholding_data_vs_acc_subset
¶
confidence_thresholding_data_vs_acc_subset(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, top_n_classes: list[int], labels_limit: int, subset: str, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show models comparison of confidence threshold data vs accuracy on a subset of data.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
top_n_classes
|
List containing the number of classes to plot.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
subset
|
String specifying type of subset filtering. Valid values are `ground_truth` or `predictions`.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def confidence_thresholding_data_vs_acc_subset(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    top_n_classes: "list[int]",
    labels_limit: int,
    subset: str,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show models comparison of confidence threshold data vs accuracy on a subset of data.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        top_n_classes: List containing the number of classes to plot.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        subset: String specifying type of subset filtering. Valid values are
            `ground_truth` or `predictions`.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)

    top_n_classes_list = convert_to_list(top_n_classes)
    # Only the first requested class count is used for subset filtering.
    k = top_n_classes_list[0]
    if labels_limit > 0:
        # Collapse every rare label into the single `labels_limit` bucket.
        ground_truth[ground_truth > labels_limit] = labels_limit
    probs = probabilities_per_model
    model_names_list = convert_to_list(model_names)
    thresholds = [t / 100 for t in range(0, 101, 5)]
    accuracies = []
    dataset_kept = []

    subset_indices = ground_truth > 0
    gt_subset = ground_truth
    if subset == "ground_truth":
        # Keep only samples whose ground-truth label is among the top-k classes.
        subset_indices = ground_truth < k
        gt_subset = ground_truth[subset_indices]
        logger.info(f"Subset is {len(gt_subset) / len(ground_truth) * 100:.2f}% of the data")

    for i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of rare-label columns into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit

        if subset == PREDICTIONS:
            # Keep only samples whose predicted class is among the top-k classes.
            subset_indices = np.argmax(prob, axis=1) < k
            gt_subset = ground_truth[subset_indices]
            # Index the normalized list so a single string passed as `model_names`
            # is not indexed character by character.
            model_label = model_names_list[i] if model_names_list and i < len(model_names_list) else i
            logger.info(
                f"Subset for model_name {model_label} is {len(gt_subset) / len(ground_truth) * 100:.2f}% of the data"
            )

        prob_subset = prob[subset_indices]
        max_prob = np.max(prob_subset, axis=1)
        predictions = np.argmax(prob_subset, axis=1)

        accuracies_alg = []
        dataset_kept_alg = []
        for threshold in thresholds:
            threshold = threshold if threshold < 1 else 0.999
            filtered_indices = max_prob >= threshold
            filtered_gt = gt_subset[filtered_indices]
            filtered_predictions = predictions[filtered_indices]
            # Guard against ZeroDivisionError when no sample clears the threshold.
            if len(filtered_gt) > 0:
                accuracy = (filtered_gt == filtered_predictions).sum() / len(filtered_gt)
            else:
                accuracy = 0.0
            accuracies_alg.append(accuracy)
            dataset_kept_alg.append(len(filtered_gt) / len(ground_truth))
        accuracies.append(accuracies_alg)
        dataset_kept.append(dataset_kept_alg)

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "confidence_thresholding_data_vs_acc_subset." + file_format)

    visualization_utils.confidence_filtering_data_vs_acc_plot(
        accuracies,
        dataset_kept,
        model_names_list,
        title="Confidence_Thresholding (Data vs Accuracy)",
        filename=filename,
    )
ludwig.visualize.binary_threshold_vs_metric
¶
binary_threshold_vs_metric(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, metrics: list[str], positive_label: int = 1, model_names: list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show confidence of the model against metric for the specified output_feature_name.
For each metric specified in metrics (options are f1, precision, recall,
accuracy), this visualization produces a line chart plotting a threshold
on the confidence of the model against the metric for the specified
output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
metrics
|
Metrics to display (`'f1'`, `'precision'`, `'recall'`, `'accuracy'`).
TYPE:
|
positive_label
|
Numeric encoded value for the positive class.
TYPE:
|
model_names
|
List of the names of the models to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/threshold.py
@DeveloperAPI
def binary_threshold_vs_metric(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    metrics: "list[str]",
    positive_label: int = 1,
    model_names: "list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show confidence of the model against metric for the specified output_feature_name.

    For every metric listed in `metrics` (options are `f1`, `precision`,
    `recall`, `accuracy`) a line chart is drawn plotting a threshold on the
    model confidence against that metric for `output_feature_name`.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        metrics: Metrics to display (`'f1'`, `'precision'`, `'recall'`, `'accuracy'`).
        positive_label: Numeric encoded value for the positive class.
        model_names: List of the names of the models to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # Raw values: translate them to their encoded form first.
        feature_metadata = metadata[output_feature_name]
        ground_truth, positive_label = _convert_ground_truth(
            ground_truth, feature_metadata, ground_truth_apply_idx, positive_label
        )

    model_names_list = convert_to_list(model_names)
    metrics_list = convert_to_list(metrics)
    filename_template_path = generate_filename_template_path(
        output_directory, "binary_threshold_vs_metric_{}." + file_format
    )
    thresholds = [step / 100 for step in range(0, 101, 5)]
    supported_metrics = {"f1", "precision", "recall", "accuracy"}

    for metric in metrics_list:
        if metric not in supported_metrics:
            logger.error(f"Metric {metric} not supported")
            continue

        scores = []
        for model_prob in probabilities_per_model:
            if model_prob.ndim == 2:
                # Reduce the class-probability matrix to the positive-class column,
                # failing loudly when that column does not exist.
                if model_prob.shape[1] <= positive_label:
                    raise Exception(
                        f"the specified positive label {positive_label} is not present in the probabilities"
                    )
                model_prob = model_prob[:, positive_label]

            per_threshold_scores = []
            for threshold in thresholds:
                cutoff = threshold if threshold < 1 else 0.99
                predictions = model_prob >= cutoff
                if metric == "f1":
                    metric_score = sklearn.metrics.f1_score(ground_truth, predictions)
                elif metric == "precision":
                    metric_score = sklearn.metrics.precision_score(ground_truth, predictions)
                elif metric == "recall":
                    metric_score = sklearn.metrics.recall_score(ground_truth, predictions)
                elif metric == ACCURACY:
                    metric_score = sklearn.metrics.accuracy_score(ground_truth, predictions)
                per_threshold_scores.append(metric_score)
            scores.append(per_threshold_scores)

        filename = None
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
            filename = filename_template_path.format(metric)

        visualization_utils.threshold_vs_metric_plot(
            thresholds, scores, model_names_list, title=f"Binary threshold vs {metric}", filename=filename
        )
ludwig.visualize.roc_curves
¶
roc_curves(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, positive_label: int = 1, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show the roc curves for output features in the specified models.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
positive_label
|
Numeric encoded value for the positive class.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/curves.py
@DeveloperAPI
def roc_curves(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    positive_label: int = 1,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show the roc curves for output features in the specified models.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        positive_label: Numeric encoded value for the positive class.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # Raw values: translate them to their encoded form first.
        feature_metadata = metadata[output_feature_name]
        ground_truth, positive_label = _convert_ground_truth(
            ground_truth, feature_metadata, ground_truth_apply_idx, positive_label
        )

    model_names_list = convert_to_list(model_names)

    # One (fpr, tpr) curve per model; 2-D probabilities are reduced to the
    # positive-class column before computing the curve.
    fpr_tprs = []
    for prob in probabilities_per_model:
        scores = prob[:, positive_label] if prob.ndim > 1 else prob
        fpr, tpr, _unused_thresholds = sklearn.metrics.roc_curve(ground_truth, scores, pos_label=positive_label)
        fpr_tprs.append((fpr, tpr))

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, "roc_curves." + file_format)

    visualization_utils.roc_curves(fpr_tprs, model_names_list, title="ROC curves", filename=filename)
ludwig.visualize.roc_curves_from_test_statistics
¶
roc_curves_from_test_statistics(test_stats_per_model: list[dict], output_feature_name: str, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Show the roc curves for the specified models output binary output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
test_stats_per_model
|
Dictionary containing evaluation performance statistics.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
Source code in ludwig/visualize/curves.py
@DeveloperAPI
def roc_curves_from_test_statistics(
    test_stats_per_model: "list[dict]",
    output_feature_name: str,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Show the roc curves for the specified models output binary `output_feature_name`.

    Args:
        test_stats_per_model: Dictionary containing evaluation performance statistics.
        output_feature_name: Name of the output feature to use for the visualization.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
    """
    model_names_list = convert_to_list(model_names)
    filename_template = "roc_curves_from_prediction_statistics." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    # Create the output directory before the plot is saved, matching the
    # behavior of the other visualization entry points.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    fpr_tprs = []
    for curr_test_statistics in test_stats_per_model:
        # Curves are read from precomputed evaluation statistics rather than
        # recomputed from probabilities.
        roc_curve_stats = curr_test_statistics[output_feature_name]["roc_curve"]
        fpr = roc_curve_stats["false_positive_rate"]
        tpr = roc_curve_stats["true_positive_rate"]
        fpr_tprs.append((fpr, tpr))
    visualization_utils.roc_curves(fpr_tprs, model_names_list, title="ROC curves", filename=filename_template_path)
ludwig.visualize.calibration_1_vs_all
¶
calibration_1_vs_all(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, top_n_classes: list[int], labels_limit: int, model_names: list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show models probability of predictions for the specified output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
top_n_classes
|
List containing the number of classes to plot.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
List of the names of the models to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/curves.py
@DeveloperAPI
def calibration_1_vs_all(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    top_n_classes: "list[int]",
    labels_limit: int,
    model_names: "list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show models probability of predictions for the specified output_feature_name.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        top_n_classes: List containing the number of classes to plot.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: List of the names of the models to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    feature_metadata = metadata[output_feature_name]
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)

    # Work on a shallow copy so folding rare-label columns below does not
    # mutate the caller's `probabilities_per_model` list.
    probs = list(probabilities_per_model)
    model_names_list = convert_to_list(model_names)
    filename_template = "calibration_1_vs_all_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    if labels_limit > 0:
        ground_truth[ground_truth > labels_limit] = labels_limit
    for i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of rare-label columns into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            probs[i] = prob_limit

    num_classes = len(metadata[output_feature_name]["str2idx"])
    brier_scores = []
    # A non-positive first entry in top_n_classes means "plot every class".
    classes = min(num_classes, top_n_classes[0]) if top_n_classes[0] > 0 else num_classes
    class_names = [feature_metadata["idx2str"][i] for i in range(classes)]
    for class_idx in range(classes):
        fraction_positives_class = []
        mean_predicted_vals_class = []
        probs_class = []
        brier_scores_class = []
        for prob in probs:
            # One-vs-all: binarize the ground truth for the current class.
            gt_class = (ground_truth == class_idx).astype(int)
            prob_class = prob[:, class_idx]
            curr_fraction_positives, curr_mean_predicted_vals = calibration_curve(gt_class, prob_class, n_bins=21)
            # The plot needs at least two points per curve; pad with a zero.
            if len(curr_fraction_positives) < 2:
                curr_fraction_positives = np.concatenate((np.array([0.0]), curr_fraction_positives))
            if len(curr_mean_predicted_vals) < 2:
                curr_mean_predicted_vals = np.concatenate((np.array([0.0]), curr_mean_predicted_vals))
            fraction_positives_class.append(curr_fraction_positives)
            mean_predicted_vals_class.append(curr_mean_predicted_vals)
            # Reuse the already-extracted column instead of slicing again.
            probs_class.append(prob_class)
            brier_scores_class.append(brier_score_loss(gt_class, prob_class, pos_label=1))
        brier_scores.append(brier_scores_class)

        filename = None
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
            filename = filename_template_path.format(class_idx)
        visualization_utils.calibration_plot(
            fraction_positives_class,
            mean_predicted_vals_class,
            model_names_list,
            class_name=class_names[class_idx],
            filename=filename,
        )

        filename = None
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
            filename = filename_template_path.format("prediction_distribution_" + str(class_idx))
        visualization_utils.predictions_distribution_plot(probs_class, model_names_list, filename=filename)

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = filename_template_path.format("brier")
    visualization_utils.brier_plot(
        np.array(brier_scores),
        algorithm_names=model_names_list,
        class_names=class_names,
        title="Brier scores for each class",
        filename=filename,
    )
ludwig.visualize.calibration_multiclass
¶
calibration_multiclass(probabilities_per_model: list[array], ground_truth: Series | ndarray, metadata: dict, output_feature_name: str, labels_limit: int, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', ground_truth_apply_idx: bool = True, **kwargs) -> None
Show models probability of predictions for each class of the specified output_feature_name.
| PARAMETER | DESCRIPTION |
|---|---|
probabilities_per_model
|
List of model probabilities.
TYPE:
|
ground_truth
|
Ground truth values.
TYPE:
|
metadata
|
Feature metadata dictionary.
TYPE:
|
output_feature_name
|
Output feature name.
TYPE:
|
labels_limit
|
Upper limit on the numeric encoded label value. Encoded
numeric label values in dataset that are higher than
TYPE:
|
model_names
|
List of the names of the models to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
ground_truth_apply_idx
|
Whether to use metadata['str2idx'] in np.vectorize.
TYPE:
|
Source code in ludwig/visualize/curves.py
@DeveloperAPI
def calibration_multiclass(
    probabilities_per_model: "list[np.array]",
    ground_truth: "pd.Series | np.ndarray",
    metadata: dict,
    output_feature_name: str,
    labels_limit: int,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    ground_truth_apply_idx: bool = True,
    **kwargs,
) -> None:
    """Show models probability of predictions for each class of the specified output_feature_name.

    Args:
        probabilities_per_model: List of model probabilities.
        ground_truth: Ground truth values.
        metadata: Feature metadata dictionary.
        output_feature_name: Output feature name.
        labels_limit: Upper limit on the numeric encoded label value. Encoded
            numeric label values in dataset that are higher than `labels_limit`
            are considered to be "rare" labels.
        model_names: List of the names of the models to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
        ground_truth_apply_idx: Whether to use metadata['str2idx'] in np.vectorize.
    """
    if not isinstance(ground_truth, np.ndarray):
        # not np array, assume we need to translate raw value to encoded value
        feature_metadata = metadata[output_feature_name]
        ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx)

    # Work on a shallow copy so folding rare-label columns below does not
    # mutate the caller's `probabilities_per_model` list.
    probs = list(probabilities_per_model)
    model_names_list = convert_to_list(model_names)
    filename_template = "calibration_multiclass{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    if labels_limit > 0:
        ground_truth[ground_truth > labels_limit] = labels_limit

    prob_classes = 0
    for i, prob in enumerate(probs):
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            # Fold the probability mass of rare-label columns into the last kept column.
            prob_limit = prob[:, : labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            probs[i] = prob_limit
        if probs[i].shape[1] > prob_classes:
            prob_classes = probs[i].shape[1]

    # One-hot encode the ground truth and flatten, so calibration is computed
    # over every (sample, class) pair at once.
    gt_one_hot_dim_2 = max(prob_classes, max(ground_truth) + 1)
    gt_one_hot = np.zeros((len(ground_truth), gt_one_hot_dim_2))
    gt_one_hot[np.arange(len(ground_truth)), ground_truth] = 1
    gt_one_hot_flat = gt_one_hot.flatten()

    fraction_positives = []
    mean_predicted_vals = []
    brier_scores = []
    for prob in probs:
        # flatten probabilities to be compared to flatten ground truth
        prob_flat = prob.flatten()
        curr_fraction_positives, curr_mean_predicted_vals = calibration_curve(gt_one_hot_flat, prob_flat, n_bins=21)
        fraction_positives.append(curr_fraction_positives)
        mean_predicted_vals.append(curr_mean_predicted_vals)
        brier_scores.append(brier_score_loss(gt_one_hot_flat, prob_flat, pos_label=1))

    filename = None
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
        filename = filename_template_path.format("")
    visualization_utils.calibration_plot(fraction_positives, mean_predicted_vals, model_names_list, filename=filename)

    filename = None
    if output_directory:
        filename = filename_template_path.format("_brier")
    visualization_utils.compare_classifiers_plot(
        [brier_scores], ["brier"], model_names_list, adaptive=True, decimals=8, filename=filename
    )

    for i, brier_score in enumerate(brier_scores):
        # Guard against model_names_list being None/empty or shorter than the
        # number of models, instead of calling len() on a possibly-None value.
        if model_names_list and i < len(model_names_list):
            label_template = f"{model_names_list[i]}: " + "{}"
        else:
            label_template = "{}"
        logger.info(label_template.format(brier_score))
ludwig.visualize.confusion_matrix
¶
confusion_matrix(test_stats_per_model: list[dict], metadata: dict, output_feature_name: str | None, top_n_classes: list[int], normalize: bool, model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Show confusion matrix in the models predictions for each output_feature_name.
For each model (in the aligned lists of test_statistics and model_names)
it produces a heatmap of the confusion matrix in the predictions for
each output_feature_name that has a confusion matrix in test_statistics.
The value of top_n_classes limits the heatmap to the n most frequent
classes.
| PARAMETER | DESCRIPTION |
|---|---|
test_stats_per_model
|
Dictionary containing evaluation performance statistics.
TYPE:
|
metadata
|
Intermediate preprocess structure created during training containing the mappings of the input dataset.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization. If None, use all output features.
TYPE:
|
top_n_classes
|
Number of top classes or list containing the number of top classes to plot.
TYPE:
|
normalize
|
Flag to normalize rows in confusion matrix.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots — 'pdf' or 'png'.
TYPE:
|
Source code in ludwig/visualize/confusion.py
@DeveloperAPI
def confusion_matrix(
    test_stats_per_model: "list[dict]",
    metadata: dict,
    output_feature_name: "str | None",
    top_n_classes: "list[int]",
    normalize: bool,
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
) -> None:
    """Show confusion matrix in the models predictions for each `output_feature_name`.

    For each model (in the aligned lists of test_statistics and model_names)
    it produces a heatmap of the confusion matrix in the predictions for
    each output_feature_name that has a confusion matrix in test_statistics.
    The value of `top_n_classes` limits the heatmap to the n most frequent
    classes.

    Args:
        test_stats_per_model: Dictionary containing evaluation performance statistics.
        metadata: Intermediate preprocess structure created during training containing
            the mappings of the input dataset.
        output_feature_name: Name of the output feature to use for the visualization.
            If None, use all output features.
        top_n_classes: Number of top classes or list containing the number of top
            classes to plot.
        normalize: Flag to normalize rows in confusion matrix.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots will
            be displayed in a window.
        file_format: File format of output plots — 'pdf' or 'png'.

    Raises:
        FileNotFoundError: If none of the evaluation statistics contain a
            confusion matrix.
    """
    test_stats_per_model_list = test_stats_per_model
    model_names_list = convert_to_list(model_names)
    # Template placeholders: model name, output feature name, "top<k>".
    filename_template = "confusion_matrix_{}_{}_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    output_feature_names = _validate_output_feature_name_from_test_stats(output_feature_name, test_stats_per_model_list)
    confusion_matrix_found = False
    for i, test_statistics in enumerate(test_stats_per_model_list):
        for output_feature_name in output_feature_names:
            if "confusion_matrix" in test_statistics[output_feature_name]:
                confusion_matrix_found = True
                _confusion_matrix = np.array(test_statistics[output_feature_name]["confusion_matrix"])
                model_name_name = (
                    model_names_list[i] if (model_names_list is not None and i < len(model_names_list)) else ""
                )
                # Prefer human-readable class labels from the training metadata;
                # fall back to numeric indices when no mapping is available.
                if (
                    metadata is not None
                    and output_feature_name in metadata
                    and ("idx2str" in metadata[output_feature_name] or "bool2str" in metadata[output_feature_name])
                ):
                    if "bool2str" in metadata[output_feature_name]:  # Handles the binary output case
                        labels = metadata[output_feature_name]["bool2str"]
                    else:
                        labels = metadata[output_feature_name]["idx2str"]
                else:
                    labels = list(range(len(_confusion_matrix)))
                for k in top_n_classes:
                    # k <= 0 means "plot all classes"; otherwise clamp k to the matrix size.
                    k = min(k, _confusion_matrix.shape[0]) if k > 0 else _confusion_matrix.shape[0]
                    cm = _confusion_matrix[:k, :k]
                    if normalize:
                        # Row-normalize; all-zero rows would divide by zero, so
                        # suppress the warnings and zero out any inf/nan entries.
                        with np.errstate(divide="ignore", invalid="ignore"):
                            cm_norm = np.true_divide(cm, cm.sum(1)[:, np.newaxis])
                            cm_norm[cm_norm == np.inf] = 0
                            cm_norm = np.nan_to_num(cm_norm)
                        cm = cm_norm
                    filename = None
                    if output_directory:
                        os.makedirs(output_directory, exist_ok=True)
                        filename = filename_template_path.format(model_name_name, output_feature_name, "top" + str(k))
                    visualization_utils.confusion_matrix_plot(
                        cm, labels[:k], output_feature_name=output_feature_name, filename=filename
                    )
                    # Rank classes by the entropy of their confusion-matrix row:
                    # high entropy means the model spreads its mistakes for that
                    # class across many other classes.
                    entropies = []
                    for row in cm:
                        if np.count_nonzero(row) > 0:
                            entropies.append(entropy(row))
                        else:
                            entropies.append(0)
                    class_entropy = np.array(entropies)
                    class_desc_entropy = np.argsort(class_entropy)[::-1]
                    desc_entropy = class_entropy[class_desc_entropy]
                    filename = None
                    if output_directory:
                        filename = filename_template_path.format(
                            "entropy_" + model_name_name, output_feature_name, "top" + str(k)
                        )
                    visualization_utils.bar_plot(
                        class_desc_entropy,
                        desc_entropy,
                        labels=[labels[i] for i in class_desc_entropy],
                        title="Classes ranked by entropy of Confusion Matrix row",
                        filename=filename,
                    )
    if not confusion_matrix_found:
        logger.error("Cannot find confusion_matrix in evaluation data")
        raise FileNotFoundError("Cannot find confusion_matrix in evaluation data")
ludwig.visualize.frequency_vs_f1
¶
frequency_vs_f1(test_stats_per_model: list[dict], metadata: dict, output_feature_name: str | None, top_n_classes: list[int], model_names: str | list[str] | None = None, output_directory: str | None = None, file_format: str = 'pdf', **kwargs)
Show prediction statistics for the specified output_feature_name for each model.
For each model (in the aligned lists of test_stats_per_model and
model_names), produces two plots statistics of predictions for the
specified output_feature_name.
The first plot is a line plot with one x axis representing the different classes and two vertical axes colored in orange and blue respectively. The orange one is the frequency of the class and an orange line is plotted to show the trend. The blue one is the F1 score for that class and a blue line is plotted to show the trend. The classes on the x axis are sorted by f1 score.
The second plot has the same structure of the first one, but the axes are flipped and the classes on the x axis are sorted by frequency.
| PARAMETER | DESCRIPTION |
|---|---|
test_stats_per_model
|
Dictionary containing evaluation performance statistics.
TYPE:
|
metadata
|
Intermediate preprocess structure created during training containing the mappings of the input dataset.
TYPE:
|
output_feature_name
|
Name of the output feature to use for the visualization.
If `None`, use all output features.
TYPE:
|
top_n_classes
|
Number of top classes or list containing the number of top classes to plot.
TYPE:
|
model_names
|
Model name or list of the model names to use as labels.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots -
TYPE:
|
Source code in ludwig/visualize/performance.py
@DeveloperAPI
def frequency_vs_f1(
    test_stats_per_model: "list[dict]",
    metadata: dict,
    output_feature_name: "str | None",
    top_n_classes: "list[int]",
    model_names: "str | list[str] | None" = None,
    output_directory: "str | None" = None,
    file_format: str = "pdf",
    **kwargs,
):
    """Show prediction statistics for the specified `output_feature_name` for each model.
    For each model (in the aligned lists of `test_stats_per_model` and
    `model_names`), produces two plots of prediction statistics for the
    specified `output_feature_name`.
    The first plot is a line plot with one x axis representing the different
    classes and two vertical axes colored in orange and blue respectively.
    The orange one is the frequency of the class and an orange line is plotted
    to show the trend. The blue one is the F1 score for that class and a blue
    line is plotted to show the trend. The classes on the x axis are sorted by
    f1 score.
    The second plot has the same structure as the first one,
    but the axes are flipped and the classes on the x axis are sorted by
    frequency.
    Args:
        test_stats_per_model: List containing dictionaries of evaluation
            performance statistics, one per model.
        metadata: Intermediate preprocess structure created during training containing
            the mappings of the input dataset.
        output_feature_name: Name of the output feature to use for the visualization.
            If `None`, use all output features.
        top_n_classes: List containing the number of top classes to plot; only the
            first element is used, and a value `<= 0` (or an empty list) plots all classes.
        model_names: Model name or list of the model names to use as labels.
        output_directory: Directory where to save plots. If not specified, plots
            will be displayed in a window.
        file_format: File format of output plots - `'pdf'` or `'png'`.
    """
    test_stats_per_model_list = test_stats_per_model
    model_names_list = convert_to_list(model_names)
    filename_template = "frequency_vs_f1_{}_{}." + file_format
    filename_template_path = generate_filename_template_path(output_directory, filename_template)
    output_feature_names = _validate_output_feature_name_from_test_stats(output_feature_name, test_stats_per_model_list)
    # Guard against an empty top_n_classes list; 0 means "plot all classes".
    k = top_n_classes[0] if top_n_classes else 0
    for i, test_stats in enumerate(test_stats_per_model_list):
        for of_name in output_feature_names:
            # Figure out model name
            model_name = model_names_list[i] if model_names_list is not None and i < len(model_names_list) else ""
            # Setup directory and filenames. The two plots get distinct
            # filenames: with a single shared filename the second
            # (frequency-sorted) plot overwrote the first when
            # output_directory was set.
            filename_sorted_by_f1 = None
            filename_sorted_by_freq = None
            if output_directory:
                os.makedirs(output_directory, exist_ok=True)
                filename_sorted_by_f1 = filename_template_path.format(model_name, of_name)
                filename_sorted_by_freq = filename_template_path.format(model_name, of_name + "_sorted_by_frequency")
            # setup local variables
            per_class_stats = test_stats[of_name]["per_class_stats"]
            class_names = metadata[of_name]["idx2str"]
            # Build an array of class frequencies ordered by class index.
            idx2freq = {metadata[of_name]["str2idx"][key]: val for key, val in metadata[of_name]["str2freq"].items()}
            freq_np = np.array([idx2freq[class_id] for class_id in sorted(idx2freq)], dtype=np.int32)
            # Keep only the top-k classes when requested.
            if k > 0:
                class_names = class_names[:k]
                freq_np = freq_np[:k]
            # Collect per-class F1 scores (NaN mapped to 0) and labels.
            f1_scores = []
            labels = []
            for class_name in class_names:
                class_stats = per_class_stats[class_name]
                f1_scores.append(class_stats["f1_score"])
                labels.append(class_name)
            f1_np = np.nan_to_num(np.array(f1_scores, dtype=np.float32))
            labels_np = np.array(labels)
            # Plot 1: classes sorted by descending F1 score.
            # (Indexing with the permutation array already yields full-length
            # arrays, so no additional truncation is needed.)
            f1_sort_idcs = f1_np.argsort()[::-1]
            visualization_utils.double_axis_line_plot(
                f1_np[f1_sort_idcs],
                freq_np[f1_sort_idcs],
                "F1 score",
                "frequency",
                labels=labels_np[f1_sort_idcs],
                title=f"{model_name} F1 Score vs Frequency {of_name}",
                filename=filename_sorted_by_f1,
            )
            # Plot 2: axes flipped, classes sorted by descending frequency.
            freq_sort_idcs = freq_np.argsort()[::-1]
            visualization_utils.double_axis_line_plot(
                freq_np[freq_sort_idcs],
                f1_np[freq_sort_idcs],
                "frequency",
                "F1 score",
                labels=labels_np[freq_sort_idcs],
                title=f"{model_name} F1 Score vs Frequency {of_name}",
                filename=filename_sorted_by_freq,
            )
ludwig.visualize.hyperopt_report
¶
hyperopt_report(hyperopt_stats_path: str, output_directory: str | None = None, file_format: str = 'pdf', **kwargs) -> None
Produces a report about hyperparameter optimization, creating one graph per hyperparameter to show the distribution of results and one additional graph of pairwise hyperparameter interactions.
| PARAMETER | DESCRIPTION |
|---|---|
hyperopt_stats_path
|
Path to the hyperopt results JSON file.
TYPE:
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
TYPE:
|
file_format
|
File format of output plots — 'pdf' or 'png'.
TYPE:
|
Source code in ludwig/visualize/hyperopt.py
@DeveloperAPI
def hyperopt_report(
    hyperopt_stats_path: str, output_directory: "str | None" = None, file_format: str = "pdf", **kwargs
) -> None:
    """Produce a report about hyperparameter optimization.
    Creates one graph per hyperparameter showing the distribution of results,
    plus one additional graph of pairwise hyperparameter interactions.
    Args:
        hyperopt_stats_path: Path to the hyperopt results JSON file.
        output_directory: Directory where to save plots. If not specified, plots will be displayed in a window.
        file_format: File format of output plots — 'pdf' or 'png'.
    """
    # "{}" is left as a placeholder to be filled in with each plot's name.
    template = f"hyperopt_{{}}.{file_format}"
    template_path = generate_filename_template_path(output_directory, template)
    stats = load_json(hyperopt_stats_path)
    config = stats["hyperopt_config"]
    parameters = config["parameters"]
    metric = config["metric"]
    results_df = hyperopt_results_to_dataframe(stats["hyperopt_results"], parameters, metric)
    visualization_utils.hyperopt_report(
        parameters,
        results_df,
        metric=metric,
        filename_template=template_path,
    )
ludwig.visualize.hyperopt_hiplot
¶
hyperopt_hiplot(hyperopt_stats_path, output_directory=None, **kwargs)
Produces a parallel coordinate plot about hyperparameter optimization, creating one HTML file and optionally a CSV file to be read by hiplot.
| PARAMETER | DESCRIPTION |
|---|---|
hyperopt_stats_path
|
Path to the hyperopt results JSON file.
|
output_directory
|
Directory where to save plots. If not specified, plots will be displayed in a window.
DEFAULT:
|
Source code in ludwig/visualize/hyperopt.py
@DeveloperAPI
def hyperopt_hiplot(hyperopt_stats_path, output_directory=None, **kwargs):
    """Produce a parallel coordinate plot about hyperparameter optimization.
    Creates one HTML file (and optionally a CSV file) to be read by hiplot.
    Args:
        hyperopt_stats_path: Path to the hyperopt results JSON file.
        output_directory: Directory where to save plots. If not specified, plots will be displayed in a window.
    """
    output_path = generate_filename_template_path(output_directory, "hyperopt_hiplot.html")
    stats = load_json(hyperopt_stats_path)
    config = stats["hyperopt_config"]
    df = hyperopt_results_to_dataframe(
        stats["hyperopt_results"],
        config["parameters"],
        config["metric"],
    )
    visualization_utils.hyperopt_hiplot(df, filename=output_path)