Commit 64e4817: Add more tests

mwydmuch committed Apr 24, 2024
1 parent: 0e0cea7

Showing 3 changed files with 131 additions and 12 deletions.
53 changes: 52 additions & 1 deletion tests/test_block_coordinate.py
@@ -3,7 +3,10 @@
from conftest import _report_data_type, _test_prediction_method_with_different_types
from scipy.sparse import csr_matrix

from xcolumns.block_coordinate import predict_using_bc_with_0approx
from xcolumns.block_coordinate import (
    predict_optimizing_coverage_using_bc,
    predict_using_bc_with_0approx,
)
from xcolumns.confusion_matrix import calculate_confusion_matrix
from xcolumns.metrics import (
    binary_recall_on_conf_matrix,
    coverage_on_conf_matrix,
    macro_recall_on_conf_matrix,
)
from xcolumns.weighted_prediction import predict_optimizing_macro_recall, predict_top_k
@@ -91,3 +94,51 @@ def test_block_coordinate_with_different_types(generated_test_data):
    )
    assert bc_score >= top_k_score
    assert abs(opt_recall_score - bc_score) < 0.02


def _run_block_coordinate_for_coverage(y_test, y_proba_test, k, init_y_pred):
    _report_data_type(y_proba_test)
    y_pred, meta = predict_optimizing_coverage_using_bc(
        y_proba_test,
        k,
        return_meta=True,
        seed=2024,
        init_y_pred=init_y_pred,
    )
    print(f" time={meta['time']}s")

    assert type(y_pred) == type(y_proba_test)
    assert y_pred.dtype == y_proba_test.dtype
    if k > 0:
        assert (y_pred.sum(axis=1) == k).all()
    return (
        calculate_confusion_matrix(y_test, y_pred, normalize=False, skip_tn=False),
        y_pred,
    )


def test_block_coordinate_for_coverage(generated_test_data):
    y_test = generated_test_data["y_test"]
    y_proba_test = generated_test_data["y_proba_test"]
    k = 3

    # Run predict_top_k to get a baseline prediction and the BC initialization
    top_k_y_pred = predict_top_k(y_proba_test, k)
    top_k_C = calculate_confusion_matrix(
        y_test, top_k_y_pred, normalize=False, skip_tn=False
    )

    conf_mats, y_preds = _test_prediction_method_with_different_types(
        _run_block_coordinate_for_coverage,
        (y_test, y_proba_test, k, top_k_y_pred),
        test_torch=False,
    )

    # Compare with top-k on the coverage metric this method optimizes
    top_k_score = coverage_on_conf_matrix(*top_k_C)
    bc_score = coverage_on_conf_matrix(*conf_mats[0])
    print(f"Top-k score={top_k_score}, BC score={bc_score}")
    assert bc_score >= top_k_score
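
For readers skimming the diff, here is a minimal standalone sketch of the coverage-optimizing block coordinate call exercised by the test above. The random arrays are a hypothetical stand-in for the generated_test_data fixture, and only the arguments and return values visible in this diff are assumed:

import numpy as np

from xcolumns.block_coordinate import predict_optimizing_coverage_using_bc
from xcolumns.weighted_prediction import predict_top_k

# Hypothetical stand-in for the test fixture: 100 instances, 20 labels
y_proba = np.random.rand(100, 20).astype(np.float32)
k = 3

init_y_pred = predict_top_k(y_proba, k)  # top-k warm start, as in the test
y_pred, meta = predict_optimizing_coverage_using_bc(
    y_proba,
    k,
    return_meta=True,
    seed=2024,
    init_y_pred=init_y_pred,
)
print(meta["time"])  # the test prints this timing as well
assert (y_pred.sum(axis=1) == k).all()  # exactly k labels per instance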
63 changes: 63 additions & 0 deletions tests/test_metrics.py
@@ -2,6 +2,8 @@

from xcolumns.confusion_matrix import calculate_confusion_matrix
from xcolumns.metrics import (
    binary_0_1_loss_on_conf_matrix,
    binary_accuracy_on_conf_matrix,
    binary_balanced_accuracy_on_conf_matrix,
    binary_f1_score_on_conf_matrix,
    binary_fbeta_score_on_conf_matrix,
@@ -10,7 +12,27 @@
    binary_jaccard_score_on_conf_matrix,
    binary_precision_on_conf_matrix,
    binary_recall_on_conf_matrix,
    coverage_on_conf_matrix,
    hamming_loss_on_conf_matrix,
    hamming_score_on_conf_matrix,
    macro_balanced_accuracy_on_conf_matrix,
    macro_f1_score_on_conf_matrix,
    macro_fbeta_score_on_conf_matrix,
    macro_gmean_on_conf_matrix,
    macro_hmean_on_conf_matrix,
    macro_jaccard_score_on_conf_matrix,
    macro_precision_on_conf_matrix,
    macro_recall_on_conf_matrix,
    micro_balanced_accuracy_on_conf_matrix,
    micro_f1_score_on_conf_matrix,
    micro_fbeta_score_on_conf_matrix,
    micro_gmean_on_conf_matrix,
    micro_hmean_on_conf_matrix,
    micro_jaccard_score_on_conf_matrix,
    micro_precision_on_conf_matrix,
    micro_recall_on_conf_matrix,
)
from xcolumns.types import Number
from xcolumns.weighted_prediction import predict_top_k


@@ -26,6 +48,8 @@ def test_binary_metrics_on_conf_matrix(generated_test_data):
    )

    for metric in [
        binary_0_1_loss_on_conf_matrix,
        binary_accuracy_on_conf_matrix,
        binary_balanced_accuracy_on_conf_matrix,
        binary_fbeta_score_on_conf_matrix,
        binary_f1_score_on_conf_matrix,
@@ -41,3 +65,42 @@
        assert isinstance(result, np.ndarray)
        assert result.shape == (y_proba_test.shape[1],)
        assert (0 <= result).all() and (result <= 1).all()


def test_multilabel_metrics_on_conf_matrix(generated_test_data):
    y_test = generated_test_data["y_test"]
    y_proba_test = generated_test_data["y_proba_test"]
    k = 3

    # Run predict_top_k to get a baseline prediction
    top_k_y_pred = predict_top_k(y_proba_test, k)
    top_k_C = calculate_confusion_matrix(
        y_test, top_k_y_pred, normalize=False, skip_tn=False
    )

    for metric in [
        micro_balanced_accuracy_on_conf_matrix,
        micro_f1_score_on_conf_matrix,
        micro_fbeta_score_on_conf_matrix,
        micro_gmean_on_conf_matrix,
        micro_hmean_on_conf_matrix,
        micro_jaccard_score_on_conf_matrix,
        micro_precision_on_conf_matrix,
        micro_recall_on_conf_matrix,
        macro_balanced_accuracy_on_conf_matrix,
        macro_f1_score_on_conf_matrix,
        macro_fbeta_score_on_conf_matrix,
        macro_gmean_on_conf_matrix,
        macro_hmean_on_conf_matrix,
        macro_jaccard_score_on_conf_matrix,
        macro_precision_on_conf_matrix,
        macro_recall_on_conf_matrix,
        coverage_on_conf_matrix,
        hamming_loss_on_conf_matrix,
        hamming_score_on_conf_matrix,
    ]:
        print(f"testing {metric.__name__}")
        result = metric(*top_k_C)
        print(f" result={result}")
        assert isinstance(result, Number)
        assert result >= 0
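
As a usage note, every metric added above shares one calling convention: build the confusion matrix once with calculate_confusion_matrix, then unpack it into any *_on_conf_matrix function. A minimal sketch under the same assumptions (random arrays standing in for the fixture; the three metrics are an arbitrary selection from the imports above):

import numpy as np

from xcolumns.confusion_matrix import calculate_confusion_matrix
from xcolumns.metrics import (
    coverage_on_conf_matrix,
    macro_f1_score_on_conf_matrix,
    micro_recall_on_conf_matrix,
)
from xcolumns.weighted_prediction import predict_top_k

y_true = (np.random.rand(100, 20) > 0.8).astype(np.float32)  # sparse labels
y_proba = np.random.rand(100, 20).astype(np.float32)

C = calculate_confusion_matrix(
    y_true, predict_top_k(y_proba, 3), normalize=False, skip_tn=False
)
for metric in [
    micro_recall_on_conf_matrix,
    macro_f1_score_on_conf_matrix,
    coverage_on_conf_matrix,
]:
    print(metric.__name__, metric(*C))  # each multilabel metric returns a scalar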
27 changes: 16 additions & 11 deletions tests/test_weighted_prediction.py
@@ -3,12 +3,14 @@
from scipy.sparse import csr_matrix

from xcolumns.confusion_matrix import calculate_confusion_matrix
from xcolumns.metrics import jpv_inverse_propensities, label_priors
from xcolumns.utils import *
from xcolumns.weighted_prediction import (
    predict_log_weighted_per_instance,
    predict_optimizing_instance_precision,
    predict_optimizing_instance_propensity_scored_precision,
    predict_optimizing_macro_balanced_accuracy,
    predict_optimizing_macro_recall,
    predict_power_law_weighted_per_instance,
    predict_weighted_per_instance,
)
@@ -79,16 +81,19 @@ def test_prediction_optizming_macro_balanced_accuracy(generated_test_data):
    )


# def test_wrapper_methods(generated_test_data):
#     y_test = generated_test_data["y_test"]
#     y_proba_test = generated_test_data["y_proba_test"]
def test_wrapper_methods(generated_test_data):
    y_test = generated_test_data["y_test"]
    y_proba_test = generated_test_data["y_proba_test"]

#     priors = y_test.mean(axis=0)
    priors = label_priors(y_test)
    inverse_propensities = jpv_inverse_propensities(y_test)

#     for func in [
#         predict_log_weighted_per_instance,
#         predict_power_law_weighted_per_instance,
#         predict_optimizing_instance_propensity_scored_precision,
#         predict_optimizing_instance_precision,
#     ]:
#         func(y_proba_test, k=3)
    y_pred = predict_optimizing_instance_precision(y_proba_test, k=3)
    y_pred = predict_log_weighted_per_instance(y_proba_test, k=3, priors=priors)
    y_pred = predict_power_law_weighted_per_instance(
        y_proba_test, k=3, priors=priors, beta=0.5
    )
    y_pred = predict_optimizing_instance_propensity_scored_precision(
        y_proba_test, k=3, inverse_propensities=inverse_propensities
    )
    y_pred = predict_optimizing_macro_recall(y_proba_test, k=3, priors=priors)
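
The newly enabled wrapper test boils down to the calls below. A self-contained sketch with hypothetical random data, using only the signatures visible in this diff (both label_priors and jpv_inverse_propensities appear to derive their statistics from the label matrix):

import numpy as np

from xcolumns.metrics import jpv_inverse_propensities, label_priors
from xcolumns.weighted_prediction import (
    predict_log_weighted_per_instance,
    predict_optimizing_instance_precision,
    predict_optimizing_instance_propensity_scored_precision,
    predict_optimizing_macro_recall,
    predict_power_law_weighted_per_instance,
)

y_true = (np.random.rand(100, 20) > 0.8).astype(np.float32)  # hypothetical labels
y_proba = np.random.rand(100, 20).astype(np.float32)

priors = label_priors(y_true)  # per-label positive rates
inverse_propensities = jpv_inverse_propensities(y_true)

y_pred = predict_optimizing_instance_precision(y_proba, k=3)
y_pred = predict_log_weighted_per_instance(y_proba, k=3, priors=priors)
y_pred = predict_power_law_weighted_per_instance(y_proba, k=3, priors=priors, beta=0.5)
y_pred = predict_optimizing_instance_propensity_scored_precision(
    y_proba, k=3, inverse_propensities=inverse_propensities
)
y_pred = predict_optimizing_macro_recall(y_proba, k=3, priors=priors)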
