performance_measurement.py
import math

import numpy as np
import pandas as pd
from scipy.stats import t

def zero_benchmarked_r_squared(predictions, labels):
    """
    In many out-of-sample forecasting problems, predictions are compared against the historical mean return.
    However, the historical mean return is so noisy relative to individual stock returns that it artificially
    lowers the bar for "good" forecasting performance. This problem is avoided by benchmarking against a mean
    excess return of zero when calculating R-squared.
    """
    return 1 - np.square(predictions - labels).sum() / np.square(labels).sum()
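
# A minimal, hypothetical usage sketch (not part of the original module): on
# simulated return data where predictions track the labels, the zero-benchmarked
# R-squared should come out positive, while a pure-noise forecast would typically
# score negative, since the implicit benchmark is a zero forecast.
def _example_zero_benchmarked_r_squared():
    rng = np.random.default_rng(0)
    labels = rng.normal(0.0, 0.05, size=1000)                # simulated monthly returns
    predictions = labels + rng.normal(0.0, 0.02, size=1000)  # informative but noisy forecasts
    return zero_benchmarked_r_squared(predictions, labels)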

def sample_binary_predictor(y_pred: pd.Series, y_true: pd.Series, n_samples: int, sample_size: int, replace: bool = True):
    """
    Returns bootstrap samples of predictor accuracy: draws `n_samples` samples of
    `sample_size` predictions each and computes the fraction of correct predictions in each.
    """
    correct = y_true.eq(y_pred)  # boolean Series; True/False behave as 1/0 in sums
    observations = []
    for _ in range(n_samples):
        sample = np.random.choice(correct, size=sample_size, replace=replace)
        accuracy = sample.sum() / sample.shape[0]
        observations.append(accuracy)
    return np.array(observations)
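
# A hypothetical usage sketch (names and data invented for illustration):
# bootstrap the accuracy of a binary up/down classifier that is right roughly
# 60% of the time, and summarize the sampling distribution of its accuracy.
def _example_sample_binary_predictor():
    rng = np.random.default_rng(0)
    y_true = pd.Series(rng.integers(0, 2, size=500))  # simulated return signs
    flip = rng.random(500) < 0.4                      # flip ~40% of the labels
    y_pred = y_true.where(~flip, 1 - y_true)          # a ~60%-accurate predictor
    accuracies = sample_binary_predictor(y_pred, y_true, n_samples=1000, sample_size=200)
    return accuracies.mean(), accuracies.std()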

def single_sample_t_test(observations: np.ndarray, mean0, alpha):
    """
    One-sided, one-sample t-test of the mean of a normal distribution with unknown variance.
    Tests H0: mean == mean0 against H1: mean > mean0.
    In the context of this project, observations are some form of monthly returns or monthly return differences.
    NOTE: Requires that observations are pre-computed.
    """
    mean_obs = observations.mean()
    # Sample standard deviation with Bessel's correction (n - 1 degrees of freedom).
    sample_std = math.sqrt(np.square(observations - mean_obs).sum() / (len(observations) - 1))
    t_statistic = (mean_obs - mean0) / (sample_std / math.sqrt(len(observations)))
    # Reject H0 if t_statistic > t(1 - alpha, n - 1).
    # ppf(q, df) is the percent point function (inverse of the CDF).
    critical_value = t.ppf(1 - alpha, df=len(observations) - 1)
    p_value = 1.0 - t.cdf(t_statistic, df=len(observations) - 1)
    if t_statistic > critical_value:
        return "Reject H0 (mean of observations is greater) with t_statistic={}, p-value={}, critical_value={} and alpha={}".format(t_statistic, p_value, critical_value, alpha)
    else:
        return "Failed to reject H0 (mean of observations is not greater) with t_statistic={}, p-value={}, critical_value={} and alpha={}".format(t_statistic, p_value, critical_value, alpha)