Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce spurious test failures by increasing threshold to 4 sigma #44

Merged
merged 1 commit into from
Sep 26, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions tests/Tests.dfy
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@
if x < 0.0 then -x else x
}

method testBernoulliIsWithin3SigmaOfTrueMean(
method testBernoulliIsWithin4SigmaOfTrueMean(
n: nat,
empiricalSum: real,
successProb: real,
description: string
)
requires n > 0
{
testEmpiricalIsWithin3SigmaOfTrueMean(n, empiricalSum, successProb, successProb * (1.0 - successProb), description);
testEmpiricalIsWithin4SigmaOfTrueMean(n, empiricalSum, successProb, successProb * (1.0 - successProb), description);
}

method testEmpiricalIsWithin3SigmaOfTrueMean(
method testEmpiricalIsWithin4SigmaOfTrueMean(
n: nat,
empiricalSum: real,
trueMean: real,
Expand All @@ -41,14 +41,14 @@
{
var empiricalMean := empiricalSum / n as real;
var diff := Abs(empiricalMean - trueMean);
var threshold := 3.0 * 3.0 * trueVariance / n as real;
var threshold := 4.0 * 4.0 * trueVariance / n as real;
if diff * diff > threshold {
print "Test failed: ", description, "\n";
print "Difference between empirical and true mean: ", diff, "\n";
print "squared difference: ", diff * diff, "\n";
print "sigma squared: ", trueVariance / n as real, "\n";
}
expect diff * diff < threshold, "Empirical mean should be within 3 sigma of true mean. This individual test may fail with probability of about 0.27%.";
expect diff * diff < threshold, "Empirical mean should be within 3 sigma of true mean. This individual test may fail with probability of about 6.3e-5.";
}


Expand All @@ -63,7 +63,7 @@
t := t + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, t as real, 0.5, "p(true)");
testBernoulliIsWithin4SigmaOfTrueMean(n, t as real, 0.5, "p(true)");
}

method TestUniform(n: nat, r: UniformInterface.IUniform)
Expand All @@ -82,9 +82,9 @@
case 2 => c := c + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, a as real, 1.0 / 3.0, "p(0)");
testBernoulliIsWithin3SigmaOfTrueMean(n, b as real, 1.0 / 3.0, "p(1)");
testBernoulliIsWithin3SigmaOfTrueMean(n, c as real, 1.0 / 3.0, "p(2)");
testBernoulliIsWithin4SigmaOfTrueMean(n, a as real, 1.0 / 3.0, "p(0)");
testBernoulliIsWithin4SigmaOfTrueMean(n, b as real, 1.0 / 3.0, "p(1)");
testBernoulliIsWithin4SigmaOfTrueMean(n, c as real, 1.0 / 3.0, "p(2)");
}

method TestUniformInterval(n: nat, r: UniformInterface.IUniform)
Expand All @@ -103,9 +103,9 @@
case 9 => c := c + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, a as real, 1.0 / 3.0, "p(7)");
testBernoulliIsWithin3SigmaOfTrueMean(n, b as real, 1.0 / 3.0, "p(8)");
testBernoulliIsWithin3SigmaOfTrueMean(n, c as real, 1.0 / 3.0, "p(9)");
testBernoulliIsWithin4SigmaOfTrueMean(n, a as real, 1.0 / 3.0, "p(7)");
testBernoulliIsWithin4SigmaOfTrueMean(n, b as real, 1.0 / 3.0, "p(8)");
testBernoulliIsWithin4SigmaOfTrueMean(n, c as real, 1.0 / 3.0, "p(9)");
}

method TestGeometric(n: nat, r: GeometricInterface.IGeometric)
Expand All @@ -126,9 +126,9 @@
case _ =>
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, a as real, 0.015625, "p(5)");
testBernoulliIsWithin3SigmaOfTrueMean(n, b as real, 0.00048828125, "p(10)");
testEmpiricalIsWithin3SigmaOfTrueMean(n, sum as real, (1.0 - 0.5) / 0.5, (1.0 - 0.5) / (0.5 * 0.5), "mean");
testBernoulliIsWithin4SigmaOfTrueMean(n, a as real, 0.015625, "p(5)");
testBernoulliIsWithin4SigmaOfTrueMean(n, b as real, 0.00048828125, "p(10)");
testEmpiricalIsWithin4SigmaOfTrueMean(n, sum as real, (1.0 - 0.5) / 0.5, (1.0 - 0.5) / (0.5 * 0.5), "mean");
}

method TestBernoulliRational(n: nat, r: BernoulliRationalInterface.IBernoulliRational)
Expand All @@ -143,7 +143,7 @@
t := t + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, t as real, 0.2, "p(true)");
testBernoulliIsWithin4SigmaOfTrueMean(n, t as real, 0.2, "p(true)");
}

method TestBernoulliRational2(n: nat, r: BernoulliRationalInterface.IBernoulliRational)
Expand All @@ -159,7 +159,7 @@
}

expect t == 0;
}
}

method TestBernoulliRational3(n: nat, r: BernoulliRationalInterface.IBernoulliRational)
decreases *
Expand All @@ -174,7 +174,7 @@
}

expect t == n;
}
}

method TestBernoulli(n: nat, r: BernoulliInterface.IBernoulli)
decreases *
Expand All @@ -188,7 +188,7 @@
t := t + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, t as real, 0.2, "p(true)");
testBernoulliIsWithin4SigmaOfTrueMean(n, t as real, 0.2, "p(true)");
}

method TestBernoulliExpNeg(n: nat, r: BernoulliExpNegInterface.IBernoulliExpNeg)
Expand All @@ -203,7 +203,7 @@
t := t + 1;
}
}
testBernoulliIsWithin3SigmaOfTrueMean(n, t as real, 0.1, "p(true)");
testBernoulliIsWithin4SigmaOfTrueMean(n, t as real, 0.1, "p(true)");
}

method TestDiscreteLaplace(n: nat, r: DiscreteLaplaceInterface.IDiscreteLaplace)
Expand All @@ -228,13 +228,13 @@
// https://www.wolframalpha.com/input?i=ReplaceAll%5B%28E%5E%5B1%2Ft%5D+-+1%29+%2F+%28E%5E%5B1%2Ft%5D+%2B+1%29+*+E%5E%28-Abs%5Bx%5D%2Ft%29%2C+%7Bt+-%3E+7%2F5%2C+x+-%3E+0%7D%5D
// https://www.wolframalpha.com/input?i=ReplaceAll%5B%28E%5E%5B1%2Ft%5D+-+1%29+%2F+%28E%5E%5B1%2Ft%5D+%2B+1%29+*+E%5E%28-Abs%5Bx%5D%2Ft%29%2C+%7Bt+-%3E+7%2F5%2C+x+-%3E+1%7D%5D
// https://www.wolframalpha.com/input?i=ReplaceAll%5B%28E%5E%5B1%2Ft%5D+-+1%29+%2F+%28E%5E%5B1%2Ft%5D+%2B+1%29+*+E%5E%28-Abs%5Bx%5D%2Ft%29%2C+%7Bt+-%3E+7%2F5%2C+x+-%3E+2%7D%5D
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[0] as real, 0.3426949, "p(0)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[1] as real, 0.1677634, "p(1)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[-1] as real, 0.1677634, "p(-1)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[2] as real, 0.0821272, "p(2)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[-2] as real, 0.0821272, "p(-2)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[0] as real, 0.3426949, "p(0)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[1] as real, 0.1677634, "p(1)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[-1] as real, 0.1677634, "p(-1)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[2] as real, 0.0821272, "p(2)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[-2] as real, 0.0821272, "p(-2)");
var variance := 3.7575005; // variance of DiscreteLaplace(7/5) is (2*exp(5/7))/(exp(5/7)-1)^2
testEmpiricalIsWithin3SigmaOfTrueMean(n, sum as real, 0.0, variance, "mean");
testEmpiricalIsWithin4SigmaOfTrueMean(n, sum as real, 0.0, variance, "mean");
}

method TestDiscreteGaussian(n: nat, r: DiscreteGaussianInterface.IDiscreteGaussian)
Expand All @@ -259,12 +259,12 @@
// https://www.wolframalpha.com/input?i=ReplaceAll%5BE%5E%28-x%5E2+%2F+%282+*%CF%83%5E2%29%29+%2F+Sum%5BE%5E%28-y%5E2%2F%282+%CF%83%5E2%29%29%2C+%7By%2C+-Infinity%2C+Infinity%7D%5D%2C+%7Bx+-%3E+0%2C+%CF%83+-%3E+1.4%7D%5D
// https://www.wolframalpha.com/input?i=ReplaceAll%5BE%5E%28-x%5E2+%2F+%282+*%CF%83%5E2%29%29+%2F+Sum%5BE%5E%28-y%5E2%2F%282+%CF%83%5E2%29%29%2C+%7By%2C+-Infinity%2C+Infinity%7D%5D%2C+%7Bx+-%3E+1%2C+%CF%83+-%3E+1.4%7D%5D
// https://www.wolframalpha.com/input?i=ReplaceAll%5BE%5E%28-x%5E2+%2F+%282+*%CF%83%5E2%29%29+%2F+Sum%5BE%5E%28-y%5E2%2F%282+%CF%83%5E2%29%29%2C+%7By%2C+-Infinity%2C+Infinity%7D%5D%2C+%7Bx+-%3E+2%2C+%CF%83+-%3E+1.4%7D%5D
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[0] as real, 0.284959, "p(0)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[1] as real, 0.220797, "p(1)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[-1] as real, 0.220797, "p(-1)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[2] as real, 0.102713, "p(2)");
testBernoulliIsWithin3SigmaOfTrueMean(n, counts[-2] as real, 0.102713, "p(-2)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[0] as real, 0.284959, "p(0)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[1] as real, 0.220797, "p(1)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[-1] as real, 0.220797, "p(-1)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[2] as real, 0.102713, "p(2)");
testBernoulliIsWithin4SigmaOfTrueMean(n, counts[-2] as real, 0.102713, "p(-2)");
var varianceBound := 1.4 * 1.4; // variance of DiscreteGaussian(1.4) is < 1.4^2
testEmpiricalIsWithin3SigmaOfTrueMean(n, sum as real, 0.0, varianceBound, "mean");
testEmpiricalIsWithin4SigmaOfTrueMean(n, sum as real, 0.0, varianceBound, "mean");
}
}