diff --git a/FSharp.Stats.sln b/FSharp.Stats.sln index ce24a0c4..2e4599fe 100644 --- a/FSharp.Stats.sln +++ b/FSharp.Stats.sln @@ -57,6 +57,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{23F9FB2E-6 docs\Intervals.fsx = docs\Intervals.fsx docs\LinearAlgebra.fsx = docs\LinearAlgebra.fsx docs\Matrix_Vector.fsx = docs\Matrix_Vector.fsx + docs\Quantiles.fsx = docs\Quantiles.fsx docs\NuGet.config = docs\NuGet.config docs\Rank.fsx = docs\Rank.fsx docs\Signal.fsx = docs\Signal.fsx diff --git a/docs/Intervals.fsx b/docs/Intervals.fsx index e2bab840..867f84c7 100644 --- a/docs/Intervals.fsx +++ b/docs/Intervals.fsx @@ -1,7 +1,7 @@ (** --- title: Intervals -index: 19 +index: 20 category: Documentation categoryindex: 0 --- diff --git a/docs/ML.fsx b/docs/ML.fsx index 972bb4bc..b866206b 100644 --- a/docs/ML.fsx +++ b/docs/ML.fsx @@ -1,7 +1,7 @@ (** --- title: Machine Learning -index: 19 +index: 21 category: Documentation categoryindex: 0 --- @@ -46,7 +46,7 @@ _Summary:_ this tutorial demonstrates functionality relevant in the context of m ### Table of contents - - [Dimensionality Reduction](#Dimensionality Reduction) + - [Dimensionality Reduction](#Dimensionality-Reduction) - [PCA](#PCA) ## Dimensionality Reduction diff --git a/docs/Quantiles.fsx b/docs/Quantiles.fsx new file mode 100644 index 00000000..c168327d --- /dev/null +++ b/docs/Quantiles.fsx @@ -0,0 +1,430 @@ +(** +--- +title: Quantile +index: 19 +category: Documentation +categoryindex: 0 +--- +*) + +(*** hide ***) + +(*** condition: prepare ***) +#I "../src/FSharp.Stats/bin/Release/netstandard2.0/" +#r "FSharp.Stats.dll" +#r "nuget: Plotly.NET, 2.0.0-preview.16" + +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Plotly.NET, 2.0.0-preview.16" +#r "nuget: Plotly.NET.Interactive, 2.0.0-preview.16" +#r "nuget: FSharp.Stats" +#endif // IPYNB + + +open Plotly.NET +open Plotly.NET.StyleParam +open Plotly.NET.LayoutObjects + +//some axis styling +module Chart = + let myAxis name = LinearAxis.init(Title=Title.init name,Mirror=StyleParam.Mirror.All,Ticks=StyleParam.TickOptions.Inside,ShowGrid=false,ShowLine=true) + let myAxisRange name (min,max) = LinearAxis.init(Title=Title.init name,Range=Range.MinMax(min,max),Mirror=StyleParam.Mirror.All,Ticks=StyleParam.TickOptions.Inside,ShowGrid=false,ShowLine=true) + let withAxisTitles x y chart = + chart + |> Chart.withTemplate ChartTemplates.lightMirrored + |> Chart.withXAxis (myAxis x) + |> Chart.withYAxis (myAxis y) + +(** + +# Quantile + +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/fslaborg/FSharp.Stats/gh-pages?filepath=Quantile.ipynb) + +_Summary:_ this tutorial demonstrates how to handle quantiles and QQ-Plots + +### Table of contents + + - [Quantiles](#Quantiles) + - [QQ plot](#QQ-plot) + - [Comparing two sample distributions](#Comparing-two-sample-distributions) + - [Comparing a sample against a distribution](#Comparing-a-sample-against-a-distribution) + - [Normal distribution](#Normal-distribution) + - [Uniform Distribution](#Uniform-Distribution) + +## Quantiles + +Quantiles are values that divide data into equally spaced groups. Percentiles are just quantiles that divide the data in 100 equally sized groups. +The median for example defines the 0.5 quantile or 0.5 percentile. You can calculate the quantile by what proportion of values are less than the value you are interested in. + +_Note: There are many possibilities to handle ties or data that cannot be split equally. The default quantile method used here is `Quantile.mode`._ + +Let's sample 1000 data points from a normal distribution and calculate some percentiles. +*) +open System +open FSharp.Stats +open FSharp.Stats.Signal + +let rng = Distributions.ContinuousDistribution.normal 3. 1. + +let sample = Array.init 1000 (fun _ -> rng.Sample()) + +let quantile25 = Quantile.mode 0.25 sample +let quantile50 = Quantile.mode 0.50 sample +let quantile75 = Quantile.mode 0.75 sample +let quantile100 = Quantile.mode 1.00 sample + + +[|quantile25;quantile50;quantile75;quantile100|] +(***include-it-raw***) + + +(** + +These special quantiles are also called quartiles since the divide the data into 4 sections. +Now we can divide the data into the ranges defined by the quantiles and plot them. Here the ranges defines half-open intervals: + +*) + +let range25 = sample |> Array.filter (fun x -> x < quantile25) +let range50 = sample |> Array.filter (fun x -> x > quantile25 && x < quantile50) +let range75 = sample |> Array.filter (fun x -> x > quantile50 && x < quantile75) +let range100 = sample |> Array.filter (fun x -> x > quantile75) + +(*** hide ***) +let quartilePlot = + [| + Chart.Histogram(range25,"25",ShowLegend=false) |> Chart.withTemplate ChartTemplates.lightMirrored |> Chart.withXAxisStyle("",MinMax=(0.,6.)) |> Chart.withYAxisStyle("Quartil 25") + Chart.Histogram(range50,"50",ShowLegend=false) |> Chart.withTemplate ChartTemplates.lightMirrored |> Chart.withXAxisStyle("",MinMax=(0.,6.)) |> Chart.withYAxisStyle("Quartil 50") + Chart.Histogram(range75,"75",ShowLegend=false) |> Chart.withTemplate ChartTemplates.lightMirrored |> Chart.withXAxisStyle("",MinMax=(0.,6.)) |> Chart.withYAxisStyle("Quartil 75") + Chart.Histogram(range100,"100",ShowLegend=false) |> Chart.withTemplate ChartTemplates.lightMirrored |> Chart.withXAxisStyle("",MinMax=(0.,6.)) |> Chart.withYAxisStyle("Quartil 100") + |] + |> Chart.Grid(4,1) + + +(*** condition: ipynb ***) +#if IPYNB +quartilePlot +#endif // IPYNB + +(***hide***) +quartilePlot |> GenericChart.toChartHTML +(***include-it-raw***) + + +(** + +## QQ plot + +QQ plots allow to compare sample distributions if: + + - the underlying population distribution is unknown or if + - the relationship between two distributions should be evaluated in greater detail than just their estimated parameters. + +When a sample is compared to a known distribution, every quantile can be calculated exactly by inverting their CDF. If you compare two samples, there is no uniquely defined CDF, +so quantiles have to be interpolated. + +### Comparing two sample distributions + +Two sample populations can be compared by QQ-plots where quantiles of the first sample are plotted against quantiles of the second sample. If the sample length is equal, both samples are ordered and plotted as pairs. + +$qq_i = X_i,Y_i$ with $X$ and $Y$ beeing ordered sample sequences of length $n$ and $(1 \le i \le n)$ + + +If samples sizes are unequal the quantiles of the larger data set have to be interpolated from the quantiles of the smaller data set. + +**Lets create four samples of size 300 first:** + + - two that are drawn from a normal distribution of mean $3.0$ and standard deviation $0.5$ + + - two that are drawn randomly between 0 and 1 + +*) + + +//create samples +let rnd = System.Random() +let norm = Distributions.ContinuousDistribution.normal 3.0 0.5 + +///Example 1: Aamples from a normal distribution +let normalDistA = Array.init 300 (fun _ -> norm.Sample()) +let normalDistB = Array.init 300 (fun _ -> norm.Sample()) + +///Example 2: Random samples from values between 0 and 1 +let evenRandomA = Array.init 300 (fun _ -> rnd.NextDouble()) +let evenRandomB = Array.init 300 (fun _ -> rnd.NextDouble()) + +let exampleDistributions = + [ + Chart.Histogram(normalDistA,Name="normalDistA") |> Chart.withTemplate ChartTemplates.lightMirrored + Chart.Histogram(normalDistB,Name="normalDistB") |> Chart.withTemplate ChartTemplates.lightMirrored + Chart.Histogram(evenRandomA,Name="evenRandomA") |> Chart.withTemplate ChartTemplates.lightMirrored + Chart.Histogram(evenRandomB,Name="evenRandomB") |> Chart.withTemplate ChartTemplates.lightMirrored + ] + |> Chart.Grid(2,2) + |> Chart.withSize(800.,700.) + +(*** condition: ipynb ***) +#if IPYNB +exampleDistributions +#endif // IPYNB + +(***hide***) +exampleDistributions |> GenericChart.toChartHTML +(***include-it-raw***) + +(** + +To compare if two distributions are equal or to identify ranges in which the distributions differ, a quantile pair from each of the two distributions can be calculated and plotted against each other. +If both distributions are similar, you would expect the quantiles to be identical and therefore are located on a straight line. If the samples are of different length $m$ and $n$ the number +of quantiles is limited to $min$ $m$ $n$. For every data point of the smaller data set a corresponding quantile of the larger data set is determined. + +Lets calculate the quantiles from _normalDistA_ vs _normalDistB_. +*) + +// Here a tuple sequence is generated that pairwise contain the same quantiles from normalDistA and normalDistB +let qqData = QQPlot.fromTwoSamples normalDistA normalDistB + +// Lets check out the first 5 elements in the sequence +Seq.take 5 qqData +(***include-it-raw***) + +(** + +You can use this tuple sequence and plot it against each other. + +*) + +open FSharp.Stats.Signal +open FSharp.Stats.Signal.QQPlot + + +//plots QQ plot from two sample populations +let plotFrom2Populations sampleA sampleB = + + //here the coordinates are calculated + let qqCoordinates = QQPlot.fromTwoSamples sampleA sampleB + + Chart.Point (qqCoordinates,Name="QQ") + |> Chart.withXAxisStyle "Quantiles sample A" + |> Chart.withYAxisStyle "Quantiles sample B" + |> Chart.withTemplate ChartTemplates.lightMirrored + +let myQQplot1 = plotFrom2Populations normalDistA normalDistB + + +(*** condition: ipynb ***) +#if IPYNB +myQQplot1 +#endif // IPYNB + +(***hide***) +myQQplot1 |> GenericChart.toChartHTML +(***include-it-raw***) + + +(** + +Both samples were taken from the same distribution (here normal distribution) and therefore they match pretty well. + +In the following plot you can see four comparisons of the four distributions defined in the beginning (2x normal + 2x uniform). + +*) + +let multipleQQPlots = + [ + plotFrom2Populations normalDistA normalDistB |> Chart.withXAxisStyle "normalA" |> Chart.withYAxisStyle "normalB" + plotFrom2Populations normalDistA evenRandomB |> Chart.withXAxisStyle "normalA" |> Chart.withYAxisStyle "randomB" + plotFrom2Populations evenRandomA normalDistB |> Chart.withXAxisStyle "randomA" |> Chart.withYAxisStyle "normalB" + plotFrom2Populations evenRandomA evenRandomB |> Chart.withXAxisStyle "randomA" |> Chart.withYAxisStyle "randomB" + ] + |> Chart.Grid(2,2) + |> Chart.withLegend false + |> Chart.withSize(800.,700.) + +(*** condition: ipynb ***) +#if IPYNB +multipleQQPlots +#endif // IPYNB + +(***hide***) +multipleQQPlots |> GenericChart.toChartHTML +(***include-it-raw***) + + +(** + +When QQ-plots are generated for pairwise comparisons, it is obvious, that the _random_-_random_ and _normal_-_normal_ samples fit nicely. The cross comparisons between normal and random samples do not match. +Its easy to see that the random samples are distributed between 0 and 1 while the samples from the normal distributions range from $1$ to ~$5$. + + +### Comparing a sample against a distribution + +You can plot the quantiles from a sample versus a known distribution to check if your data follows the given distribution. +There are various methods to determine quantiles that differ in handling ties and uneven spacing. + + +``` +Quantile determination methods(rank,sampleLength): + - Blom -> (rank - 3. / 8.) / (sampleLength + 1. / 4.) + - Rankit -> (rank - 1. / 2.) / sampleLength + - Tukey -> (rank - 1. / 3.) / (sampleLength + 1. / 3.) + - VanDerWerden -> rank / (sampleLength + 1.) +``` + +_Note that this method does not replace a significance test wether the distributions differ statistically._ + +#### Normal distribution + +The data can be z standardized prior to quantile determination to have zero mean and unit variance. If the data is zTransformed the bisector defines a perfect match. + +*) + +// The raw qq-plot data of a standard normal distribution and the sample distribution +// defaults: +// Method: QuantileMethod.Rankit +// ZTransform: false +let qq2Normal sample = QQPlot.toGauss(Method=QuantileMethod.Rankit,ZTransform=true) sample + +// plots QQ plot from a sample population against a standard normal distribution. +// if the data is zTransformed the bisector defines a perfect match. +let plotFromOneSampleGauss sample = + + //this is the main data plotted as x,y diagram + let qqData = QQPlot.toGauss(Method=QuantileMethod.Rankit,ZTransform=true) sample + + let qqChart = + Chart.Point qqData + + let expectedLine = + let minimum = qqData |> Seq.head |> snd + let maximum = qqData |> Seq.last |> snd + [ + minimum,minimum + maximum,maximum + ] + |> Chart.Line + |> Chart.withTraceName "expected" + + [ + qqChart + expectedLine + ] + |> Chart.combine + |> Chart.withXAxisStyle "Theoretical quantiles (normal)" + |> Chart.withYAxisStyle "Sample quantiles" + |> Chart.withTemplate ChartTemplates.lightMirrored + + +let myQQPlotOneSampleGauss = plotFromOneSampleGauss normalDistA + +(*** condition: ipynb ***) +#if IPYNB +myQQPlotOneSampleGauss +#endif // IPYNB + +(***hide***) +myQQPlotOneSampleGauss |> GenericChart.toChartHTML +(***include-it-raw***) + + + +(** + +As seen above the sample perfectly matches the expected quantiles from a normal distribution. This is expected because the sample was generated by sampling from an normal distribution. + +*) + +// compare the uniform sample against a normal distribution +let my2QQPlotOneSampleGauss = plotFromOneSampleGauss evenRandomA + + +(*** condition: ipynb ***) +#if IPYNB +my2QQPlotOneSampleGauss +#endif // IPYNB + +(***hide***) +my2QQPlotOneSampleGauss |> GenericChart.toChartHTML +(***include-it-raw***) + + +(** + +As seen above the sample does not matches the expected quantiles from a normal distribution. The sample derives from an random sampling between 0 and 1 and therefore is overrepresented in the tails. + + +#### Uniform Distribution + +You also can plot your data against a uniform distribution. Data can be standardized to lie between $0$ and $1$ +*) + +let uniform = + QQPlot.toUniform(Method=QuantileMethod.Rankit,Standardize=false) normalDistA + |> Chart.Point + |> Chart.withXAxisStyle "Theoretical quantiles (uniform)" + |> Chart.withYAxisStyle "Sample quantiles" + |> Chart.withTemplate ChartTemplates.lightMirrored + +(*** condition: ipynb ***) +#if IPYNB +uniform +#endif // IPYNB + +(***hide***) +uniform |> GenericChart.toChartHTML +(***include-it-raw***) + +(** + +#### Any specified distribution + +You also can plot your data against a distribution you can specify. You have to define the _inverse CDF_ or also called the _Quantile function_. + +**LogNormal distribution** + +*) + +// generate a sample from a lognormal distriution +let sampleFromLogNormal = + let d = Distributions.ContinuousDistribution.logNormal 0. 1. + Array.init 500 (fun _ -> d.Sample()) + + + +// define the quantile function for the log normal distribution with parameters mu = 0 and sigma = 1 +let quantileFunctionLogNormal p = + let mu = 0. + let sigma = 1. + Math.Exp (mu + Math.Sqrt(2. * (pown sigma 2)) * SpecialFunctions.Errorfunction.inverf(2. * p - 1.)) + +let logNormalNormalDist = QQPlot.toInvCDF(quantileFunctionLogNormal,Method=QuantileMethod.Rankit) normalDistA + +let logNormalLogNormal = QQPlot.toInvCDF(quantileFunctionLogNormal,Method=QuantileMethod.Rankit) sampleFromLogNormal + +let logNormalChart = + [ + Chart.Point(logNormalNormalDist,Name="normal sample") + Chart.Point(logNormalLogNormal,Name="log normal sample") + ] + |> Chart.combine + |> Chart.withXAxisStyle "Theoretical quantiles Log Normal" + |> Chart.withYAxisStyle "Sample quantiles" + |> Chart.withTemplate ChartTemplates.lightMirrored + +(*** condition: ipynb ***) +#if IPYNB +logNormalChart +#endif // IPYNB + +(***hide***) +logNormalChart |> GenericChart.toChartHTML +(***include-it-raw***) + + +(** + +The log normal sample fits nicely to the bisector, but the sample from the normal distribution does not fit + +*) \ No newline at end of file diff --git a/docs/Signal.fsx b/docs/Signal.fsx index df45547d..e4f83a17 100644 --- a/docs/Signal.fsx +++ b/docs/Signal.fsx @@ -540,4 +540,4 @@ fftChart (***hide***) fftChart |> GenericChart.toChartHTML -(***include-it-raw***) \ No newline at end of file +(***include-it-raw***) diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj index ee417b24..8f1f3744 100644 --- a/src/FSharp.Stats/FSharp.Stats.fsproj +++ b/src/FSharp.Stats/FSharp.Stats.fsproj @@ -89,6 +89,7 @@ + diff --git a/src/FSharp.Stats/Signal/QQPlot.fs b/src/FSharp.Stats/Signal/QQPlot.fs new file mode 100644 index 00000000..b2d43b40 --- /dev/null +++ b/src/FSharp.Stats/Signal/QQPlot.fs @@ -0,0 +1,186 @@ +namespace FSharp.Stats.Signal + + +open FSharp.Stats +open System +open FSharp.Stats.SpecialFunctions +open FSharp.Stats.Quantile +open FSharp.Stats.Interpolation + +module QQPlot = + + type QuantileMethod = + | Blom + | Rankit + | Tukey + | VanDerWerden + + /// computes the quantile quantile coordinates of two sample distributions. Uses default quantile (Quantile.mode) + // tested against R qqnorm + //let internal fromTwoSamples method sampleA sampleB = + // let sampleALength = Seq.length sampleA + // let sampleBLength = Seq.length sampleB + // let minSampleLength = float (min sampleALength sampleBLength) + // + // let (smallSet,bigSet) = if sampleALength <= sampleBLength then (sampleA,sampleB) else (sampleB,sampleA) + // + // let getQuantile rank = + // match method with + // | Blom -> (rank - 3. / 8.) / (minSampleLength + 1. / 4.) + // | Rankit -> (rank - 1. / 2.) / minSampleLength + // | Tukey -> (rank - 1. / 3.) / (minSampleLength + 1. / 3.) + // | VanDerWerden -> rank / (minSampleLength + 1.) + // + // smallSet + // |> Seq.sort + // |> Seq.mapi (fun i x -> + // let rank = float (i + 1) + // let pi = getQuantile rank + // x,Quantile.mode pi bigSet + // ) + + /// Computes the quantile quantile coordinates of two sample distributions. + /// If samples are not the same size, the larger samples is interpolated to match the quantiles of the smaller. + // tested against R qqplot + let fromTwoSamples sampleA sampleB = + let sampleALength = Seq.length sampleA + let sampleBLength = Seq.length sampleB + + let ((sN,smallSet),(bN,bigSet)) = + if sampleALength <= sampleBLength then + ((sampleALength,Seq.sort sampleA),(sampleBLength,Seq.sort sampleB)) + else ((sampleBLength,Seq.sort sampleB),(sampleALength,Seq.sort sampleA)) + + let linearSpl = LinearSpline.initInterpolate [|0. .. float bN - 1.|] (Array.ofSeq bigSet) + let stepwidth = float (bN-1) / float (sN-1) + + let approxbigSet = + Array.init sN (fun i -> + let xV = (float i) * stepwidth + let yV = LinearSpline.interpolate linearSpl xV + yV + ) + + if sampleALength <= sampleBLength then + Seq.zip smallSet approxbigSet + else + Seq.zip approxbigSet smallSet + + + /// Computes the quantile quantile coordinates of a sample distributions against a normal distribution. + /// The sample can be z transformed. StandardMethod = Rankit + // tested against R qqnorm + let internal fromSampleToGauss method zTransformSample (sample:seq) = + + let sampleLength = Seq.length sample |> float + + let standardizedData = + if zTransformSample then + Signal.Normalization.zScoreTransformPopulation (vector sample) + |> Vector.toArray + else + sample |> Seq.toArray + + let getQuantile rank = + match method with + | Blom -> (rank - 3. / 8.) / (sampleLength + 1. / 4.) + | Rankit -> (rank - 1. / 2.) / sampleLength + | Tukey -> (rank - 1. / 3.) / (sampleLength + 1. / 3.) + | VanDerWerden -> rank / (sampleLength + 1.) + + let inverseCDF x = + sqrt 2. * Errorfunction.inverf (2. * x - 1.) + + standardizedData + |> Seq.sort + |> Seq.mapi (fun i x -> + let rank = float (i + 1) + let pi = getQuantile rank + inverseCDF pi,x + ) + + /// Computes the quantile quantile coordinates of a sample distributions against a normal distribution. + /// The sample can be standardized between 0 and 1. + let internal fromSampleToUniform method standardizeSample (sample:seq) = + + let sampleLength = Seq.length sample |> float + + let standardizedSample = + if standardizeSample then + let min = Seq.min sample + let max = Seq.max sample + sample |> Seq.map (fun x -> (x-min) / (max-min)) + else + sample + + let getQuantile rank = + match method with + | Blom -> (rank - 3. / 8.) / (sampleLength + 1. / 4.) + | Rankit -> (rank - 1. / 2.) / sampleLength + | Tukey -> (rank - 1. / 3.) / (sampleLength + 1. / 3.) + | VanDerWerden -> rank / (sampleLength + 1.) + + standardizedSample + |> Seq.sort + |> Seq.mapi (fun i x -> + let rank = float (i + 1) + let pi = getQuantile rank + pi,x + ) + + /// Computes the quantile quantile coordinates of a sample distributions against a specified inverseCDF function. You can derive an inverse CDF of any statistical distribution. + let internal fromSampleToInverseCDF method invCDF (sample:seq) = + + let sampleLength = Seq.length sample |> float + + let getQuantile rank = + match method with + | Blom -> (rank - 3. / 8.) / (sampleLength + 1. / 4.) + | Rankit -> (rank - 1. / 2.) / sampleLength + | Tukey -> (rank - 1. / 3.) / (sampleLength + 1. / 3.) + | VanDerWerden -> rank / (sampleLength + 1.) + + sample + |> Seq.sort + |> Seq.mapi (fun i x -> + let rank = float (i + 1) + let pi = getQuantile rank + invCDF pi,x + ) + + +type QQPlot() = + + /// Computes the quantile quantile coordinates of a sample distributions against a normal distribution. + /// The sample can be z transformed. default = Rankit + static member toGauss(?Method:QQPlot.QuantileMethod,?ZTransform:bool) = + + let standardize = defaultArg ZTransform false + let method = defaultArg Method QQPlot.QuantileMethod.Rankit + + fun (sample: seq) -> + QQPlot.fromSampleToGauss method standardize sample + + /// Computes the quantile quantile coordinates of a sample distributions against a normal distribution. + /// The sample can be standardized to the range between 0 and 1. default = Rankit + static member toUniform(?Method:QQPlot.QuantileMethod,?Standardize:bool) = + + let standardize = defaultArg Standardize false + let method = defaultArg Method QQPlot.QuantileMethod.Rankit + + fun (sample: seq) -> + QQPlot.fromSampleToUniform method standardize sample + + /// Computes the quantile quantile coordinates of a sample distributions against a specified inverseCDF function. You can derive an inverse CDF of any statistical distribution. + static member toInvCDF(inverseCDF:(float -> float),?Method:QQPlot.QuantileMethod) = + + let method = defaultArg Method QQPlot.QuantileMethod.Rankit + + fun (sample: seq) -> + QQPlot.fromSampleToInverseCDF method inverseCDF sample + + + + + + diff --git a/src/FSharp.Stats/SpecialFunctions/Erf.fs b/src/FSharp.Stats/SpecialFunctions/Erf.fs index d2f6fdb5..4b5d129c 100644 --- a/src/FSharp.Stats/SpecialFunctions/Erf.fs +++ b/src/FSharp.Stats/SpecialFunctions/Erf.fs @@ -73,4 +73,43 @@ module Errorfunction = match x with | x when (infinity.Equals(x)) -> nan | x when ((-infinity).Equals(x)) -> infinity - | _ -> _erfcx x \ No newline at end of file + | _ -> _erfcx x + + //helper arrays for inverse error function + let private a = [|0.886226899; -1.645349621; 0.914624893; -0.140543331|] + let private b = [|1.; -2.118377725; 1.442710462; -0.329097515; 0.012229801|] + let private c = [|-1.970840454; -1.62490649; 3.429567803; 1.641345311|] + let private d = [|1.; 3.543889200; 1.637067800|] + + /// inverse of error function. uses newton refinement; from https://libit.sourceforge.net/ + /// accuracy to the fifth digit + let inverf x = + match x with + | x when x = -1. -> -infinity + | x when x = 1. -> infinity + | _ -> + let sqrtPi = sqrt Math.PI + + let z = abs x + + let r = + if z <= 0.7 then + let x2 = x * x + let r = z * (((a.[3] * x2 + a.[2]) * x2 + a.[1]) * x2 + a.[0]) + r / ((((b.[4] * x2 + b.[3]) * x2 + b.[2]) * x2 + b.[1]) * x2 + b.[0]) + else + let y = sqrt( -log((1. - z)/2.)) + let r = (((c.[3] * y + c.[2]) * y + c.[1]) * y + c.[0]) + r / ((d.[2] * y + d.[1]) * y + d.[0]) + + + let r' = r * float (sign x) + let z' = z * float (sign x) + + let r'' = r' - (Erf(r') - z')/(2./sqrtPi * exp(-r' * r')) + r'' - (Erf(r'') - z')/(2./sqrtPi *exp(-r'' * r'')) + + /// inverse of complementary error function + /// accuracy to the fifth digit + let inverfc x = + inverf (1. - x) diff --git a/tests/FSharp.Stats.Tests/SpecialFunctions.fs b/tests/FSharp.Stats.Tests/SpecialFunctions.fs index f94c99cd..1e1bd132 100644 --- a/tests/FSharp.Stats.Tests/SpecialFunctions.fs +++ b/tests/FSharp.Stats.Tests/SpecialFunctions.fs @@ -457,6 +457,34 @@ let erfTests = testCase "erfcx(-infinity)" (fun _ -> Expect.isTrue (infinity.Equals(Errorfunction.erfcx -infinity)) "expected erfcx(-infinity) to be infinity" ) + //tested against R Pracma and WolframAlpha + testCase "inverf(0.01)" (fun _ -> + Expect.floatClose Accuracy.medium (Errorfunction.inverf 0.01) 0.00886250128095 "inverf returned insufficient approximation of the result" + ) + testCase "inverf(0.5)" (fun _ -> + Expect.floatClose Accuracy.medium (Errorfunction.inverf 0.5) 0.476936276204 "inverf returned insufficient approximation of the result" + ) + testCase "inverf(0.99)" (fun _ -> + Expect.floatClose Accuracy.medium (Errorfunction.inverf 0.99) 1.82138636772 "inverf returned insufficient approximation of the result" + ) + testCase "inverf(-0.95)" (fun _ -> + Expect.floatClose Accuracy.medium (Errorfunction.inverf -0.95) -1.38590382435 "inverf returned insufficient approximation of the result" + ) + testCase "inverf(1)" (fun _ -> + Expect.isTrue ((Errorfunction.inverf 1) = infinity) "inverf returned insufficient approximation of the result" + ) + testCase "inverf(-1)" (fun _ -> + Expect.isTrue ((Errorfunction.inverf -1) = -infinity) "inverf returned insufficient approximation of the result" + ) + testCase "inverf(0)" (fun _ -> + Expect.floatClose Accuracy.medium (Errorfunction.inverf 0.0) 0. "inverf returned insufficient approximation of the result" + ) + testCase "inverf(2)" (fun _ -> + Expect.isTrue (nan.Equals(Errorfunction.inverf 2)) "inverf returned insufficient approximation of the result" + ) + testCase "inverf(-2)" (fun _ -> + Expect.isTrue (nan.Equals(Errorfunction.inverf -2)) "inverf returned insufficient approximation of the result" + ) ] []