-
Notifications
You must be signed in to change notification settings - Fork 151
/
Copy pathnormal_distribution_mod.f90
498 lines (371 loc) · 16.4 KB
/
normal_distribution_mod.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
! DART software - Copyright UCAR. This open source software is provided
! by UCAR, "as is", without charge, subject to all terms of use at
! http://www.image.ucar.edu/DAReS/DART/DART_download
module normal_distribution_mod
use types_mod, only : r8, missing_r8, digits12, PI
use utilities_mod, only : E_ERR, E_MSG, error_handler
use distribution_params_mod, only : distribution_params_type, NORMAL_DISTRIBUTION
implicit none
private
public :: normal_cdf, inv_std_normal_cdf, inv_weighted_normal_cdf, test_normal, &
normal_mean_variance, normal_mean_sd, inv_cdf, set_normal_params_from_ens
character(len=512) :: errstring
character(len=*), parameter :: source = 'normal_distribution_mod.f90'
! These quantiles bracket the range over which inv_std_normal_cdf functions
! The test routines are confined to this range and values outside this are
! changed to these. Approximate correpsonding standard deviations are in
! min_sd and max_sd and these are the range over which the test_normal functions.
! The max_sd is smaller in magnitude than the min_sd because the Fortran number
! model cannot represent numbers as close to 1 as it can to 0.
real(r8), parameter :: min_quantile = 0.0_r8, max_quantile = 0.999999999999999_r8
real(r8), parameter :: min_sd = -30.0_r8, max_sd = 8.0_r8
contains
!------------------------------------------------------------------------
subroutine test_normal
! This routine provides limited tests of the numerics in this module. It begins
! by comparing a handful of cases of the cdf to results from Matlab. It
! then tests the quality of the inverse cdf for a single mean/sd. Failing
! these tests suggests a serious problem. Passing them does not indicate that
! there are acceptable results for all possible inputs.
! Set number of equally spaced trials for the test F-1(F(x)) where F is the CDF.
integer, parameter :: num_trials = 10000000
integer :: i, j
real(r8) :: sd, quantile, inv, max_diff(16), max_q(16), max_matlab_diff
! Comparative results for a handful of cases from MATLAB21a
real(r8) :: cdf_diff(7)
real(r8) :: mmean(7) = [0.0_r8, 1.0_r8, -1.0_r8, 0.0_r8, 0.0_r8, 0.0_r8, 0.5_r8]
real(r8) :: msd(7) = [0.5_r8, 1.0_r8, 2.0_r8, 4.0_r8, 5.0_r8, 6.0_r8, 0.25_r8]
real(r8) :: mx(7) = [0.1_r8, 0.2_r8, 0.3_r8, 0.4_r8, 0.5_r8, 0.6_r8, 0.7_r8]
! Generated by matlab normcdf(mx, mmean, msd)
real(r8) :: mcdf(7) = [0.579259709439103_r8, 0.211855398583397_r8, 0.742153889194135_r8, &
0.539827837277029_r8, 0.539827837277029_r8, 0.539827837277029_r8, &
0.788144601416603_r8]
! Bounds for quantile inversion differences
real(r8) :: inv_diff_bound(16) = [1e-10_r8, 1e-10_r8, 1e-10_r8, 1e-10_r8, 1e-10_r8, &
1e-9_r8, 1e-8_r8, 1e-7_r8, 1e-7_r8, 1e-6_r8, &
1e-5_r8, 1e-4_r8, 1e-3_r8, 1e-2_r8, 1e-1_r8, 1e-0_r8]
! Compare to matlab
! Absolute value of differences should be less than 1e-15
do i = 1, 7
cdf_diff(i) = normal_cdf(mx(i), mmean(i), msd(i)) - mcdf(i)
end do
max_matlab_diff = maxval(abs(cdf_diff))
if(max_matlab_diff < 1.0e-15_r8) then
write(*, *) 'Matlab Comparison Tests: PASS ', max_matlab_diff
else
write(*, *) 'Matlab Comparison Tests: FAIL ', max_matlab_diff
endif
! Keep track of differences as function of quantile
max_diff = 0.0_r8
do j = 1, 16
max_q(j) = 1.0_r8 - 0.1**j
enddo
! Test the inversion of the cdf over +/- 30 standard deviations around mean
do i = 1, num_trials + 1
sd = min_sd + (i - 1.0_r8) * (max_sd - min_sd) / num_trials
quantile = normal_cdf(sd, 0.0_r8, 1.0_r8)
inv = inv_std_normal_cdf(quantile)
do j = 1, 16
if(quantile < max_q(j)) then
max_diff(j) = max(abs(sd-inv), max_diff(j))
endif
enddo
end do
do j = 1, 16
if(max_diff(j) > inv_diff_bound(j)) then
write(*, *) 'FAIL: Max inversion diff ', max_diff(j), ' > bound ', inv_diff_bound(j), &
'for quantiles < ', max_q(j)
else
write(*, *) 'PASS: Max inversion diff ', max_diff(j), ' < bound ', inv_diff_bound(j), &
'for quantiles < ', max_q(j)
endif
end do
end subroutine test_normal
!------------------------------------------------------------------------
function normal_cdf_params(x, p)
real(r8) :: normal_cdf_params
real(r8), intent(in) :: x
type(distribution_params_type), intent(in) :: p
! A translation routine that is required to use the generic cdf optimization routine
! Extracts the appropriate information from the distribution_params_type that is needed
! for a call to the function normal_cdf below.
real(r8) :: mean, sd
mean = p%params(1); sd = p%params(2)
normal_cdf_params = normal_cdf(x, mean, sd)
end function normal_cdf_params
!------------------------------------------------------------------------
function normal_cdf(x_in, mean, sd)
! Approximate cumulative distribution function for normal
real(r8) :: normal_cdf
real(r8), intent(in) :: x_in
real(r8), intent(in) :: mean, sd
real(digits12) :: nx
! Convert to a standard normal
nx = (x_in - mean) / sd
if(nx < 0.0_digits12) then
normal_cdf = 0.5_digits12 * erfc(-nx / sqrt(2.0_digits12))
else
normal_cdf = 0.5_digits12 * (1.0_digits12 + erf(nx / sqrt(2.0_digits12)))
endif
end function normal_cdf
!------------------------------------------------------------------------
function inv_weighted_normal_cdf(alpha, mean, sd, q) result(x)
! Find the value of x for which the cdf of a N(mean, sd) multiplied times
! alpha has value q.
real(r8) :: x
real(r8), intent(in) :: alpha, mean, sd, q
real(r8) :: normalized_q
! VARIABLES THROUGHOUT NEED TO SWITCH TO DIGITS_12
! Can search in a standard normal, then multiply by sd at end and add mean
! Divide q by alpha to get the right place for weighted normal
normalized_q = q / alpha
! Find spot in standard normal
x = inv_std_normal_cdf(normalized_q)
! Add in the mean and normalize by sd
x = mean + x * sd
end function inv_weighted_normal_cdf
!------------------------------------------------------------------------
function approx_inv_normal_cdf_params(quantile, p)
real(r8) :: approx_inv_normal_cdf_params
real(r8), intent(in) :: quantile
type(distribution_params_type), intent(in) :: p
! A translation routine that is required to use the generic first_guess for
! the cdf optimization routine.
! Extracts the appropriate information from the distribution_params_type that is needed
! for a call to the function approx_inv_normal_cdf below (which is nothing).
approx_inv_normal_cdf_params = approx_inv_normal_cdf(quantile)
end function approx_inv_normal_cdf_params
!------------------------------------------------------------------------
function approx_inv_normal_cdf(quantile_in) result(x)
real(r8) :: x
real(r8), intent(in) :: quantile_in
! This is used to get a good first guess for the search in inv_std_normal_cdf
! The params argument is not needed here but is required for consistency &
! with other distributions
! normal inverse
! translate from http://home.online.no/~pjacklam/notes/invnorm
! a routine written by john herrero
real(r8) :: quantile
real(r8) :: quantile_low,quantile_high
real(r8) :: a1,a2,a3,a4,a5,a6
real(r8) :: b1,b2,b3,b4,b5
real(r8) :: c1,c2,c3,c4,c5,c6
real(r8) :: d1,d2,d3,d4
real(r8) :: r, s
! Truncate out of range quantiles, converts them to smallest positive number or largest number <1
! This solution is stable, but may lead to underflows being thrown. May want to
! think of a better solution.
quantile = quantile_in
if(quantile <= 0.0_r8) quantile = tiny(quantile_in)
if(quantile >= 1.0_r8) quantile = nearest(1.0_r8, -1.0_r8)
a1 = -39.69683028665376_digits12
a2 = 220.9460984245205_digits12
a3 = -275.9285104469687_digits12
a4 = 138.357751867269_digits12
a5 = -30.66479806614716_digits12
a6 = 2.506628277459239_digits12
b1 = -54.4760987982241_digits12
b2 = 161.5858368580409_digits12
b3 = -155.6989798598866_digits12
b4 = 66.80131188771972_digits12
b5 = -13.28068155288572_digits12
c1 = -0.007784894002430293_digits12
c2 = -0.3223964580411365_digits12
c3 = -2.400758277161838_digits12
c4 = -2.549732539343734_digits12
c5 = 4.374664141464968_digits12
c6 = 2.938163982698783_digits12
d1 = 0.007784695709041462_digits12
d2 = 0.3224671290700398_digits12
d3 = 2.445134137142996_digits12
d4 = 3.754408661907416_digits12
quantile_low = 0.02425_digits12
quantile_high = 1_digits12 - quantile_low
! Split into an inner and two outer regions which have separate fits
if(quantile < quantile_low) then
s = sqrt(-2.0_digits12 * log(quantile))
x = (((((c1*s + c2)*s + c3)*s + c4)*s + c5)*s + c6) / &
((((d1*s + d2)*s + d3)*s + d4)*s + 1.0_digits12)
else if(quantile > quantile_high) then
s = sqrt(-2.0_digits12 * log(1.0_digits12 - quantile))
x = -(((((c1*s + c2)*s + c3)*s + c4)*s + c5)*s + c6) / &
((((d1*s + d2)*s + d3)*s + d4)*s + 1.0_digits12)
else
s = quantile - 0.5_digits12
r = s*s
x = (((((a1*r + a2)*r + a3)*r + a4)*r + a5)*r + a6)*s / &
(((((b1*r + b2)*r + b3)*r + b4)*r + b5)*r + 1.0_digits12)
endif
end function approx_inv_normal_cdf
!------------------------------------------------------------------------
function inv_std_normal_cdf_params(quantile, p) result(x)
real(r8) :: x
real(r8), intent(in) :: quantile
type(distribution_params_type), intent(in) :: p
x = inv_cdf(quantile, normal_cdf_params, approx_inv_normal_cdf_params, p)
end function inv_std_normal_cdf_params
!------------------------------------------------------------------------
function inv_std_normal_cdf(quantile) result(x)
real(r8) :: x
real(r8), intent(in) :: quantile
! This naive Newton method is much more accurate than approx_inv_normal_cdf, especially
! for quantile values less than 0.5.
! Given a quantile q, finds the value of x for which the standard normal cdf
! has approximately this quantile
! Where should the stupid p type come from
type(distribution_params_type) :: p
real(r8) :: mean, sd
! Set the mean and sd to 0 and 1 for standard normal
mean = 0.0_r8; sd = 1.0_r8
p%params(1) = mean; p%params(2) = sd
x = inv_std_normal_cdf_params(quantile, p)
end function inv_std_normal_cdf
!------------------------------------------------------------------------
function inv_cdf(quantile_in, cdf, first_guess, p) result(x)
interface
function cdf(x, p)
use types_mod, only : r8
use distribution_params_mod, only : distribution_params_type
real(r8) :: cdf
real(r8), intent(in) :: x
type(distribution_params_type), intent(in) :: p
end function
end interface
interface
function first_guess(quantile, p)
use types_mod, only : r8
use distribution_params_mod, only : distribution_params_type
real(r8) :: first_guess
real(r8), intent(in) :: quantile
type(distribution_params_type), intent(in) :: p
end function
end interface
real(r8) :: x
real(r8), intent(in) :: quantile_in
type(distribution_params_type), intent(in) :: p
! This naive Newton method is much more accurate than approx_inv_normal_cdf, especially
! for quantile values less than 0.5.
! Given a quantile q, finds the value of x for which the standard normal cdf
! has approximately this quantile
! Limit on the total iterations; Increasing this does not change any of the results
! that do not converge for the test_normal call on gfortran.
integer, parameter :: max_iterations = 50
! Limit on number of times to halve the increment; No deep thought.
integer, parameter :: max_half_iterations = 25
real(r8) :: quantile
real(r8) :: reltol, dq_dx, delta
real(r8) :: x_guess, q_guess, x_new, q_new, del_x, del_q, del_q_old, q_old
integer :: iter, j
real(r8) :: lower_bound, upper_bound
logical :: bounded_below, bounded_above
! Extract the required information from the p type
bounded_below = p%bounded_below; bounded_above = p%bounded_above
lower_bound = p%lower_bound; upper_bound = p%upper_bound
quantile = quantile_in
! Do a test for illegal values on the quantile
if(quantile < 0.0_r8 .or. quantile > 1.0_r8) then
! Need an error message
write(errstring, *) 'Illegal Quantile input', quantile
call error_handler(E_ERR, 'inv_cdf', errstring, source)
endif
! If the distribution is bounded, quantiles at the limits have values at the bounds
if(bounded_below .and. quantile == 0.0_r8) then
x = lower_bound
return
endif
if(bounded_above .and. quantile == 1.0_r8) then
x = upper_bound
return
endif
! If input quantiles are outside the numerically supported range, move them to the extremes
quantile = min(quantile, max_quantile)
! code tests stably for many distributions with min_quantile of 0.0, could remove this
quantile = max(quantile, min_quantile)
! Get first guess from functional approximation
x_guess = first_guess(quantile, p)
! Evaluate the cdf
q_guess = cdf(x_guess, p)
del_q = q_guess - quantile
! Iterations of the Newton method to approximate the root
do iter = 1, max_iterations
! Analytically, the PDF is derivative of CDF but this can be numerically inaccurate for extreme values
! Use numerical derivatives of the CDF to get more accurate inversion
! These values for the delta for the approximation work with Gfortran
delta = max(1e-8_r8, 1e-8_r8 * abs(x_guess))
dq_dx = (cdf(x_guess + delta, p) - cdf(x_guess - delta, p)) / (2.0_r8 * delta)
! Derivative of 0 means we're not going anywhere else
if(dq_dx <= 0.0_r8) then
x = x_guess
return
endif
! Linear approximation for how far to move in x
del_x = del_q / dq_dx
x_new = x_guess - del_x
! Look for convergence; If the change in x is smaller than approximate precision
reltol = (epsilon(x_guess))**(0.75_r8)
if(abs(del_x) <= reltol) then
x = x_new
return
endif
! If we've gone too far, the new error will be bigger than the old;
! Repeatedly half the distance until this is rectified
del_q_old = del_q
q_new = cdf(x_new, p)
do j = 1, max_half_iterations
del_q = q_new - quantile
if (abs(del_q) < abs(del_q_old)) then
exit
endif
q_old = q_new
x_new = (x_guess + x_new)/2.0_r8
q_new = cdf(x_new, p)
! If q isn't changing, no point in continuing
if(q_old == q_new) exit
end do
x_guess = x_new
end do
! For now, have switched a failed convergence to return the latest guess
! This has implications for stability of probit algorithms that require further study
! Not currently happening for any of the test cases on gfortran
x = x_new
write(errstring, *) 'Failed to converge for quantile ', quantile
call error_handler(E_MSG, 'inv_cdf', errstring, source)
!!!call error_handler(E_ERR, 'inv_cdf', errstring, source)
end function inv_cdf
!------------------------------------------------------------------------
function std_normal_pdf(x)
! Pdf of standard normal evaluated at x
real(r8) :: std_normal_pdf
real(r8), intent(in) :: x
std_normal_pdf = exp(-0.5_r8 * x**2) / (sqrt(2.0_r8 * PI))
end function std_normal_pdf
!------------------------------------------------------------------------
subroutine normal_mean_variance(x, num, mean, variance)
integer, intent(in) :: num
real(r8), intent(in) :: x(num)
real(r8), intent(out) :: mean
real(r8), intent(out) :: variance
mean = sum(x) / num
variance = sum((x - mean)**2) / (num - 1)
end subroutine normal_mean_variance
!------------------------------------------------------------------------
subroutine normal_mean_sd(x, num, mean, sd)
integer, intent(in) :: num
real(r8), intent(in) :: x(num)
real(r8), intent(out) :: mean
real(r8), intent(out) :: sd
mean = sum(x) / num
sd = sqrt(sum((x - mean)**2) / (num - 1))
end subroutine normal_mean_sd
!------------------------------------------------------------------------
subroutine set_normal_params_from_ens(ens, num, p)
integer, intent(in) :: num
real(r8), intent(in) :: ens(num)
type(distribution_params_type), intent(inout) :: p
! Set up the description of the normal distribution defined by the ensemble
p%distribution_type = NORMAL_DISTRIBUTION
! The two meaningful params are the mean and standard deviation
call normal_mean_sd(ens, num, p%params(1), p%params(2))
end subroutine set_normal_params_from_ens
!------------------------------------------------------------------------
end module normal_distribution_mod