Skip to content

Commit

Permalink
Automatic threshold for peaks-over-threshold (#268)
Browse files Browse the repository at this point in the history
* automatic Hs threshold

* Revert "automatic Hs threshold"

This reverts commit 4477580.

* automatic Hs threshold

* simplify & include MATLAB example for debugging

* fix independent storms

* Update mhkit/loads/extreme.py

Co-authored-by: Adam Keester <[email protected]>

* Update mhkit/loads/extreme.py

Co-authored-by: Adam Keester <[email protected]>

* Update mhkit/loads/extreme.py

Co-authored-by: Adam Keester <[email protected]>

* Update mhkit/loads/extreme.py

Co-authored-by: Adam Keester <[email protected]>

* cleanup

* Update mhkit/tests/loads/test_loads.py

* Update mhkit/tests/loads/test_loads.py

Update threshold test value one more time

* Update extreme.py

* break out nested function, consolidate scipy imports

---------

Co-authored-by: ssolson <[email protected]>
Co-authored-by: Adam Keester <[email protected]>
Co-authored-by: akeeste <[email protected]>
  • Loading branch information
4 people authored Oct 30, 2023
1 parent e89deab commit abc4395
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 2 deletions.
1 change: 1 addition & 0 deletions examples/data/loads/data_loads_hs.csv

Large diffs are not rendered by default.

130 changes: 128 additions & 2 deletions mhkit/loads/extreme.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,41 @@
import numpy as np
import pandas as pd
from scipy import stats
from scipy import optimize
from scipy import stats, optimize, signal
from mhkit.wave.resource import frequency_moment

def _peaks_over_threshold(peaks, threshold, sampling_rate):
threshold_unit = np.percentile(peaks, 100*threshold, method='hazen')
idx_peaks = np.arange(len(peaks))
idx_storm_peaks, storm_peaks = global_peaks(
idx_peaks, peaks-threshold_unit)
idx_storm_peaks = idx_storm_peaks.astype(int)

# Two storms that are close enough (within specified window) are
# considered the same storm, to ensure independence.
independent_storm_peaks = [storm_peaks[0],]
idx_independent_storm_peaks = [idx_storm_peaks[0],]
# check first 14 days to determine window size
nlags = int(14 * 24 / sampling_rate)
x = peaks - np.mean(peaks)
acf = signal.correlate(x, x, mode="full")
lag = signal.correlation_lags(len(x), len(x), mode="full")
idx_zero = np.argmax(lag==0)
positive_lag = lag[(idx_zero):(idx_zero+nlags+1)]
acf_positive = acf[(idx_zero):(idx_zero+nlags+1)] / acf[idx_zero]

window_size = sampling_rate * positive_lag[acf_positive<0.5][0]
# window size in "observations" instead of "hours" between peaks.
window = window_size / sampling_rate
# keep only independent storm peaks
for idx in idx_storm_peaks[1:]:
if (idx - idx_independent_storm_peaks[-1]) > window:
idx_independent_storm_peaks.append(idx)
independent_storm_peaks.append(peaks[idx]-threshold_unit)
elif peaks[idx] > independent_storm_peaks[-1]:
idx_independent_storm_peaks[-1] = idx
independent_storm_peaks[-1] = peaks[idx]-threshold_unit

return independent_storm_peaks

def global_peaks(t, data):
"""
Expand Down Expand Up @@ -157,6 +189,100 @@ def peaks_distribution_weibull_tail_fit(x):
return peaks


def automatic_hs_threshold(
peaks,
sampling_rate,
initial_threshold_range = (0.990, 0.995, 0.001),
max_refinement=5
):
"""
Find the best significant wave height threshold for the
peaks-over-threshold method.
This method was developed by:
> Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang and R. He (2020).
> "Characterization of Extreme Wave Conditions for Wave Energy Converter Design and Project Risk Assessment.”
> J. Mar. Sci. Eng. 2020, 8(4), 289; https://doi.org/10.3390/jmse8040289.
please cite this paper if using this method.
After all thresholds in the initial range are evaluated, the search
range is refined around the optimal point until either (i) there
is minimal change from the previous refinement results, (ii) the
number of data points become smaller than about 1 per year, or (iii)
the maximum number of iterations is reached.
Parameters
----------
peaks: np.array
Peak values of the response time-series
sampling_rate: float
Sampling rate in hours.
initial_threshold_range: tuple
Initial range of thresholds to search. Described as
(min, max, step).
max_refinement: int
Maximum number of times to refine the search range.
Returns
-------
best_threshold: float
Threshold that results in the best correlation.
"""
if not isinstance(sampling_rate, (float, int)):
raise TypeError('sampling_rate must be of type float or int')

if not isinstance(peaks, np.ndarray):
raise TypeError('peaks must be of type np.ndarray')

if not len(initial_threshold_range) == 3:
raise ValueError('initial_threshold_range must be length 3')

if not isinstance(max_refinement, int):
raise TypeError('max_refinement must be of type int')

range_min, range_max, range_step = initial_threshold_range
best_threshold = -1
years = len(peaks)/(365.25*24/sampling_rate)

for i in range(max_refinement):
thresholds = np.arange(range_min, range_max, range_step)
correlations = []

for threshold in thresholds:
distribution = stats.genpareto
over_threshold = _peaks_over_threshold(
peaks, threshold, sampling_rate)
rate_per_year = len(over_threshold) / years
if rate_per_year < 2:
break
distributions_parameters = distribution.fit(
over_threshold, floc=0.)
_, (_, _, correlation) = stats.probplot(
peaks, distributions_parameters, distribution, fit=True)
correlations.append(correlation)

max_i = np.argmax(correlations)
minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
best_threshold = thresholds[max_i]
if minimal_change and i<max_refinement-1:
break
range_step /= 10
if max_i == len(thresholds)-1:
range_min = thresholds[max_i - 1]
range_max = thresholds[max_i] + 5*range_step
elif max_i == 0:
range_min = thresholds[max_i] - 9*range_step
range_max = thresholds[max_i + 1]
else:
range_min = thresholds[max_i-1]
range_max = thresholds[max_i+1]

best_threshold_unit = np.percentile(peaks, 100*best_threshold, method='hazen')
return best_threshold, best_threshold_unit


def peaks_distribution_peaks_over_threshold(x, threshold=None):
"""
Estimate the peaks distribution using the peaks over threshold
Expand Down
7 changes: 7 additions & 0 deletions mhkit/tests/loads/test_loads.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@ def test_shortterm_extreme(self):
ste = loads.extreme.ste(t, data, t_st, method)
assert_allclose(ste.cdf(x), cdf_1)

def test_automatic_threshold(self):
filename = "data_loads_hs.csv"
data = np.loadtxt(os.path.join(datadir, filename), delimiter=",")
years = 2.97
pct, threshold = loads.extreme.automatic_hs_threshold(data, years)
assert np.isclose(pct, 0.9913)
assert np.isclose(threshold, 1.032092)

if __name__ == '__main__':
unittest.main()

0 comments on commit abc4395

Please sign in to comment.