Skip to content

Commit 89919b1

Browse files
authored
Merge pull request #495 from spsanderson/development
Development
2 parents df9a4ce + ffdda82 commit 89919b1

File tree

173 files changed

+1315
-2
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+1315
-2
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ export(util_negative_binomial_stats_tbl)
129129
export(util_normal_aic)
130130
export(util_normal_param_estimate)
131131
export(util_normal_stats_tbl)
132+
export(util_pareto1_aic)
133+
export(util_pareto1_param_estimate)
134+
export(util_pareto1_stats_tbl)
132135
export(util_pareto_aic)
133136
export(util_pareto_param_estimate)
134137
export(util_pareto_stats_tbl)

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ Add function `util_zero_truncated_geometric_stats_tbl()` to create a summary tab
2121
7. Fix #480 - Add function `util_t_param_estimate()` to estimate the parameters of the
2222
T distribution.
2323
Add function `util_t_aic()` to calculate the AIC for the T distribution.
24+
8. Fix #479 - Add function `util_pareto1_param_estimate()` to estimate the parameters of the Pareto Type I distribution.
25+
Add function `util_pareto1_aic()` to calculate the AIC for the Pareto Type I distribution.
26+
Add function `util_pareto1_stats_tbl()` to create a summary table of the Pareto Type I distribution.
2427

2528
## Minor Improvements and Fixes
2629
1. Fix #468 - Update `util_negative_binomial_param_estimate()` to add the use of

R/est-param-pareto1.R

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#' Estimate Pareto Parameters
2+
#'
3+
#' @family Parameter Estimation
4+
#' @family Pareto
5+
#'
6+
#' @author Steven P. Sanderson II, MPH
7+
#'
8+
#' @details This function will attempt to estimate the Pareto shape and scale
9+
#' parameters given some vector of values.
10+
#'
11+
#' @description The function will return a list output by default, and if the parameter
12+
#' `.auto_gen_empirical` is set to `TRUE` then the empirical data given to the
13+
#' parameter `.x` will be run through the `tidy_empirical()` function and combined
14+
#' with the estimated Pareto data.
15+
#'
16+
#' Two different methods of shape parameters are supplied:
17+
#' - LSE
18+
#' - MLE
19+
#'
20+
#' @param .x The vector of data to be passed to the function.
21+
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
22+
#' set to TRUE. This will automatically create the `tidy_empirical()` output
23+
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
24+
#' can then plot out the data using `$combined_data_tbl` from the function output.
25+
#'
26+
#' @examples
27+
#' library(dplyr)
28+
#' library(ggplot2)
29+
#'
30+
#' x <- mtcars[["mpg"]]
31+
#' output <- util_pareto1_param_estimate(x)
32+
#'
33+
#' output$parameter_tbl
34+
#'
35+
#' output$combined_data_tbl |>
36+
#' tidy_combined_autoplot()
37+
#'
38+
#' set.seed(123)
39+
#' t <- tidy_pareto1(.n = 100, .shape = 1.5, .min = 1)[["y"]]
40+
#' util_pareto1_param_estimate(t)$parameter_tbl
41+
#'
42+
#' @return
43+
#' A tibble/list
44+
#'
45+
#' @name util_pareto1_param_estimate
46+
NULL
47+
48+
#' @export
49+
#' @rdname util_pareto1_param_estimate
50+
51+
util_pareto1_param_estimate <- function(.x, .auto_gen_empirical = TRUE) {
52+
53+
# Tidyeval ----
54+
x_term <- as.numeric(.x)
55+
minx <- min(x_term)
56+
maxx <- max(x_term)
57+
n <- length(x_term)
58+
unique_terms <- length(unique(x_term))
59+
60+
# Checks ----
61+
if (!is.vector(x_term, mode = "numeric") || is.factor(x_term)) {
62+
rlang::abort(
63+
message = "'.x' must be a numeric vector.",
64+
use_cli_format = TRUE
65+
)
66+
}
67+
68+
if (n < 2 || any(x_term <= 0) || unique_terms < 2) {
69+
rlang::abort(
70+
message = "'.x' must contain at least two non-missing distinct values. All values of '.x' must be positive.",
71+
use_cli_format = TRUE
72+
)
73+
}
74+
75+
# Get params ----
76+
# LSE
77+
ppc <- 0.375
78+
fhat <- stats::ppoints(n, a = ppc)
79+
lse_coef <- stats::lm(log(1 - fhat) ~ log(sort(x_term)))$coefficients
80+
lse_shape <- -lse_coef[[2]]
81+
lse_min <- exp(lse_coef[[1]] / lse_shape)
82+
83+
# MLE
84+
mle_min <- min(x_term)
85+
mle_shape <- n / sum(log(x_term / mle_min))
86+
87+
# Return Tibble ----
88+
if (.auto_gen_empirical) {
89+
te <- tidy_empirical(.x = x_term)
90+
td_lse <- tidy_pareto1(.n = n, .shape = round(lse_shape, 3), .min = round(lse_min, 3))
91+
td_mle <- tidy_pareto1(.n = n, .shape = round(mle_shape, 3), .min = round(mle_min, 3))
92+
combined_tbl <- tidy_combine_distributions(te, td_lse, td_mle)
93+
}
94+
95+
ret <- dplyr::tibble(
96+
dist_type = rep("Pareto", 2),
97+
samp_size = rep(n, 2),
98+
min = rep(minx, 2),
99+
max = rep(maxx, 2),
100+
method = c("LSE", "MLE"),
101+
est_shape = c(lse_shape, mle_shape),
102+
est_min = c(lse_min, mle_min)
103+
)
104+
105+
# Return ----
106+
attr(ret, "tibble_type") <- "parameter_estimation"
107+
attr(ret, "family") <- "pareto"
108+
attr(ret, "x_term") <- .x
109+
attr(ret, "n") <- n
110+
111+
if (.auto_gen_empirical) {
112+
output <- list(
113+
combined_data_tbl = combined_tbl,
114+
parameter_tbl = ret
115+
)
116+
} else {
117+
output <- list(
118+
parameter_tbl = ret
119+
)
120+
}
121+
122+
return(output)
123+
}

R/stats-pareto1-tbl.R

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
#' Distribution Statistics for Pareto1 Distribution
2+
#'
3+
#' @family Pareto
4+
#' @family Distribution Statistics
5+
#'
6+
#' @details This function will take in a tibble and returns the statistics
7+
#' of the given type of `tidy_` distribution. It is required that data be
8+
#' passed from a `tidy_` distribution function.
9+
#'
10+
#' @description Returns distribution statistics in a tibble.
11+
#'
12+
#' @param .data The data being passed from a `tidy_` distribution function.
13+
#'
14+
#' @examples
15+
#' library(dplyr)
16+
#'
17+
#' tidy_pareto1() |>
18+
#' util_pareto1_stats_tbl() |>
19+
#' glimpse()
20+
#'
21+
#' @return
22+
#' A tibble
23+
#'
24+
#' @name util_pareto1_stats_tbl
25+
NULL
26+
#' @export
27+
#' @rdname util_pareto1_stats_tbl
28+
29+
util_pareto1_stats_tbl <- function(.data) {
30+
31+
# Immediate check for tidy_ distribution function
32+
if (!"tibble_type" %in% names(attributes(.data))) {
33+
rlang::abort(
34+
message = "You must pass data from the 'tidy_dist' function.",
35+
use_cli_format = TRUE
36+
)
37+
}
38+
39+
if (attributes(.data)$tibble_type != "tidy_pareto_single_parameter") {
40+
rlang::abort(
41+
message = "You must use 'tidy_pareto1()'",
42+
use_cli_format = TRUE
43+
)
44+
}
45+
46+
# Data
47+
data_tbl <- dplyr::as_tibble(.data)
48+
49+
atb <- attributes(data_tbl)
50+
xm <- atb$.min
51+
alpha <- atb$.shape
52+
53+
stat_mean <- ifelse(alpha <= 1, Inf, (alpha * xm) / (alpha - 1))
54+
stat_mode <- xm
55+
stat_coef_var <- ifelse(
56+
alpha <= 2,
57+
Inf,
58+
sqrt((alpha) / ((alpha - 1)^2 * (alpha - 2)))
59+
)
60+
stat_sd <- ifelse(
61+
alpha <= 1,
62+
Inf,
63+
sqrt((alpha * xm^2) / ((alpha - 1)^2 * (alpha - 2)))
64+
)
65+
stat_skewness <- ifelse(
66+
alpha <= 3,
67+
"undefined",
68+
(2 * (1 + alpha)) / (alpha - 3) * sqrt((alpha - 2) / alpha)
69+
)
70+
stat_kurtosis <- ifelse(
71+
alpha <= 4,
72+
"undefined",
73+
(6 * (alpha^3 + alpha^2 - 6 * alpha - 2)) / (alpha * (alpha - 3) * (alpha - 4))
74+
)
75+
76+
# Data Tibble
77+
ret <- dplyr::tibble(
78+
tidy_function = atb$tibble_type,
79+
function_call = atb$dist_with_params,
80+
distribution = "Pareto1",
81+
distribution_type = "Continuous",
82+
points = atb$.n,
83+
simulations = atb$.num_sims,
84+
mean = stat_mean,
85+
mode_lower = stat_mode,
86+
range = paste0(xm, " to Inf"),
87+
std_dv = stat_sd,
88+
coeff_var = stat_coef_var,
89+
skewness = stat_skewness,
90+
kurtosis = stat_kurtosis,
91+
computed_std_skew = tidy_skewness_vec(data_tbl$y),
92+
computed_std_kurt = tidy_kurtosis_vec(data_tbl$y),
93+
ci_lo = ci_lo(data_tbl$y),
94+
ci_hi = ci_hi(data_tbl$y)
95+
)
96+
97+
# Return
98+
return(ret)
99+
}

R/utis-aic-pareto1.R

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#' Calculate Akaike Information Criterion (AIC) for Pareto Distribution
2+
#'
3+
#' This function calculates the Akaike Information Criterion (AIC) for a Pareto distribution fitted to the provided data.
4+
#'
5+
#' @family Utility
6+
#' @family Pareto
7+
#' @author Steven P. Sanderson II, MPH
8+
#'
9+
#' @description
10+
#' This function estimates the shape and scale parameters of a Pareto distribution
11+
#' from the provided data using maximum likelihood estimation,
12+
#' and then calculates the AIC value based on the fitted distribution.
13+
#'
14+
#' @param .x A numeric vector containing the data to be fitted to a Pareto distribution.
15+
#'
16+
#' @details
17+
#' This function fits a Pareto distribution to the provided data using maximum
18+
#' likelihood estimation. It estimates the shape and scale parameters
19+
#' of the Pareto distribution using maximum likelihood estimation. Then, it
20+
#' calculates the AIC value based on the fitted distribution.
21+
#'
22+
#' Initial parameter estimates: The function uses the method of moments estimates
23+
#' as starting points for the shape and scale parameters of the Pareto distribution.
24+
#'
25+
#' Optimization method: The function uses the optim function for optimization.
26+
#' You might explore different optimization methods within optim for potentially
27+
#' better performance.
28+
#'
29+
#' Goodness-of-fit: While AIC is a useful metric for model comparison, it's
30+
#' recommended to also assess the goodness-of-fit of the chosen model using
31+
#' visualization and other statistical tests.
32+
#'
33+
#' @examples
34+
#' # Example 1: Calculate AIC for a sample dataset
35+
#' set.seed(123)
36+
#' x <- tidy_pareto1()$y
37+
#' util_pareto1_aic(x)
38+
#'
39+
#' @return
40+
#' The AIC value calculated based on the fitted Pareto distribution to the provided data.
41+
#'
42+
#' @name util_pareto1_aic
43+
NULL
44+
45+
#' @export
46+
#' @rdname util_pareto1_aic
47+
util_pareto1_aic <- function(.x) {
48+
# Tidyeval
49+
x <- as.numeric(.x)
50+
n <- length(x)
51+
52+
# Negative log-likelihood function for Pareto distribution
53+
neg_log_lik_pareto <- function(par, data) {
54+
shape <- par[1]
55+
min <- par[2]
56+
-sum(actuar::dpareto1(data, shape = shape, min = min, log = TRUE))
57+
}
58+
59+
# Get initial parameter estimates: method of moments
60+
pe <- TidyDensity::util_pareto1_param_estimate(x)$parameter_tbl |>
61+
subset(method == "MLE")
62+
63+
# Fit Pareto distribution using optim
64+
fit_pareto <- stats::optim(
65+
c(pe$est_shape, pe$est_min),
66+
neg_log_lik_pareto,
67+
data = x
68+
)
69+
70+
# Extract log-likelihood and number of parameters
71+
logLik_pareto <- -fit_pareto$value
72+
k_pareto <- 2 # Number of parameters for Pareto distribution (shape and min)
73+
74+
# Calculate AIC
75+
AIC_pareto <- 2 * k_pareto - 2 * logLik_pareto
76+
77+
# Return AIC
78+
return(AIC_pareto)
79+
}

docs/news/index.html

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ pkgdown: 2.0.9
33
pkgdown_sha: ~
44
articles:
55
getting-started: getting-started.html
6-
last_built: 2024-05-13T13:14Z
6+
last_built: 2024-05-15T01:02Z
77
urls:
88
reference: https://www.spsanderson.com/TidyDensity/reference
99
article: https://www.spsanderson.com/TidyDensity/articles

docs/reference/check_duplicate_rows.html

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/convert_to_ts.html

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/index.html

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)