spsanderson
diff --git a/‎NAMESPACE
Lines changed: 3 additions & 0 deletions b/‎NAMESPACE
Lines changed: 3 additions & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 3 additions & 0 deletions b/‎NEWS.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/est-param-pareto1.R
Lines changed: 123 additions & 0 deletions b/‎R/est-param-pareto1.R
Lines changed: 123 additions & 0 deletions
diff --git a/‎R/stats-pareto1-tbl.R
Lines changed: 99 additions & 0 deletions b/‎R/stats-pareto1-tbl.R
Lines changed: 99 additions & 0 deletions
diff --git a/‎R/utis-aic-pareto1.R
Lines changed: 79 additions & 0 deletions b/‎R/utis-aic-pareto1.R
Lines changed: 79 additions & 0 deletions
diff --git a/‎docs/news/index.html
Lines changed: 1 addition & 0 deletions b/‎docs/news/index.html
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/pkgdown.yml
Lines changed: 1 addition & 1 deletion b/‎docs/pkgdown.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/reference/check_duplicate_rows.html
Lines changed: 1 addition & 0 deletions b/‎docs/reference/check_duplicate_rows.html
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/reference/convert_to_ts.html
Lines changed: 1 addition & 0 deletions b/‎docs/reference/convert_to_ts.html
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/reference/index.html
Lines changed: 15 additions & 0 deletions b/‎docs/reference/index.html
Lines changed: 15 additions & 0 deletions
@@ -129,6 +129,9 @@ export(util_negative_binomial_stats_tbl)
 export(util_normal_aic)
 export(util_normal_param_estimate)
 export(util_normal_stats_tbl)
+export(util_pareto1_aic)
+export(util_pareto1_param_estimate)
+export(util_pareto1_stats_tbl)
 export(util_pareto_aic)
 export(util_pareto_param_estimate)
 export(util_pareto_stats_tbl)
 
@@ -21,6 +21,9 @@ Add function `util_zero_truncated_geometric_stats_tbl()` to create a summary tab
 7. Fix #480 - Add function `util_t_param_estimate()` to estimate the parameters of the
 T distribution. 
 Add function `util_t_aic()` to calculate the AIC for the T distribution.
+8. Fix #479 - Add function `util_pareto1_param_estimate()` to estimate the parameters of the Pareto Type I distribution.
+Add function `util_pareto1_aic()` to calculate the AIC for the Pareto Type I distribution.
+Add function `util_pareto1_stats_tbl()` to create a summary table of the Pareto Type I distribution.
 
 ## Minor Improvements and Fixes
 1. Fix #468 - Update `util_negative_binomial_param_estimate()` to add the use of
 
@@ -0,0 +1,123 @@
+#' Estimate Pareto Parameters
+#'
+#' @family Parameter Estimation
+#' @family Pareto
+#'
+#' @author Steven P. Sanderson II, MPH
+#'
+#' @details This function will attempt to estimate the Pareto Type I shape and
+#' minimum (`min`) parameters given some vector of values.
+#'
+#' @description The function will return a list output by default, and if the parameter
+#' `.auto_gen_empirical` is set to `TRUE` then the empirical data given to the
+#' parameter `.x` will be run through the `tidy_empirical()` function and combined
+#' with the estimated Pareto data.
+#'
+#' Two estimation methods are supplied for the shape and minimum parameters:
+#' -  LSE
+#' -  MLE
+#'
+#' @param .x The vector of data to be passed to the function.
+#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
+#' set to TRUE. This will automatically create the `tidy_empirical()` output
+#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
+#' can then plot out the data using `$combined_data_tbl` from the function output.
+#'
+#' @examples
+#' library(dplyr)
+#' library(ggplot2)
+#'
+#' x <- mtcars[["mpg"]]
+#' output <- util_pareto1_param_estimate(x)
+#'
+#' output$parameter_tbl
+#'
+#' output$combined_data_tbl |>
+#'   tidy_combined_autoplot()
+#'
+#' set.seed(123)
+#' t <- tidy_pareto1(.n = 100, .shape = 1.5, .min = 1)[["y"]]
+#' util_pareto1_param_estimate(t)$parameter_tbl
+#'
+#' @return
+#' A tibble/list
+#'
+#' @name util_pareto1_param_estimate
+NULL
+
+#' @export
+#' @rdname util_pareto1_param_estimate
+
+util_pareto1_param_estimate <- function(.x, .auto_gen_empirical = TRUE) {
+  # Estimate the shape and minimum of a Pareto Type I distribution from `.x`
+  # by least squares (LSE) and by maximum likelihood (MLE).
+  #
+  # Returns a list holding `parameter_tbl` (one row per method) and, when
+  # `.auto_gen_empirical` is TRUE, `combined_data_tbl` as well.
+
+  # Checks ----
+  # Validate the raw input: once `.x` has gone through `as.numeric()` the
+  # result always looks like a numeric vector, so the type test has to be
+  # made before coercion (this also rejects factors).
+  if (!is.numeric(.x)) {
+    rlang::abort(
+      message = "'.x' must be a numeric vector.",
+      use_cli_format = TRUE
+    )
+  }
+
+  # Tidyeval ----
+  x_term <- as.numeric(.x)
+  n <- length(x_term)
+
+  # `anyNA()` is tested before the comparison so that missing values reach
+  # this abort instead of failing with "missing value where TRUE/FALSE needed".
+  if (n < 2 || anyNA(x_term) || any(x_term <= 0) ||
+    length(unique(x_term)) < 2) {
+    rlang::abort(
+      message = "'.x' must contain at least two non-missing distinct values. All values of '.x' must be positive.",
+      use_cli_format = TRUE
+    )
+  }
+
+  # Only summarise once the data are known to be valid (avoids the
+  # empty-input warning from min()/max()).
+  minx <- min(x_term)
+  maxx <- max(x_term)
+
+  # Get params ----
+  # LSE: regress log(1 - F) on log(x). For a Pareto Type I distribution
+  # log(1 - F(x)) = shape * log(min) - shape * log(x), so the slope is -shape
+  # and the intercept is shape * log(min).
+  ppc <- 0.375
+  fhat <- stats::ppoints(n, a = ppc)
+  lse_coef <- stats::lm(log(1 - fhat) ~ log(sort(x_term)))$coefficients
+  lse_shape <- -lse_coef[[2]]
+  lse_min <- exp(lse_coef[[1]] / lse_shape)
+
+  # MLE: closed form, min is the sample minimum.
+  mle_min <- minx
+  mle_shape <- n / sum(log(x_term / mle_min))
+
+  # Return Tibble ----
+  ret <- dplyr::tibble(
+    dist_type = rep("Pareto", 2),
+    samp_size = rep(n, 2),
+    min = rep(minx, 2),
+    max = rep(maxx, 2),
+    method = c("LSE", "MLE"),
+    est_shape = c(lse_shape, mle_shape),
+    est_min = c(lse_min, mle_min)
+  )
+
+  attr(ret, "tibble_type") <- "parameter_estimation"
+  attr(ret, "family") <- "pareto"
+  attr(ret, "x_term") <- .x
+  attr(ret, "n") <- n
+
+  # Return ----
+  if (.auto_gen_empirical) {
+    te <- tidy_empirical(.x = x_term)
+    td_lse <- tidy_pareto1(.n = n, .shape = round(lse_shape, 3), .min = round(lse_min, 3))
+    td_mle <- tidy_pareto1(.n = n, .shape = round(mle_shape, 3), .min = round(mle_min, 3))
+    combined_tbl <- tidy_combine_distributions(te, td_lse, td_mle)
+
+    output <- list(
+      combined_data_tbl = combined_tbl,
+      parameter_tbl     = ret
+    )
+  } else {
+    output <- list(
+      parameter_tbl = ret
+    )
+  }
+
+  return(output)
+}
@@ -0,0 +1,99 @@
+#' Distribution Statistics for Pareto1 Distribution
+#'
+#' @family Pareto
+#' @family Distribution Statistics
+#'
+#' @details This function will take in a tibble and return the statistics
+#' of the given type of `tidy_` distribution. It is required that data be
+#' passed from a `tidy_` distribution function.
+#'
+#' @description Returns distribution statistics in a tibble.
+#'
+#' @param .data The data being passed from a `tidy_` distribution function.
+#'
+#' @examples
+#' library(dplyr)
+#'
+#' tidy_pareto1() |>
+#'   util_pareto1_stats_tbl() |>
+#'   glimpse()
+#'
+#' @return
+#' A tibble
+#'
+#' @name util_pareto1_stats_tbl
+NULL
+#' @export
+#' @rdname util_pareto1_stats_tbl
+
+util_pareto1_stats_tbl <- function(.data) {
+  # Build a one-row tibble of theoretical Pareto Type I statistics (from the
+  # `.shape` and `.min` attributes of `.data`) together with sample skewness,
+  # kurtosis and confidence bounds computed from the `y` column.
+
+  # Immediate check for tidy_ distribution function
+  if (!"tibble_type" %in% names(attributes(.data))) {
+    rlang::abort(
+      message = "You must pass data from the 'tidy_dist' function.",
+      use_cli_format = TRUE
+    )
+  }
+
+  if (attributes(.data)$tibble_type != "tidy_pareto_single_parameter") {
+    rlang::abort(
+      message = "You must use 'tidy_pareto1()'",
+      use_cli_format = TRUE
+    )
+  }
+
+  # Data
+  data_tbl <- dplyr::as_tibble(.data)
+
+  atb <- attributes(data_tbl)
+  xm <- atb$.min
+  alpha <- atb$.shape
+
+  # The mean is finite only for alpha > 1.
+  stat_mean <- ifelse(alpha <= 1, Inf, (alpha * xm) / (alpha - 1))
+  stat_mode <- xm
+  # The variance is finite only for alpha > 2. Guarding on alpha <= 2 avoids
+  # taking the square root of a negative number when 1 < alpha < 2.
+  stat_sd <- ifelse(
+    alpha <= 2,
+    Inf,
+    sqrt((alpha * xm^2) / ((alpha - 1)^2 * (alpha - 2)))
+  )
+  # Coefficient of variation = sd / mean, which simplifies to
+  # 1 / sqrt(alpha * (alpha - 2)) and does not depend on xm.
+  stat_coef_var <- ifelse(
+    alpha <= 2,
+    Inf,
+    1 / sqrt(alpha * (alpha - 2))
+  )
+  # Skewness exists only for alpha > 3.
+  stat_skewness <- ifelse(
+    alpha <= 3,
+    "undefined",
+    (2 * (1 + alpha)) / (alpha - 3) * sqrt((alpha - 2) / alpha)
+  )
+  # Excess kurtosis exists only for alpha > 4.
+  stat_kurtosis <- ifelse(
+    alpha <= 4,
+    "undefined",
+    (6 * (alpha^3 + alpha^2 - 6 * alpha - 2)) / (alpha * (alpha - 3) * (alpha - 4))
+  )
+
+  # Data Tibble
+  ret <- dplyr::tibble(
+    tidy_function = atb$tibble_type,
+    function_call = atb$dist_with_params,
+    distribution = "Pareto1",
+    distribution_type = "Continuous",
+    points = atb$.n,
+    simulations = atb$.num_sims,
+    mean = stat_mean,
+    mode_lower = stat_mode,
+    range = paste0(xm, " to Inf"),
+    std_dv = stat_sd,
+    coeff_var = stat_coef_var,
+    skewness = stat_skewness,
+    kurtosis = stat_kurtosis,
+    computed_std_skew = tidy_skewness_vec(data_tbl$y),
+    computed_std_kurt = tidy_kurtosis_vec(data_tbl$y),
+    ci_lo = ci_lo(data_tbl$y),
+    ci_hi = ci_hi(data_tbl$y)
+  )
+
+  # Return
+  return(ret)
+}
@@ -0,0 +1,79 @@
+#' Calculate Akaike Information Criterion (AIC) for Pareto Distribution
+#'
+#' This function calculates the Akaike Information Criterion (AIC) for a Pareto distribution fitted to the provided data.
+#'
+#' @family Utility
+#' @family Pareto
+#' @author Steven P. Sanderson II, MPH
+#'
+#' @description
+#' This function estimates the shape and minimum (`min`) parameters of a
+#' Pareto Type I distribution from the provided data using maximum likelihood
+#' estimation, and then calculates the AIC value based on the fitted distribution.
+#'
+#' @param .x A numeric vector containing the data to be fitted to a Pareto distribution.
+#'
+#' @details
+#' This function fits a Pareto Type I distribution to the provided data using
+#' maximum likelihood estimation. It estimates the shape and minimum (`min`)
+#' parameters of the distribution and then calculates the AIC value based on
+#' the fitted distribution.
+#'
+#' Initial parameter estimates: The function uses the maximum likelihood
+#' estimates returned by `util_pareto1_param_estimate()` as starting points
+#' for the shape and minimum parameters of the Pareto Type I distribution.
+#'
+#' Optimization method: The function uses the optim function for optimization.
+#' You might explore different optimization methods within optim for potentially
+#' better performance.
+#'
+#' Goodness-of-fit: While AIC is a useful metric for model comparison, it's
+#' recommended to also assess the goodness-of-fit of the chosen model using
+#' visualization and other statistical tests.
+#'
+#' @examples
+#' # Example 1: Calculate AIC for a sample dataset
+#' set.seed(123)
+#' x <- tidy_pareto1()$y
+#' util_pareto1_aic(x)
+#'
+#' @return
+#' The AIC value calculated based on the fitted Pareto distribution to the provided data.
+#'
+#' @name util_pareto1_aic
+NULL
+
+#' @export
+#' @rdname util_pareto1_aic
+util_pareto1_aic <- function(.x) {
+  # Fit a Pareto Type I distribution to `.x` and return its AIC
+  # (2 * k - 2 * log-likelihood, with k = 2 parameters: shape and min).
+
+  # Tidyeval
+  x <- as.numeric(.x)
+
+  # Negative log-likelihood function for Pareto distribution.
+  # `par` is c(shape, min); the local is named `min_par` so it does not
+  # shadow base::min().
+  neg_log_lik_pareto <- function(par, data) {
+    shape <- par[1]
+    min_par <- par[2]
+    -sum(actuar::dpareto1(data, shape = shape, min = min_par, log = TRUE))
+  }
+
+  # Get initial parameter estimates: the MLE row of the estimator's output.
+  # `.auto_gen_empirical = FALSE` skips building the combined data table,
+  # which is not needed here.
+  pe <- TidyDensity::util_pareto1_param_estimate(
+    x,
+    .auto_gen_empirical = FALSE
+  )$parameter_tbl
+  # Plain logical indexing instead of subset(): no non-standard evaluation
+  # of `method` inside package code.
+  pe <- pe[pe$method == "MLE", ]
+
+  # Fit Pareto distribution using optim
+  fit_pareto <- stats::optim(
+    c(pe$est_shape, pe$est_min),
+    neg_log_lik_pareto,
+    data = x
+  )
+
+  # Extract log-likelihood and number of parameters
+  logLik_pareto <- -fit_pareto$value
+  k_pareto <- 2 # Number of parameters for Pareto distribution (shape and min)
+
+  # Calculate AIC
+  AIC_pareto <- 2 * k_pareto - 2 * logLik_pareto
+
+  # Return AIC
+  return(AIC_pareto)
+}
@@ -3,7 +3,7 @@ pkgdown: 2.0.9
 pkgdown_sha: ~
 articles:
   getting-started: getting-started.html
-last_built: 2024-05-13T13:14Z
+last_built: 2024-05-15T01:02Z
 urls:
   reference: https://www.spsanderson.com/TidyDensity/reference
   article: https://www.spsanderson.com/TidyDensity/articles