From e951dc6ecc772df8df6b12d182ea74c62363dc89 Mon Sep 17 00:00:00 2001 From: Christopher Teubert Date: Mon, 7 Aug 2023 16:11:08 -0700 Subject: [PATCH 1/2] Add param estimation example --- examples/param_est.ipynb | 507 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 507 insertions(+) create mode 100644 examples/param_est.ipynb diff --git a/examples/param_est.ipynb b/examples/param_est.ipynb new file mode 100644 index 0000000..9d56072 --- /dev/null +++ b/examples/param_est.ipynb @@ -0,0 +1,507 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Welcome to the Parameter Estimation Feature Example" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The goal of this notebook is to instruct ProgPy users on how to use the estimate_params feature for PrognosticModels.\n", + "\n", + "First some background. Parameter estimation is used to tune the parameters of a general model so its behavior matches the behavior of a specific system. For example, parameters of the battery model can be tuned to configure the model to describe the behavior of a specific battery.\n", + "\n", + "Generally, parameter estimation is done by tuning the parameters of the model so that simulation best matches the behavior observed in some available data. In ProgPy, this is done using the progpy.PrognosticsModel.estimate_params() method. This method takes input and output data from one or more runs, and uses scipy.optimize.minimize function to estimate the parameters of the model. For more information, refer to our Documentation [here](https://nasa.github.io/progpy/prog_models_guide.html#parameter-estimation)\n", + "\n", + "A few definitions:\n", + "* __`keys`__ `(list[str])`: Parameter keys to optimize\n", + "* __`times`__ `(list[float])`: Array of times for each run\n", + "* __`inputs`__ `(list[InputContainer])`: Array of input containers where inputs[x] corresponds to times[x]\n", + "* __`outputs`__ `(list[OutputContainer])`: Array of output containers where outputs[x] corresponds to times[x]\n", + "* __`method`__ `(str, optional)`: Optimization method- see scipy.optimize.minimize for options\n", + "* __`tol`__ `(int, optional)`: Tolerance for termination. Depending on the provided minimization method, specifying tolerance sets solver-specific options to tol\n", + "* __`error_method`__ `(str, optional)`: Method to use in calculating error. See calc_error for options\n", + "* __`bounds`__ `(tuple or dict, optional)`: Bounds for optimization in format ((lower1, upper1), (lower2, upper2), ...) or {key1: (lower1, upper1), key2: (lower2, upper2), ...}\n", + "* __`options`__ `(dict, optional)`: Options passed to optimizer. See scipy.optimize.minimize for options" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 1) Simple Example" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will show an example demonstrating model parameter estimation. In this example, we estimate the model parameters from data. In general, the data will usually be collected from the physical system or from a different model (model surrogacy). In this case, we will use the example data, below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "times = [0, 1, 2, 3, 4, 5, 6, 7]\n", + "inputs = [{}]*8\n", + "outputs = [\n", + " {'x': 1.83},\n", + " {'x': 36.5091999066245},\n", + " {'x': 60.05364349596605},\n", + " {'x': 73.23733081022635},\n", + " {'x': 76.47528104941956},\n", + " {'x': 69.9146810161441},\n", + " {'x': 53.74272753819968},\n", + " {'x': 28.39355725512131},\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will import a model from the ProgPy Package. For this example we're using the simple ThrownObject model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from progpy.models import ThrownObject" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can build a model with a best guess for the parameters.\n", + "\n", + "We will use a guess that our thrower is 20 meters tall, has a throwing speed of 3.1 m/s, and that acceleration due to gravity is 15 m/s^2. However, given our times, inputs, and outputs, we can clearly tell this is not true! Let's see if parameter estimation can fix this!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = ThrownObject(thrower_height=20, throwing_speed=3.1, g=15)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this example, we will define specific parameters that we want to estimate.\n", + "\n", + "We can pass the desired parameters to our __keys__ keyword argument." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keys = ['thrower_height', 'throwing_speed', 'g']" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To really see what `estimate_params()` is doing, we will print out the state before executing the estimation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Printing state before\n", + "print('Model configuration before')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "print(' Error: ', m.calc_error(times, inputs, outputs, dt=0.1))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that the error is quite high. This indicates that the parameters are not accurate.\n", + "\n", + "Now, we will run `estimate_params()` with the data to correct these parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.estimate_params(times = times, inputs = inputs, outputs = outputs, keys = keys, dt=0.1)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's see what the new parameters are after estimation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('\\nOptimized configuration')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "print(' Error: ', m.calc_error(times, inputs, outputs, dt=0.1))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sure enough- parameter estimation determined that the thrower's height wasn't 20m, instead was closer to 1.8m, a much more reasonable height!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 2) Using Tol" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An additional feature of the `estimate_params()` function is the tolerance feature, or `tol`. The exact function that the `tol` argument\n", + "uses is specific to the method used. For example, the `tol` argument for the `Nelder-Mead` method is the change in the lowest error and its corresponding parameter values between iterations. The difference between iterations for both of these must be below `tol` for parameter estimation to converge.\n", + "\n", + "For example, if in the nth iteration of the optimizer above the best error was __2e-5__ and the cooresponding values were thrower_height=1.8, throwing_speed=40, and g=-9.8 and at the n+1th iteration the best error was __1e-5__ and the cooresponding values were thrower_height=1.85, throwing_speed=39.5, and g=-9.81, then the difference in error would be __1e-5__ and the difference in parameter values would be \n", + "\n", + "$$\\sqrt{(1.85 - 1.8)^2 + (40 - 39.5)^2 + (9 - 9.81)^2} = 0.5025932749$$\n", + "\n", + "In this case, error would meet a tol of __1e-4__, but the parameters would not, so optimization would continue. For more information, see the [scipy.optimize.minimize](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) documentation.\n", + "\n", + "In our previous example, note that our total error was roughly __6e-10__ after the `estimate_params()` call, using the default `tol` of __1e-4__. Now, let us see what happens to the parameters when we pass a tolerance of __1e-6__." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = ThrownObject(thrower_height=20, throwing_speed=3.1, g=15)\n", + "m.estimate_params(times = times, inputs = inputs, outputs = outputs, keys = keys, dt=0.1, tol=1e-6)\n", + "print('\\nOptimized configuration')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "print(' Error: ', m.calc_error(times, inputs, outputs, dt=0.1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, reducing the tolerance leads to a decrease in the overall error, resulting in more accurate parameters." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note, if we were to set a high tolerance, such as 10, our error would consequently be very high!\n", + "\n", + "Also note that the tol value is for scipy minimize. It is different but strongly correlated to the result of calc_error. For more information on how the `tol` feature works, please consider scipy's `minimize()` documentation located [here](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also adjust the metric that is used to estimate parameters by setting the error_method to a different `calc_error()` method (see example below).\n", + "Default is Mean Squared Error (MSE).\n", + "See calc_error method for list of options." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.parameters['thrower_height'] = 3.1\n", + "m.parameters['throwing_speed'] = 29\n", + "\n", + "# Using MAE, or Mean Absolute Error instead of the default Mean Squared Error.\n", + "m.estimate_params(times = times, inputs = inputs, outputs = outputs, keys = keys, dt=0.1, tol=1e-9, error_method='MAX_E')\n", + "print('\\nOptimized configuration')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "print(' Error: ', m.calc_error(times, inputs, outputs, dt=0.1, method='MAX_E'))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that MAX_E is frequently better at capturing tail behavior in many prognostic models." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3) Handling Noise with Multiple Runs" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the previous two examples, we demonstrated how to use `estimate_params()` using a clearly defined ThrownObject model. However, unlike most models, we assumed that there would be no noise!\n", + "\n", + "In this example, we'll show how to use `estimate_params()` with noisy data.\n", + "\n", + "First let's repeat the previous example, this time generating data from a noisy model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = ThrownObject(process_noise = 1)\n", + "results = m.simulate_to_threshold(save_freq=0.5, dt=('auto', 0.1))\n", + "\n", + "# Resetting parameters to their incorrectly set values.\n", + "m.parameters['thrower_height'] = 20\n", + "m.parameters['throwing_speed'] = 3.1\n", + "m.parameters['g'] = 15\n", + "keys = ['thrower_height', 'throwing_speed', 'g']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.estimate_params(times = results.times, inputs = results.inputs, outputs = results.outputs, keys = keys)\n", + "print('\\nOptimized configuration')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "print(' Error: ', m.calc_error(results.times, results.inputs, results.outputs))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, the error from calc_error is low, but to have an accurate estimation of the error, we should actually be manually measuring the Absolute Mean Error rather than using calc_error().\n", + "\n", + "The reason being is simple! calc_error() is calculating the error between the simulated and observed data. However, the observed and simulated data in this case are being generated from a model that has noise! In other words, we are comparing the difference of noise to noise, which can lead to inconsistent results!\n", + "\n", + "Let's create a helper function to calculate the Absolute Mean Error between our original and estimated parameters!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Creating a new model with the original parameters to compare to the model with noise.\n", + "true_Values = ThrownObject()\n", + "\n", + "# Function to determine the Absolute Mean Error (AME) of the model parameters.\n", + "def AME(m, keys):\n", + " error = 0\n", + " for key in keys:\n", + " error += abs(m.parameters[key] - true_Values.parameters[key])\n", + " return error" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now using our new AME function we see that the error isn't as great as we thought." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "AME(m, keys)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the error changes every time due to the randomness of noise:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for count in range(10):\n", + " m = ThrownObject(process_noise = 1)\n", + " results = m.simulate_to_threshold(save_freq=0.5, dt=('auto', 0.1))\n", + " \n", + " # Resetting parameters to their originally incorrectly set values.\n", + " m.parameters['thrower_height'] = 20\n", + " m.parameters['throwing_speed'] = 3.1\n", + " m.parameters['g'] = 15\n", + "\n", + " m.estimate_params(times = results.times, inputs = results.inputs, outputs = results.outputs, keys = keys, dt=0.1)\n", + " error = AME(m, ['thrower_height', 'throwing_speed', 'g'])\n", + " print(f'Estimate Call Number {count} - AME Error {error}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This issue with noise can be overcome with more data. Let's repeat the example above, this time using data from multiple runs. First, let's generate the data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "times, inputs, outputs = [], [], []\n", + "m = ThrownObject(process_noise=1)\n", + "for count in range(20):\n", + " results = m.simulate_to_threshold(save_freq=0.5, dt=('auto', 0.1))\n", + " times.append(results.times)\n", + " inputs.append(results.inputs)\n", + " outputs.append(results.outputs)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next let's reset the parameters to our incorrect values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.parameters['thrower_height'] = 20\n", + "m.parameters['throwing_speed'] = 3.1\n", + "m.parameters['g'] = 15" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, let's call estimate_params with all the collected data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m.estimate_params(times=times, inputs=inputs, outputs=outputs, keys=keys, dt=0.1)\n", + "print('\\nOptimized configuration')\n", + "for key in keys:\n", + " print(\"-\", key, m.parameters[key])\n", + "error = AME(m, ['thrower_height', 'throwing_speed', 'g'])\n", + "print('AME Error: ', error)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that by using data from multiple runs, we are able to produce a lower AME Error than before! This is because we are able to simulate the noise multiple times, which in turn, allows our `estimate_params()` to produce a more accurate result since it is given more data to work with!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 21f06c7f3b6e0a9cc66ac26c960dc1c4bc7c0c95 Mon Sep 17 00:00:00 2001 From: Christopher Teubert Date: Mon, 7 Aug 2023 16:11:52 -0700 Subject: [PATCH 2/2] Remove previous example --- examples/param_est.py | 58 ------------------------------------------- 1 file changed, 58 deletions(-) delete mode 100644 examples/param_est.py diff --git a/examples/param_est.py b/examples/param_est.py deleted file mode 100644 index de46c1a..0000000 --- a/examples/param_est.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright © 2021 United States Government as represented by the Administrator of the -# National Aeronautics and Space Administration. All Rights Reserved. - -""" -Example demonstrating the model parameter estimation feature. -""" - -from progpy.models.thrown_object import ThrownObject - -def run_example(): - # Step 1: Build the model with your best guess in parameters - # Here we're guessing that the thrower is 20 meters tall. Obviously not true! - # Let's see if parameter estimation can fix this - m = ThrownObject(thrower_height=20) - - # Step 2: Collect data from the use of the system. Let's pretend we threw the ball once, and collected position measurements - times = [0, 1, 2, 3, 4, 5, 6, 7, 8] - inputs = [{}]*9 - outputs = [ - {'x': 1.83}, - {'x': 36.95}, - {'x': 62.36}, - {'x': 77.81}, - {'x': 83.45}, - {'x': 79.28}, - {'x': 65.3}, - {'x': 41.51}, - {'x': 7.91}, - ] - - # Step 3: Identify the parameters to be estimated - keys = ['thrower_height', 'throwing_speed'] - - # Printing state before - print('Model configuration before') - for key in keys: - print("-", key, m.parameters[key]) - print(' Error: ', m.calc_error(times, inputs, outputs, dt=1e-4)) - - # Step 4: Run parameter estimation with data - m.estimate_params([(times, inputs, outputs)], keys, dt=0.01) - - # Print result - print('\nOptimized configuration') - for key in keys: - print("-", key, m.parameters[key]) - print(' Error: ', m.calc_error(times, inputs, outputs, dt=1e-4)) - - # Sure enough- parameter estimation determined that the thrower's height wasn't 20 m, instead was closer to 1.9m, a much more reasonable height! - - # Note: You can also adjust the metric that is used to estimate parameters. - # This is done by setting the "error_method" argument. - # e.g., m.estimate_params([(times, inputs, outputs)], keys, dt=0.01, error_method='MAX_E') - # Default is Mean Squared Error (MSE) - # See calc_error method for list of options. - -if __name__=='__main__': - run_example()