Skip to content

Fixing inconsistent results when using seed in examples (reopened) #3621

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def main():
obj, theta = pest.theta_est()

# Parameter estimation with bootstrap resampling
bootstrap_theta = pest.theta_est_bootstrap(50)
bootstrap_theta = pest.theta_est_bootstrap(50, seed=524)

# Plot results
parmest.graphics.pairwise_plot(bootstrap_theta, title="Bootstrap theta")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ def main():
obj, theta = pest.theta_est()

# Bootstrapping
bootstrap_theta = pest.theta_est_bootstrap(10)
bootstrap_theta = pest.theta_est_bootstrap(10, seed=524)
print(bootstrap_theta)

# Confidence region test
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0])
CR = pest.confidence_region_test(bootstrap_theta, "MVN", [0.5, 0.75, 1.0],
seed=524)
print(CR)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ def main():
file_name = abspath(join(file_dirname, "reactor_data.csv"))
data = pd.read_csv(file_name)

seed= 524 # Set seed for reproducibility
np.random.seed(seed) # Set seed for reproducibility

# Create more data for the example
N = 50
df_std = data.std().to_frame().transpose()
Expand All @@ -50,10 +53,10 @@ def main():
### Parameter estimation with 'leave-N-out'
# Example use case: For each combination of data where one data point is left
# out, estimate theta
lNo_theta = pest.theta_est_leaveNout(1)
lNo_theta = pest.theta_est_leaveNout(1, seed=524)
print(lNo_theta.head())

parmest.graphics.pairwise_plot(lNo_theta, theta)
parmest.graphics.pairwise_plot(lNo_theta, theta, seed=524,)

### Leave one out/boostrap analysis
# Example use case: leave 25 data points out, run 20 bootstrap samples with the
Expand Down Expand Up @@ -81,6 +84,8 @@ def main():
alpha,
["MVN"],
title="Alpha: " + str(alpha) + ", " + str(theta_est_N.loc[0, alpha]),
seed=524+i, # setting the seed for testing repeatability,
# for typical use cases, this should not be set
)

# Extract the percent of points that are within the alpha region
Expand Down
24 changes: 18 additions & 6 deletions pyomo/contrib/parmest/graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def pairwise_plot(
add_obj_contour=True,
add_legend=True,
filename=None,
seed=None,
):
"""
Plot pairwise relationship for theta values, and optionally alpha-level
Expand Down Expand Up @@ -283,6 +284,10 @@ def pairwise_plot(
assert isinstance(add_obj_contour, bool)
assert isinstance(filename, (type(None), str))

if seed is not None:
np.random.seed(seed)


# If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
if isinstance(theta_values, tuple):
assert len(theta_values) == 3
Expand All @@ -292,7 +297,7 @@ def pairwise_plot(
if isinstance(mean, dict):
mean = pd.Series(mean)
theta_names = mean.index
mvn_dist = stats.multivariate_normal(mean, cov)
mvn_dist = stats.multivariate_normal(mean, cov, seed=seed)
theta_values = pd.DataFrame(
mvn_dist.rvs(n, random_state=1), columns=theta_names
)
Expand Down Expand Up @@ -402,7 +407,7 @@ def pairwise_plot(
)

elif dist == "MVN":
mvn_dist = fit_mvn_dist(thetas)
mvn_dist = fit_mvn_dist(thetas, seed=seed)
Z = mvn_dist.pdf(thetas)
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
g.map_offdiag(
Expand All @@ -419,7 +424,7 @@ def pairwise_plot(
)

elif dist == "KDE":
kde_dist = fit_kde_dist(thetas)
kde_dist = fit_kde_dist(thetas, seed=seed)
Z = kde_dist.pdf(thetas.transpose())
score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
g.map_offdiag(
Expand Down Expand Up @@ -512,7 +517,7 @@ def fit_rect_dist(theta_values, alpha):
return lower_bound, upper_bound


def fit_mvn_dist(theta_values):
def fit_mvn_dist(theta_values, seed=None):
"""
Fit a multivariate normal distribution to theta values

Expand All @@ -527,13 +532,18 @@ def fit_mvn_dist(theta_values):
"""
assert isinstance(theta_values, pd.DataFrame)

if seed is not None:
np.random.seed(seed)

dist = stats.multivariate_normal(
theta_values.mean(), theta_values.cov(), allow_singular=True
theta_values.mean(), theta_values.cov(), allow_singular=True,
seed=seed
)

return dist


def fit_kde_dist(theta_values):
def fit_kde_dist(theta_values, seed=None):
"""
Fit a Gaussian kernel-density distribution to theta values

Expand All @@ -547,6 +557,8 @@ def fit_kde_dist(theta_values):
scipy.stats.gaussian_kde distribution
"""
assert isinstance(theta_values, pd.DataFrame)
if seed is not None:
np.random.seed(seed)

dist = stats.gaussian_kde(theta_values.transpose().values)

Expand Down
28 changes: 17 additions & 11 deletions pyomo/contrib/parmest/parmest.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,6 @@ def _create_parmest_model(self, experiment_number):

# Add objective function (optional)
if self.obj_function:

# Check for component naming conflicts
reserved_names = [
'Total_Cost_Objective',
Expand Down Expand Up @@ -818,7 +817,7 @@ def _Q_at_theta(self, thetavals, initialize_parmest_model=False):

return retval, thetavals, WorstStatus

def _get_sample_list(self, samplesize, num_samples, replacement=True):
def _get_sample_list(self, samplesize, num_samples, replacement=True, seed=None):
samplelist = list()

scenario_numbers = list(range(len(self.exp_list)))
Expand All @@ -834,7 +833,9 @@ def _get_sample_list(self, samplesize, num_samples, replacement=True):
duplicate = False # check for duplicates between samples
while (unique_samples <= len(self._return_theta_names())) and (
not duplicate
):
):
# if seed is not None:
# np.random.seed(seed) # set seed for reproducibility
sample = np.random.choice(
scenario_numbers, samplesize, replace=replacement
)
Expand Down Expand Up @@ -920,7 +921,7 @@ def theta_est(
calc_cov=calc_cov,
cov_n=cov_n,
)

def theta_est_bootstrap(
self,
bootstrap_samples,
Expand Down Expand Up @@ -994,7 +995,7 @@ def theta_est_bootstrap(
del bootstrap_theta['samples']

return bootstrap_theta

def theta_est_leaveNout(
self, lNo, lNo_samples=None, seed=None, return_samples=False
):
Expand Down Expand Up @@ -1036,7 +1037,7 @@ def theta_est_leaveNout(
if seed is not None:
np.random.seed(seed)

global_list = self._get_sample_list(samplesize, lNo_samples, replacement=False)
global_list = self._get_sample_list(samplesize, lNo_samples, replacement=False, seed=seed)

task_mgr = utils.ParallelTaskManager(len(global_list))
local_list = task_mgr.global_to_local_data(global_list)
Expand Down Expand Up @@ -1116,20 +1117,21 @@ def leaveNout_bootstrap_test(
if seed is not None:
np.random.seed(seed)

global_list = self._get_sample_list(lNo, lNo_samples, replacement=False)
global_list = self._get_sample_list(lNo, lNo_samples, replacement=False,)

results = []
for idx, sample in global_list:

obj, theta = self.theta_est()

bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples)
bootstrap_theta = self.theta_est_bootstrap(bootstrap_samples, seed=seed)

training, test = self.confidence_region_test(
bootstrap_theta,
distribution=distribution,
alphas=alphas,
test_theta_values=theta,
seed=seed,
)

results.append((sample, test, training))
Expand Down Expand Up @@ -1287,7 +1289,8 @@ def likelihood_ratio_test(
return LR

def confidence_region_test(
self, theta_values, distribution, alphas, test_theta_values=None
self, theta_values, distribution, alphas, test_theta_values=None,
seed=None
):
"""
Confidence region test to determine if theta values are within a
Expand Down Expand Up @@ -1341,6 +1344,9 @@ def confidence_region_test(
if test_theta_values is not None:
test_result = test_theta_values.copy()

if seed is not None:
np.random.seed(seed)

for a in alphas:
if distribution == 'Rect':
lb, ub = graphics.fit_rect_dist(theta_values, a)
Expand All @@ -1355,7 +1361,7 @@ def confidence_region_test(
).all(axis=1)

elif distribution == 'MVN':
dist = graphics.fit_mvn_dist(theta_values)
dist = graphics.fit_mvn_dist(theta_values, seed=seed)
Z = dist.pdf(theta_values)
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
training_results[a] = Z >= score
Expand All @@ -1366,7 +1372,7 @@ def confidence_region_test(
test_result[a] = Z >= score

elif distribution == 'KDE':
dist = graphics.fit_kde_dist(theta_values)
dist = graphics.fit_kde_dist(theta_values, seed=seed)
Z = dist.pdf(theta_values.transpose())
score = scipy.stats.scoreatpercentile(Z, (1 - a) * 100)
training_results[a] = Z >= score
Expand Down
14 changes: 9 additions & 5 deletions pyomo/contrib/parmest/tests/test_parmest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@
ipopt_available = SolverFactory("ipopt").available()
pynumero_ASL_available = AmplInterface.available()
testdir = this_file_dir()
SET_GLOBAL_SEED = True


if SET_GLOBAL_SEED:
# Set the global seed for reproducibility
seed= 524
np.random.seed(seed) # Set seed for reproducibility
@unittest.skipIf(
not parmest.parmest_available,
"Cannot test parmest: required dependencies are missing",
Expand Down Expand Up @@ -93,7 +97,7 @@ def test_bootstrap(self):
objval, thetavals = self.pest.theta_est()

num_bootstraps = 10
theta_est = self.pest.theta_est_bootstrap(num_bootstraps, return_samples=True)
theta_est = self.pest.theta_est_bootstrap(num_bootstraps, return_samples=True, seed=seed)

num_samples = theta_est["samples"].apply(len)
self.assertEqual(len(theta_est.index), 10)
Expand All @@ -109,9 +113,9 @@ def test_bootstrap(self):
self.assertEqual(CR[0.75].sum(), 7)
self.assertEqual(CR[1.0].sum(), 10) # all true

graphics.pairwise_plot(theta_est)
graphics.pairwise_plot(theta_est, thetavals)
graphics.pairwise_plot(theta_est, thetavals, 0.8, ["MVN", "KDE", "Rect"])
graphics.pairwise_plot(theta_est, seed=seed)
graphics.pairwise_plot(theta_est, thetavals, seed=seed)
graphics.pairwise_plot(theta_est, thetavals, 0.8, ["MVN", "KDE", "Rect"], seed=seed)

@unittest.skipIf(
not graphics.imports_available, "parmest.graphics imports are unavailable"
Expand Down
Loading