Skip to content

Commit

Permalink
add averaged morphing and possibility to plot several morphed points …
Browse files Browse the repository at this point in the history
…at once in single bin plots
  • Loading branch information
nprouvost committed Aug 23, 2024
1 parent cae9292 commit a18d7ee
Show file tree
Hide file tree
Showing 2 changed files with 260 additions and 43 deletions.
135 changes: 131 additions & 4 deletions hbt/config/hist_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,24 +256,113 @@ def hh_morphing(task, hists, guidance_points=["0", "1", "2p45"], target_point="k
# be careful with the order, the categories can be shuffled around in the different histograms
# so sort the values by the categories
model_values = np.array([
model_hists[i].view().value[np.argsort(model_hists[i].axes[0])] for i in range(3)
model_hists[i].view().value[np.argsort(model_hists[i].axes[0])] for i in range(len(guidance_points))
])

model_variances = np.array([
model_hists[i].view().variance[np.argsort(model_hists[i].axes[0])] for i in range(3)
model_hists[i].view().variance[np.argsort(model_hists[i].axes[0])] for i in range(len(guidance_points))
])

# morphed values
original_hist_shape = hists[new_proc].view().value.shape
morphed_values = np.matmul(
np.matmul(new_coefficients, inv_guidance_matrix),
model_values.reshape(3, -1),
model_values.reshape(len(guidance_points), -1),
).reshape(original_hist_shape)

# morphed variances, using quadratic error propagation (and therefore assuming uncorrelated uncertainties)
morphed_variances = np.matmul(
np.matmul(new_coefficients**2, inv_guidance_matrix**2),
model_variances.reshape(3, -1),
model_variances.reshape(len(guidance_points), -1),
).reshape(original_hist_shape)

# reshape the values to the correct categorization
morphed_values_correct_categorization = morphed_values[np.argsort(np.argsort(model_hists[0].axes[0]))]
morphed_variances_correct_categorization = morphed_variances[np.argsort(np.argsort(model_hists[0].axes[0]))]

# insert values into the new histogram
hists[new_proc].view().value = morphed_values_correct_categorization
hists[new_proc].view().variance = morphed_variances_correct_categorization

return hists

def hh_averaged_morphing(task, hists, guidance_points=["0", "1", "2p45", "5"], target_point="kl5_kt1"):

guidance_points_float = [float(i.replace("p", ".")) for i in guidance_points]

model_procs = [
config.get_process(f"hh_ggf_hbb_htt_kl{i}_kt1", default=None)
for i in guidance_points
]
if not all(model_procs):
return hists

# verify that the axis order is exactly "category -> shift -> variable"
# which is needed to insert values at the end
CAT_AXIS, SHIFT_AXIS, VAR_AXIS = range(3)
for h in hists.values():
# validate axes
assert len(h.axes) == 3
assert h.axes[CAT_AXIS].name == "category"
assert h.axes[SHIFT_AXIS].name == "shift"

# get model histograms, stop early when not four present
model_hists = [h for p, h in hists.items() if p in model_procs]
if len(model_hists) != 4:
raise Exception("not all four model histograms present, averaged morphing cannot occur")
return hists

# prepare morphing here
# build guidance matrix from guidance points
# knowing that all model hists have kt=1
# (formula for any kt: (kt**2*kl**2,kt**4,kt**3*kl))
guidance_matrix = np.array([
[guidance_point**2, 1, guidance_point] for guidance_point in guidance_points_float
])

# pseudo inverse guidance matrix
pinv_guidance_matrix = np.linalg.pinv(guidance_matrix)

# new coefficients for the newly created point
kl = read_coupling_values(target_point)["kl"]
kt = read_coupling_values(target_point)["kt"]
new_coefficients = np.array([kt**2 * kl**2, kt**4, kt**3 * kl])

# create the new process
new_proc = od.Process(
# "_average" used to distinguish from the other morphing in plot functions
f"hh_ggf_hbb_htt_{target_point}_average_morphed",
id=21130,
label=r"average morphed $HH_{ggf} \rightarrow bb\tau\tau$ "
"\n"
r"($\kappa_{\lambda}=$" + str(kl) + r", $\kappa_{t}=$" + str(kt) + ")",
)

# create the new hist
hists[new_proc] = model_hists[0].copy().reset()

# morphing
# be careful with the order, the categories can be shuffled around in the different histograms
# so sort the values by the categories
model_values = np.array([
model_hists[i].view().value[np.argsort(model_hists[i].axes[0])] for i in range(len(guidance_points))
])

model_variances = np.array([
model_hists[i].view().variance[np.argsort(model_hists[i].axes[0])] for i in range(len(guidance_points))
])

# morphed values
original_hist_shape = hists[new_proc].view().value.shape
morphed_values = np.matmul(
np.matmul(new_coefficients, pinv_guidance_matrix),
model_values.reshape(len(guidance_points), -1),
).reshape(original_hist_shape)

# morphed variances, using quadratic error propagation (and therefore assuming uncorrelated uncertainties)
morphed_variances = np.matmul(
np.matmul(new_coefficients**2, pinv_guidance_matrix**2),
model_variances.reshape(len(guidance_points), -1),
).reshape(original_hist_shape)

# reshape the values to the correct categorization
Expand All @@ -292,4 +381,42 @@ def hh_morphing(task, hists, guidance_points=["0", "1", "2p45"], target_point="k
"hh_morphing_kl2p45_kt1": partial(hh_morphing, guidance_points=["0", "1", "5"], target_point="kl2p45_kt1"),
"hh_morphing_kl1_kt1": partial(hh_morphing, guidance_points=["0", "2p45", "5"], target_point="kl1_kt1"),
"hh_morphing_kl0_kt1": partial(hh_morphing, guidance_points=["1", "2p45", "5"], target_point="kl0_kt1"),
"hh_morphing_kl2_kt1": partial(hh_morphing, guidance_points=["0", "1", "2p45"], target_point="kl2_kt1"),
"hh_morphing_kl3_kt1": partial(hh_morphing, guidance_points=["0", "1", "2p45"], target_point="kl3_kt1"),
"hh_morphing_kl4_kt1": partial(hh_morphing, guidance_points=["0", "1", "2p45"], target_point="kl4_kt1"),
"hh_averaged_morphing_kl5_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl5_kt1",
),
"hh_averaged_morphing_kl0_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl0_kt1",
),
"hh_averaged_morphing_kl1_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl1_kt1",
),
"hh_averaged_morphing_kl2_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl2_kt1",
),
"hh_averaged_morphing_kl2p45_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl2p45_kt1",
),
"hh_averaged_morphing_kl3_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl3_kt1",
),
"hh_averaged_morphing_kl4_kt1": partial(
hh_averaged_morphing,
guidance_points=["0", "1", "2p45", "5"],
target_point="kl4_kt1",
),
}
168 changes: 129 additions & 39 deletions hbt/plotting/morphing_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def plot_morphing_comparison(

non_morphed_hist = OrderedDict()
for key, hist in hists.items():
if list(morphed_hists.keys())[0].name.replace("_morphed", "") == key.name:
if list(morphed_hists.keys())[0].name.replace("_morphed", "").replace("_average", "") == key.name:
non_morphed_hist[key] = hist

if len(morphed_hists.keys()) != len(non_morphed_hist.keys()):
Expand Down Expand Up @@ -289,38 +289,51 @@ def plot_bin_morphing(
) -> plt.Figure:

variable_inst = variable_insts[0]
averaged = False

# separate morphed, true and guidance points histograms
morphed_hists = OrderedDict()
for key, hist in hists.items():
if "morphed" in key.name:
morphed_hists[key] = hist
if "average" in key.name:
averaged = True

if not morphed_hists:
raise ValueError("No morphed histograms found in input hists.")

if len(morphed_hists.keys()) > 1:
raise ValueError("More than one morphed histogram found in input hists. Not implemented")

# non morphed
non_morphed_hist = OrderedDict()
for key, hist in hists.items():
if list(morphed_hists.keys())[0].name.replace("_morphed", "") == key.name:
non_morphed_hist[key] = hist

if len(morphed_hists.keys()) != len(non_morphed_hist.keys()):
raise ValueError("Number of morphed and non-morphed histograms do not match.")

# guidance point histograms
guidance_hists = OrderedDict()
for key, hist in hists.items():
not_in_non_morphed_hists = (key.name != list(non_morphed_hist.keys())[0].name)
not_in_morphed_hists = (key.name != list(morphed_hists.keys())[0].name)
if ("hh_ggf_hbb_htt_kl" in key.name) and not_in_non_morphed_hists and not_in_morphed_hists:
guidance_hists[key] = hist

if len(guidance_hists.keys()) != 3:
raise ValueError("Number of guidance point histograms is not 3.")
# if len(morphed_hists.keys()) > 1:
# raise ValueError("More than one morphed histogram found in input hists. Not implemented")

if not averaged:
# non morphed
non_morphed_hist = OrderedDict()
for key, hist in hists.items():
if list(morphed_hists.keys())[0].name.replace("_morphed", "").replace("_average", "") == key.name:
non_morphed_hist[key] = hist

# if len(morphed_hists.keys()) != len(non_morphed_hist.keys()):
# raise ValueError("Number of morphed and non-morphed histograms do not match.")

# guidance point histograms
guidance_hists = OrderedDict()
for key, hist in hists.items():
not_in_non_morphed_hists = (key.name != list(non_morphed_hist.keys())[0].name)
not_in_morphed_hists = (key.name not in [list(morphed_hists.keys())[i].name for i in range(len(morphed_hists.keys()))]) # noqa
if ("hh_ggf_hbb_htt_kl" in key.name) and not_in_non_morphed_hists and not_in_morphed_hists:
guidance_hists[key] = hist

if len(guidance_hists.keys()) != 3:
raise ValueError("Number of guidance point histograms is not 3.")
else:
# guidance point histograms
guidance_hists = OrderedDict()
for key, hist in hists.items():
if "hh_ggf_hbb_htt_kl" in key.name and "morphed" not in key.name:
guidance_hists[key] = hist

if len(guidance_hists.keys()) != 4:
raise ValueError("Number of guidance point histograms is not 4.")

# get guidance points
guidance_points_str = [key.name.replace("hh_ggf_hbb_htt_kl", "") for key in guidance_hists.keys()]
Expand All @@ -331,22 +344,30 @@ def plot_bin_morphing(
chosen_bin = function_bin_search(list(morphed_hists.values())[0].values())

# get value of chosen bin for each histogram
morphed_chosen_bin = np.array([
list(morphed_hists.values())[0].values()[..., chosen_bin][0],
list(morphed_hists.values())[0].variances()[..., chosen_bin][0],
])

non_morphed_chosen_bin = np.array([
list(non_morphed_hist.values())[0].values()[..., chosen_bin][0],
list(non_morphed_hist.values())[0].variances()[..., chosen_bin][0],
])
morphed_chosen_bins = []
for key, hist in morphed_hists.items():
morphed_chosen_bins.append(np.array([
hist.values()[..., chosen_bin][0],
hist.variances()[..., chosen_bin][0],
]))
morphed_chosen_bins = np.array(morphed_chosen_bins)
# morphed_chosen_bin = np.array([
# list(morphed_hists.values())[0].values()[..., chosen_bin][0],
# list(morphed_hists.values())[0].variances()[..., chosen_bin][0],
# ])

if not averaged:
non_morphed_chosen_bin = np.array([
list(non_morphed_hist.values())[0].values()[..., chosen_bin][0],
list(non_morphed_hist.values())[0].variances()[..., chosen_bin][0],
])

guidance_chosen_bins_values = np.array([
list(guidance_hists.values())[i].values()[..., chosen_bin][0] for i in range(3)
list(guidance_hists.values())[i].values()[..., chosen_bin][0] for i in range(len(guidance_points))
])

guidance_chosen_bins_variances = np.array([
list(guidance_hists.values())[i].variances()[..., chosen_bin][0] for i in range(3)
list(guidance_hists.values())[i].variances()[..., chosen_bin][0] for i in range(len(guidance_points))
])

# fit a parabola to the guidance points
Expand All @@ -355,7 +376,12 @@ def plot_bin_morphing(
def parabola(x, a, b, c):
return a * x**2 + b * x + c

popt, pcov = curve_fit(parabola, guidance_points, guidance_chosen_bins_values, sigma=guidance_chosen_bins_variances)
popt, pcov = curve_fit(
parabola,
guidance_points,
guidance_chosen_bins_values,
sigma=np.sqrt(guidance_chosen_bins_variances),
)

# plot the parabola
fig, ax = plt.subplots()
Expand All @@ -367,6 +393,41 @@ def parabola(x, a, b, c):

ax.plot(x, y, label="Parabola fit", color="black")

if averaged:
# fit parabola unweighted
popt_unweighted, pcov_unweighted = curve_fit(parabola, guidance_points, guidance_chosen_bins_values)

y_unweighted = parabola(x, *popt_unweighted)
ax.plot(x, y_unweighted, label="Parabola fit unweighted", color="orange")

if not averaged:
# fit a parabola to the four true points
# get fourth point from non-morphed histogram
non_morphed_hist_name = list(non_morphed_hist.keys())[0].name
non_morphed_point = float(non_morphed_hist_name.replace("hh_ggf_hbb_htt_kl", "").replace("_kt1", "").replace("p", ".")) # noqa
popt_2, pcov_2 = curve_fit(
parabola,
[non_morphed_point, *guidance_points],
[non_morphed_chosen_bin[0], *guidance_chosen_bins_values],
sigma=np.sqrt(np.array([non_morphed_chosen_bin[1], *guidance_chosen_bins_variances])),
)

y_2 = parabola(x, *popt_2)
ax.plot(x, y_2, label="Parabola fit with 4 values", color="orange")

# same result as above

# # fit a parabola with 4 points but through the polyfit function
# coefs = np.polyfit(
# [non_morphed_point, *guidance_points],
# [non_morphed_chosen_bin[0], *guidance_chosen_bins_values],
# 2,
# w=1 / np.sqrt(np.array([non_morphed_chosen_bin[1], *guidance_chosen_bins_variances])),
# )

# y_3 = np.polyval(coefs, x)
# ax.plot(x, y_3, label="Polyfit with 4 values", color="green")

# plot the guidance points
ax.errorbar(
guidance_points,
Expand All @@ -377,10 +438,38 @@ def parabola(x, a, b, c):
)

# plot the chosen bin values for morphed and non-morphed histograms
morphed_hist_name = list(morphed_hists.keys())[0].name
morphed_point = float(morphed_hist_name.replace("hh_ggf_hbb_htt_kl", "").replace("_kt1_morphed", "").replace("p", ".")) # noqa
ax.errorbar(morphed_point, morphed_chosen_bin[0], yerr=np.sqrt(morphed_chosen_bin[1]), fmt="ro", label="Morphed")
ax.errorbar(morphed_point, non_morphed_chosen_bin[0], yerr=np.sqrt(non_morphed_chosen_bin[1]), fmt="go", label="True value") # noqa
if averaged:
label_ = "Average morphed"
else:
label_ = "Morphed"
morphed_points = []
# label_list = []
i = 0
for key, hist in morphed_hists.items():
morphed_hist_name = list(morphed_hists.keys())[i].name
if "average" in morphed_hist_name:
morphed_hist_name = morphed_hist_name.replace("_average", "")
morphed_point = float(morphed_hist_name.replace("hh_ggf_hbb_htt_kl", "").replace("_kt1_morphed", "").replace("p", ".")) # noqa
morphed_points.append(morphed_point)
i += 1
# label_list.append(label_ + " kl " + str(morphed_point))
ax.errorbar(
morphed_points,
morphed_chosen_bins[:, 0],
yerr=np.sqrt(morphed_chosen_bins[:, 1]),
fmt="ro",
label=label_,
)
if not averaged:
non_morphed_hist_name = list(non_morphed_hist.keys())[0].name
non_morphed_point = float(non_morphed_hist_name.replace("hh_ggf_hbb_htt_kl", "").replace("_kt1", "").replace("p", ".")) # noqa
ax.errorbar(
non_morphed_point,
non_morphed_chosen_bin[0],
yerr=np.sqrt(non_morphed_chosen_bin[1]),
fmt="go",
label="True value",
)

# add text with the chosen bin value and the variable name on the top right corner
ax.text(
Expand All @@ -401,3 +490,4 @@ def parabola(x, a, b, c):

plot_max_bin_morphing = partial(plot_bin_morphing, function_bin_search=np.argmax, bin_type="Max bin")
plot_min_bin_morphing = partial(plot_bin_morphing, function_bin_search=np.argmin, bin_type="Min bin")
plot_bin_5_morphing = partial(plot_bin_morphing, function_bin_search=lambda x: 5, bin_type="Bin 5")

0 comments on commit a18d7ee

Please sign in to comment.