Skip to content

Commit cc1fb65

Browse files
committed
feat: add optimize_sq_fit
1 parent 09d6d48 commit cc1fb65

File tree

2 files changed

+112
-9
lines changed

2 files changed

+112
-9
lines changed

glassure/optimization.py

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
from typing import Optional
55

66
import numpy as np
7-
from scipy.integrate import simpson
87
from lmfit import Parameters, minimize
98

109
from . import Pattern
1110
from .transform import calculate_fr, calculate_gr, calculate_sq_from_fr
1211

1312
__all__ = [
1413
"optimize_sq",
14+
"optimize_sq_fit",
1515
"optimize_density",
1616
]
1717

@@ -98,9 +98,88 @@ def optimize_sq(
9898
return sq_pattern
9999

100100

101+
def optimize_sq_fit(sq_pattern: Pattern, r_cutoff: float) -> Pattern:
102+
"""
103+
Optimizes the S(Q) pattern by fitting a polynomial to the F(Q) = q( S(Q) - 1 ). The order of the polynomial
104+
is determined by the q_max and r_cutoff value = r_cutoff * q_max / pi. The zero order term is fixed to 0.
105+
106+
This method is based on the normalization description for PDFGetX3 in the following reference:
107+
108+
Juhás, P., Davis, T., Farrow, C.L., Billinge, S.J.L., 2013. PDFgetX3: a rapid and highly automatable
109+
program for processing powder diffraction data into total scattering pair distribution functions.
110+
J Appl Crystallogr 46, 560–566. https://doi.org/10.1107/S0021889813005190
111+
112+
In order to try to do a similar procedure as in the above paper, the input S(Q) should be created using
113+
a normalization without incoherent scattering. Since it is assume that the polynomial fit, will also
114+
remove the incoherent scattering.
115+
116+
:param sq_pattern:
117+
original S(Q)
118+
:param r_cutoff:
119+
cutoff value below which there is no signal expected (below the first peak in g(r))
120+
121+
:return:
122+
optimized S(Q) pattern
123+
"""
124+
125+
q = sq_pattern.x
126+
fq = sq_pattern.x * (sq_pattern.y - 1)
127+
128+
degree = q.max() * r_cutoff / (np.pi)
129+
degree = max(1.0, degree) # at least a linear fit
130+
131+
degree_high = np.ceil(degree).astype(int)
132+
degree_low = np.floor(degree).astype(int)
133+
134+
if degree_low == degree_high:
135+
# When degrees are the same, we only need to fit once
136+
coeffs_low = fit_polynom_through_origin(q, fq, degree_low)
137+
p_low = np.poly1d(coeffs_low)
138+
fq_fit = p_low(q)
139+
else:
140+
weight_low, weight_high = degree_high - degree, degree - degree_low
141+
coeffs_low = fit_polynom_through_origin(q, fq, degree_low)
142+
coeffs_high = fit_polynom_through_origin(q, fq, degree_high)
143+
p_low = np.poly1d(coeffs_low)
144+
p_high = np.poly1d(coeffs_high)
145+
fq_fit = p_low(q) * weight_low + p_high(q) * weight_high
146+
147+
return Pattern(sq_pattern.x, sq_pattern.y - fq_fit / sq_pattern.x)
148+
149+
150+
def fit_polynom_through_origin(x, y , degree: int) -> np.ndarray:
151+
"""
152+
Fits a polynomial of given degree through the data points (x, y) with the constraint that the polynomial goes
153+
through the origin (0,0). The zero order term is fixed to 0.
154+
155+
Implementation is based on ChatGPT recommendation for it to be the fastest solution.
156+
157+
:param x:
158+
x data points
159+
:param y:
160+
y data points
161+
:param degree:
162+
degree of the polynomial
163+
164+
:return:
165+
coefficients of the polynomial, highest degree first
166+
"""
167+
# Vandermonde matrix WITHOUT the x⁰ column
168+
# shape: (len(x), degree)
169+
X = np.vstack([x**k for k in range(1, degree + 1)]).T
170+
171+
# Solve X * beta = y
172+
beta, *_ = np.linalg.lstsq(
173+
X, y, rcond=-1
174+
) # rcond=-1 means as much precision as possible
175+
176+
return np.concatenate(
177+
(beta[::-1], [0])
178+
) # add zero coefficient for x⁰ and reverse order
179+
180+
101181
from .calc import calculate_pdf
102-
from .configuration import DataConfig, CalculationConfig
103-
from .methods import ExtrapolationMethod
182+
from .configuration import CalculationConfig, DataConfig
104183

105184

106185
def optimize_density(
@@ -117,8 +196,8 @@ def optimize_density(
117196
The density in the SampleConfig of the DataConfig is taking as starting parameter
118197
119198
For method='gr' or method='fr' the optimization is based on the g(r) or f(r) function, and the density is
120-
optimized to minimize the low g(r) or f(r) region to be close to zero. The Lorch modification function will be
121-
applied before calculating the chi square of the low r region if it is applied in the calculation configuration.
199+
optimized to minimize the low g(r) or f(r) region to be close to zero. The Lorch modification function will be
200+
applied before calculating the chi square of the low r region if it is applied in the calculation configuration.
122201
The general procedure is explained in Eggert et al. 2002 PRB, 65, 174105.
123202
124203
For method='sq' the optimization is based on the low Q part of the S(Q) function, and the density is optimized

tests/test_optimization.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
calculate_incoherent_scattering,
1313
calculate_s0,
1414
)
15-
from glassure.transform import calculate_sq, calculate_fr
16-
from glassure.normalization import normalize_fit, normalize
15+
from glassure.transform import calculate_sq, calculate_fr, calculate_gr
16+
from glassure.normalization import normalize_fit
1717
from glassure.configuration import OptimizeConfig
18-
from glassure.optimization import optimize_sq, optimize_density
18+
from glassure.optimization import optimize_sq, optimize_density, optimize_sq_fit
1919
from glassure.calc import create_calculate_pdf_configs
2020
from glassure.methods import ExtrapolationMethod
2121

@@ -111,7 +111,6 @@ def test_optimize_sq(sq, atomic_density):
111111
sq_optimized = optimize_sq(sq, 1.4, 5, atomic_density)
112112
assert not np.allclose(sq.y, sq_optimized.y)
113113

114-
115114
def test_optimize_sq_fft(sq, atomic_density):
116115
iterations = 5
117116
r_step = 0.001 # need high value to be accurate for fft
@@ -139,6 +138,31 @@ def test_optimize_sq_fft(sq, atomic_density):
139138
assert np.allclose(fr_optimized.y, fr_optimized_fft.y, atol=0.1)
140139

141140

141+
def test_optimize_sq_fit_SiO2(sq, atomic_density):
142+
sq_fit = optimize_sq_fit(sq, 1.4)
143+
sq_kaplow = optimize_sq(sq, 1.4, 5, atomic_density)
144+
145+
146+
assert np.mean((np.array(sq.y) - np.array(sq_fit.y)) ** 2) < 0.3
147+
assert np.mean((np.array(sq.y) - np.array(sq_kaplow.y)) ** 2) < 0.3
148+
149+
fr_original = calculate_fr(sq, method="fft")
150+
fr_fit = calculate_fr(sq_fit, method="fft")
151+
fr_kaplow = calculate_fr(sq_kaplow, method="fft")
152+
153+
assert np.mean((np.array(fr_original.y) - np.array(fr_fit.y)) ** 2) < 1.0
154+
assert np.mean((np.array(fr_original.y) - np.array(fr_kaplow.y)) ** 2) < 1.0
155+
assert np.mean((np.array(fr_fit.y) - np.array(fr_kaplow.y)) ** 2) < 1.0
156+
157+
gr_original = calculate_gr(fr_original, atomic_density).limit(1, 10)
158+
gr_fit = calculate_gr(fr_fit, atomic_density).limit(1, 10)
159+
gr_kaplow = calculate_gr(fr_kaplow, atomic_density).limit(1, 10)
160+
161+
assert np.mean((np.array(gr_original.y) - np.array(gr_fit.y)) ** 2) < 1.0
162+
assert np.mean((np.array(gr_original.y) - np.array(gr_kaplow.y)) ** 2) < 1.0
163+
assert np.mean((np.array(gr_fit.y) - np.array(gr_kaplow.y)) ** 2) < 1.0
164+
165+
142166
def test_optimize_density_SiO2(data_path, bkg_path):
143167
data = Pattern.from_file(data_path)
144168
background = Pattern.from_file(bkg_path)

0 commit comments

Comments
 (0)