Merge pull request #101 from yucongalicechen/mud

estimate mu*D from z-scan file
diffpy · Sep 14, 2024 · 89c1322 · 89c1322
2 parents adee9b2 + 0013d35
commit 89c1322
Show file tree

Hide file tree

Showing 6 changed files with 197 additions and 0 deletions.
diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py
@@ -124,6 +124,12 @@ def get_args(override_cli_inputs=None):
         ),
         default=None,
     )
+    p.add_argument(
+        "-z",
+        "--z-scan-file",
+        help="Path to the z-scan file to be loaded to determine the mu*D value",
+        default=None,
+    )
     args = p.parse_args(override_cli_inputs)
     return args
 

diff --git a/src/diffpy/labpdfproc/mud_calculator.py b/src/diffpy/labpdfproc/mud_calculator.py
@@ -0,0 +1,100 @@
+import numpy as np
+from scipy.optimize import dual_annealing
+from scipy.signal import convolve
+
+from diffpy.utils.parsers.loaddata import loadData
+
+
+def _top_hat(x, slit_width):
+    """
+    create a top-hat function, return 1.0 for values within the specified slit width and 0 otherwise
+    """
+    return np.where((x >= -slit_width) & (x <= slit_width), 1.0, 0)
+
+
+def _model_function(x, diameter, x0, I0, mud, slope):
+    """
+    compute the model function with the following steps:
+    1. Recenter x to h by subtracting x0 (so that the circle is centered at 0 and it is easier to compute length l)
+    2. Compute length l that is the effective length for computing intensity I = I0 * e^{-mu * l}:
+    - For h within the diameter range, l is the chord length of the circle at position h
+    - For h outside this range, l = 0
+    3. Apply a linear adjustment to I0 by taking I0 as I0 - slope * x
+    """
+    min_radius = -diameter / 2
+    max_radius = diameter / 2
+    h = x - x0
+    length = np.piecewise(
+        h,
+        [h < min_radius, (min_radius <= h) & (h <= max_radius), h > max_radius],
+        [0, lambda h: 2 * np.sqrt((diameter / 2) ** 2 - h**2), 0],
+    )
+    return (I0 - slope * x) * np.exp(-mud / diameter * length)
+
+
+def _extend_x_and_convolve(x, diameter, slit_width, x0, I0, mud, slope):
+    """
+    extend x values and I values for padding (so that we don't have tails in convolution), then perform convolution
+    (note that the convolved I values are the same as modeled I values if slit width is close to 0)
+    """
+    n_points = len(x)
+    x_left_pad = np.linspace(x.min() - n_points * (x[1] - x[0]), x.min(), n_points)
+    x_right_pad = np.linspace(x.max(), x.max() + n_points * (x[1] - x[0]), n_points)
+    x_extended = np.concatenate([x_left_pad, x, x_right_pad])
+    I_extended = _model_function(x_extended, diameter, x0, I0, mud, slope)
+    kernel = _top_hat(x_extended - x_extended.mean(), slit_width)
+    I_convolved = I_extended  # this takes care of the case where slit width is close to 0
+    if kernel.sum() != 0:
+        kernel /= kernel.sum()
+        I_convolved = convolve(I_extended, kernel, mode="same")
+    padding_length = len(x_left_pad)
+    return I_convolved[padding_length:-padding_length]
+
+
+def _objective_function(params, x, observed_data):
+    """
+    compute the objective function for fitting a model to the observed/experimental data
+    by minimizing the sum of squared residuals between the observed data and the convolved model data
+    """
+    diameter, slit_width, x0, I0, mud, slope = params
+    convolved_model_data = _extend_x_and_convolve(x, diameter, slit_width, x0, I0, mud, slope)
+    residuals = observed_data - convolved_model_data
+    return np.sum(residuals**2)
+
+
+def _compute_single_mud(x_data, I_data):
+    """
+    perform dual annealing optimization and extract the parameters
+    """
+    bounds = [
+        (1e-5, x_data.max() - x_data.min()),  # diameter: [small positive value, upper bound]
+        (0, (x_data.max() - x_data.min()) / 2),  # slit width: [0, upper bound]
+        (x_data.min(), x_data.max()),  # x0: [min x, max x]
+        (1e-5, I_data.max()),  # I0: [small positive value, max observed intensity]
+        (1e-5, 20),  # muD: [small positive value, upper bound]
+        (-10000, 10000),  # slope: [lower bound, upper bound]
+    ]
+    result = dual_annealing(_objective_function, bounds, args=(x_data, I_data))
+    diameter, slit_width, x0, I0, mud, slope = result.x
+    convolved_fitted_signal = _extend_x_and_convolve(x_data, diameter, slit_width, x0, I0, mud, slope)
+    residuals = I_data - convolved_fitted_signal
+    rmse = np.sqrt(np.mean(residuals**2))
+    return mud, rmse
+
+
+def compute_mud(filepath):
+    """
+    compute the best-fit mu*D value from a z-scan file
+
+    Parameters
+    ----------
+    filepath str
+        the path to the z-scan file
+
+    Returns
+    -------
+    a float contains the best-fit mu*D value
+    """
+    x_data, I_data = loadData(filepath, unpack=True)
+    best_mud, _ = min((_compute_single_mud(x_data, I_data) for _ in range(10)), key=lambda pair: pair[1])
+    return best_mud
diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py
@@ -1,6 +1,7 @@
 import copy
 from pathlib import Path
 
+from diffpy.labpdfproc.mud_calculator import compute_mud
 from diffpy.utils.tools import get_package_info, get_user_info
 
 WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54}
@@ -134,6 +135,28 @@ def set_wavelength(args):
     return args
 
 
+def set_mud(args):
+    """
+    Set the mud based on the given input arguments
+
+    Parameters
+    ----------
+    args argparse.Namespace
+        the arguments from the parser
+
+    Returns
+    -------
+    args argparse.Namespace
+    """
+    if args.z_scan_file:
+        filepath = Path(args.z_scan_file).resolve()
+        if not filepath.is_file():
+            raise FileNotFoundError(f"Cannot find {args.z_scan_file}. Please specify a valid file path.")
+        args.z_scan_file = str(filepath)
+        args.mud = compute_mud(filepath)
+    return args
+
+
 def _load_key_value_pair(s):
     items = s.split("=")
     key = items[0].strip()
@@ -234,6 +257,7 @@ def preprocessing_args(args):
     args = set_input_lists(args)
     args.output_directory = set_output_directory(args)
     args = set_wavelength(args)
+    args = set_mud(args)
     args = load_user_metadata(args)
     return args
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -11,6 +11,8 @@ def user_filesystem(tmp_path):
     input_dir.mkdir(parents=True, exist_ok=True)
     home_dir = base_dir / "home_dir"
     home_dir.mkdir(parents=True, exist_ok=True)
+    test_dir = base_dir / "test_dir"
+    test_dir.mkdir(parents=True, exist_ok=True)
 
     chi_data = "dataformat = twotheta\n mode = xray\n # chi_Q chi_I\n 1 2\n 3 4\n 5 6\n 7 8\n"
     xy_data = "1 2\n 3 4\n 5 6\n 7 8"
@@ -51,4 +53,19 @@ def user_filesystem(tmp_path):
     with open(home_dir / "diffpyconfig.json", "w") as f:
         json.dump(home_config_data, f)
 
+    z_scan_data = """
+        -1.00000000 100000.00000000
+        -0.77777778 100000.00000000
+        -0.55555556 100000.00000000
+        -0.33333333 10687.79256604
+        -0.11111111 5366.53289631
+        0.11111111 5366.53289631
+        0.33333333 10687.79256604
+        0.55555556 100000.00000000
+        0.77777778 100000.00000000
+        1.00000000 100000.00000000
+    """
+    with open(test_dir / "testfile.xy", "w") as f:
+        f.write(z_scan_data)
+
     yield tmp_path
diff --git a/tests/test_mud_calculator.py b/tests/test_mud_calculator.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from diffpy.labpdfproc.mud_calculator import _extend_x_and_convolve, compute_mud
+
+
+def test_compute_mud(tmp_path):
+    diameter, slit_width, x0, I0, mud, slope = 1, 0.1, 0, 1e5, 3, 0
+    x_data = np.linspace(-1, 1, 50)
+    convolved_I_data = _extend_x_and_convolve(x_data, diameter, slit_width, x0, I0, mud, slope)
+
+    directory = Path(tmp_path)
+    file = directory / "testfile"
+    with open(file, "w") as f:
+        for x, I in zip(x_data, convolved_I_data):
+            f.write(f"{x}\t{I}\n")
+
+    expected_mud = 3
+    actual_mud = compute_mud(file)
+    assert actual_mud == pytest.approx(expected_mud, rel=1e-4, abs=0.1)
diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -13,6 +13,7 @@
     load_user_metadata,
     preprocessing_args,
     set_input_lists,
+    set_mud,
     set_output_directory,
     set_wavelength,
 )
@@ -188,6 +189,32 @@ def test_set_wavelength_bad(inputs, msg):
         actual_args = set_wavelength(actual_args)
 
 
+def test_set_mud(user_filesystem):
+    cli_inputs = ["2.5", "data.xy"]
+    actual_args = get_args(cli_inputs)
+    actual_args = set_mud(actual_args)
+    assert actual_args.mud == pytest.approx(2.5, rel=1e-4, abs=0.1)
+    assert actual_args.z_scan_file is None
+
+    cwd = Path(user_filesystem)
+    test_dir = cwd / "test_dir"
+    os.chdir(cwd)
+    inputs = ["--z-scan-file", "test_dir/testfile.xy"]
+    expected = [3, str(test_dir / "testfile.xy")]
+    cli_inputs = ["2.5", "data.xy"] + inputs
+    actual_args = get_args(cli_inputs)
+    actual_args = set_mud(actual_args)
+    assert actual_args.mud == pytest.approx(expected[0], rel=1e-4, abs=0.1)
+    assert actual_args.z_scan_file == expected[1]
+
+
+def test_set_mud_bad():
+    cli_inputs = ["2.5", "data.xy", "--z-scan-file", "invalid file"]
+    actual_args = get_args(cli_inputs)
+    with pytest.raises(FileNotFoundError, match="Cannot find invalid file. Please specify a valid file path."):
+        actual_args = set_mud(actual_args)
+
+
 params5 = [
     ([], []),
     (
@@ -317,5 +344,6 @@ def test_load_metadata(mocker, user_filesystem):
             "username": "cli_username",
             "email": "[email protected]",
             "package_info": {"diffpy.labpdfproc": "1.2.3", "diffpy.utils": "3.3.0"},
+            "z_scan_file": None,
         }
         assert actual_metadata == expected_metadata