diff --git a/devutils/makesdist b/devutils/makesdist
index 1d81cf0..a7e2676 100644
--- a/devutils/makesdist
+++ b/devutils/makesdist
@@ -17,11 +17,11 @@ sys.path.insert(0, BASEDIR)
 from setup import versiondata
 timestamp = versiondata.getint('DEFAULT', 'timestamp')
 
-print 'Run "setup.py sdist --formats=tar"',
+print('Run "setup.py sdist --formats=tar"', end=" ")
 cmd_sdist = [sys.executable] + 'setup.py sdist --formats=tar'.split()
 ec = subprocess.call(cmd_sdist, cwd=BASEDIR, stdout=open(os.devnull, 'w'))
 if ec:
     sys.exit(ec)
-print "[done]"
+print("[done]")
 
 tarname = max(glob.glob(BASEDIR + '/dist/*.tar'), key=os.path.getmtime)
@@ -36,8 +36,8 @@ def fixtarinfo(tinfo):
     return tinfo
 
-print 'Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)),
+print('Filter %s --> %s.gz' % (2 * (os.path.basename(tarname),)), end=" ")
 for ti in tfin:
     tfout.addfile(fixtarinfo(ti), tfin.extractfile(ti))
 
 os.remove(tarname)
-print "[done]"
+print("[done]")
diff --git a/devutils/prep.py b/devutils/prep.py
index b765f7d..49afb2c 100644
--- a/devutils/prep.py
+++ b/devutils/prep.py
@@ -8,6 +8,8 @@
 
 __basedir__ = os.getcwdu()
 
+from numpy.compat import unicode
+
 # Example imports
 
@@ -18,20 +20,21 @@ def __init__(self):
 
     def test(self, call, *args, **kwds):
         m = sys.modules[call.__module__]
-        testname = m.__name__+'.'+call.__name__
+        testname = m.__name__ + "." + call.__name__
         path = os.path.dirname(m.__file__)
         os.chdir(path)
         try:
             call(*args, **kwds)
-            self.messages.append("%s: success" %testname)
-        except Exception, e:
-            self.messages.append("%s: error, details below.\n%s" %(testname, e))
+            self.messages.append("%s: success" % testname)
+        except Exception as e:
+            self.messages.append("%s: error, details below.\n%s" % (testname, e))
         finally:
             os.chdir(__basedir__)
 
     def report(self):
-        print '==== Results of Tests ===='
-        print '\n'.join(self.messages)
+        print("==== Results of Tests ====")
+        print("\n".join(self.messages))
+
 
 def scrubeol(directory, filerestr):
     """Use unix-style endlines for files in directory matched by regex string.
@@ -50,11 +53,11 @@ def scrubeol(directory, filerestr):
         text = unicode(original.read())
         original.close()
 
-        updated = io.open(f, 'w', newline='\n')
+        updated = io.open(f, "w", newline="\n")
         updated.write(text)
         updated.close()
 
-        print "Updated %s to unix-style endlines." %f
+        print("Updated %s to unix-style endlines." % f)
 
 
 def rm(directory, filerestr):
@@ -72,14 +75,13 @@ def rm(directory, filerestr):
 
     for f in files:
         os.remove(f)
-        print "Deleted %s." %f
-
+        print("Deleted %s." % f)
 
 
 if __name__ == "__main__":
 
     # Temporarily add examples to path
-    lib_path = os.path.abspath(os.path.join('..','doc','examples'))
+    lib_path = os.path.abspath(os.path.join("..", "doc", "examples"))
     sys.path.append(lib_path)
 
     # Delete existing files that don't necessarily have a fixed name.
@@ -88,14 +90,16 @@ def rm(directory, filerestr):
 
     ### Testing examples
     examples = Test()
-    test_names = ["extract_single_peak",
-                  "parameter_summary",
-                  "fit_initial",
-                  "query_results",
-                  "multimodel_known_dG1",
-                  "multimodel_known_dG2",
-                  "multimodel_unknown_dG1",
-                  "multimodel_unknown_dG2"]
+    test_names = [
+        "extract_single_peak",
+        "parameter_summary",
+        "fit_initial",
+        "query_results",
+        "multimodel_known_dG1",
+        "multimodel_known_dG2",
+        "multimodel_unknown_dG1",
+        "multimodel_unknown_dG2",
+    ]
 
     test_modules = []
     for test in test_names:
@@ -107,7 +111,7 @@ def rm(directory, filerestr):
     examples.report()
 
     ### Convert output of example files to Unix-style endlines for sdist.
-    if os.linesep != '\n':
-        print "==== Scrubbing Endlines ===="
+    if os.linesep != "\n":
+        print("==== Scrubbing Endlines ====")
 
         # All *.srmise and *.pwa files in examples directory.
         scrubeol("../doc/examples/output", r".*(\.srmise|\.pwa)")
diff --git a/diffpy/srmise/applications/extract.py b/diffpy/srmise/applications/extract.py
index 59ca6b2..6b89be9 100755
--- a/diffpy/srmise/applications/extract.py
+++ b/diffpy/srmise/applications/extract.py
@@ -149,9 +149,7 @@
         "--pf",
         dest="peakfunction",
         metavar="PF",
-        help="Fit peak function PF defined in "
-        "diffpy.srmise.peaks, e.g. "
-        "'GaussianOverR(maxwidth=0.7)'",
+        help="Fit peak function PF defined in " "diffpy.srmise.peaks, e.g. " "'GaussianOverR(maxwidth=0.7)'",
     )
     parser.add_option(
         "--cres",
@@ -230,8 +228,7 @@
         dest="bpoly0",
         type="string",
         metavar="a0[c]",
-        help="Use constant baseline given by y=a0. "
-        "Append 'c' to make parameter constant.",
+        help="Use constant baseline given by y=a0. " "Append 'c' to make parameter constant.",
     )
     group.add_option(
         "--bpoly1",
@@ -239,8 +236,7 @@
         type="string",
         nargs=2,
         metavar="a1[c] a0[c]",
-        help="Use baseline given by y=a1*x + a0. Append 'c' to "
-        "make parameter constant.",
+        help="Use baseline given by y=a1*x + a0. Append 'c' to " "make parameter constant.",
     )
     group.add_option(
         "--bpoly2",
@@ -248,16 +244,14 @@
         type="string",
         nargs=3,
         metavar="a2[c] a1[c] a0[c]",
-        help="Use baseline given by y=a2*x^2+a1*x + a0. Append "
-        "'c' to make parameter constant.",
+        help="Use baseline given by y=a2*x^2+a1*x + a0. Append " "'c' to make parameter constant.",
     )
     group.add_option(
         "--bseq",
         dest="bseq",
         type="string",
         metavar="FILE",
-        help="Use baseline interpolated from x,y values in FILE. "
-        "This baseline has no free parameters.",
+        help="Use baseline interpolated from x,y values in FILE. " "This baseline has no free parameters.",
     )
     group.add_option(
         "--bspherical",
@@ -343,9 +337,7 @@
         metavar="FILE",
         help="Save result of extraction to FILE (.srmise " "format).",
     )
-    group.add_option(
-        "--plot", "-p", action="store_true", dest="plot", help="Plot extracted peaks."
-    )
+    group.add_option("--plot", "-p", action="store_true", dest="plot", help="Plot extracted peaks.")
     group.add_option(
         "--liveplot",
         "-l",
@@ -362,9 +354,7 @@
     )
     parser.add_option_group(group)
 
-    group = OptionGroup(
-        parser, "Verbosity Options", "Control detail printed to console."
-    )
+    group = OptionGroup(parser, "Verbosity Options", "Control detail printed to console.")
     group.add_option(
         "--informative",
         "-i",
@@ -435,9 +425,7 @@
             options.peakfunction = eval("peaks." + options.peakfunction)
         except Exception as err:
             print(err)
-            print(
-                "Could not create peak function '%s'. Exiting." % options.peakfunction
-            )
+            print("Could not create peak function '%s'. Exiting." % options.peakfunction)
             return
 
     if options.modelevaluator is not None:
@@ -447,9 +435,7 @@
             options.modelevaluator = eval("modelevaluators." + options.modelevaluator)
         except Exception as err:
             print(err)
-            print(
-                "Could not find ModelEvaluator '%s'. Exiting." % options.modelevaluator
-            )
+            print("Could not find ModelEvaluator '%s'. Exiting." % options.modelevaluator)
             return
 
     if options.bcrystal is not None:
@@ -534,9 +520,7 @@ def main():
     if options.rng is not None:
         pdict["rng"] = list(options.rng)
     if options.qmax is not None:
-        pdict["qmax"] = (
-            options.qmax if options.qmax == "automatic" else float(options.qmax)
-        )
+        pdict["qmax"] = options.qmax if options.qmax == "automatic" else float(options.qmax)
     if options.nyquist is not None:
         pdict["nyquist"] = options.nyquist
     if options.supersample is not None:
@@ -624,10 +608,7 @@ def _format_text(self, text):
         # the above is still the same
         bits = text.split("\n")
         formatted_bits = [
-            textwrap.fill(
-                bit, text_width, initial_indent=indent, subsequent_indent=indent
-            )
-            for bit in bits
+            textwrap.fill(bit, text_width, initial_indent=indent, subsequent_indent=indent) for bit in bits
         ]
         result = "\n".join(formatted_bits) + "\n"
         return result
@@ -665,9 +646,7 @@ def format_option(self, option):
                 help_lines.extend(textwrap.wrap(para, self.help_width))
             # Everything is the same after here
             result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
-            result.extend(
-                ["%*s%s\n" % (self.help_position, "", line) for line in help_lines[1:]]
-            )
+            result.extend(["%*s%s\n" % (self.help_position, "", line) for line in help_lines[1:]])
         elif opts[-1] != "\n":
             result.append("\n")
         return "".join(result)
diff --git a/diffpy/srmise/applications/plot.py b/diffpy/srmise/applications/plot.py
index b6b788a..abaa2a5 100755
--- a/diffpy/srmise/applications/plot.py
+++ b/diffpy/srmise/applications/plot.py
@@ -194,9 +194,7 @@ def makeplot(ppe_or_stability, ip=None, **kwds):
         x = ppe.x[rangeslice]
         y = ppe.y[rangeslice]
         dy = ppe.effective_dy[rangeslice]
-        mcluster = ModelCluster(
-            ppe.initial_peaks, ppe.baseline, x, y, dy, None, ppe.error_method, ppe.pf
-        )
+        mcluster = ModelCluster(ppe.initial_peaks, ppe.baseline, x, y, dy, None, ppe.error_method, ppe.pf)
         ext = mcluster
     else:
         ext = ppe.extracted
@@ -255,9 +253,7 @@ def makeplot(ppe_or_stability, ip=None, **kwds):
     # Define the various data which will be plotted
     r = ext.r_cluster
     dr = (r[-1] - r[0]) / len(r)
-    rexpand = np.concatenate(
-        (np.arange(r[0] - dr, xlo, -dr)[::-1], r, np.arange(r[-1] + dr, xhi + dr, dr))
-    )
+    rexpand = np.concatenate((np.arange(r[0] - dr, xlo, -dr)[::-1], r, np.arange(r[-1] + dr, xhi + dr, dr)))
     rfine = np.arange(r[0], r[-1], 0.1 * dr)
     gr_obs = np.array(resample(ppe.x, ppe.y, rexpand)) * scale
     # gr_fit = resample(r, ext.value(), rfine)
@@ -295,21 +291,10 @@ def makeplot(ppe_or_stability, ip=None, **kwds):
         max_res = 0.0
 
     # Derive various y limits based on all the offsets
-    rel_height = (
-        100.0
-        - top_offset
-        - dg_height
-        - cmp_height
-        - datatop_offset
-        - databottom_offset
-        - bottom_offset
-    )
+    rel_height = 100.0 - top_offset - dg_height - cmp_height - datatop_offset - databottom_offset - bottom_offset
     abs_height = 100 * ((max_gr - min_gr) + (max_res - min_res)) / rel_height
-    yhi = (
-        max_gr
-        + (top_offset + dg_height + cmp_height + datatop_offset) * abs_height / 100
-    )
+    yhi = max_gr + (top_offset + dg_height + cmp_height + datatop_offset) * abs_height / 100
     ylo = yhi - abs_height
 
     yhi = kwds.get("yhi", yhi)
@@ -353,13 +338,7 @@ def makeplot(ppe_or_stability, ip=None, **kwds):
 
     # Remove labels above where insets begin
     # ax_data.yaxis.set_ticklabels([str(int(loc)) for loc in ax_data.yaxis.get_majorticklocs() if loc < datatop])
-    ax_data.yaxis.set_ticks(
-        [
-            loc
-            for loc in ax_data.yaxis.get_majorticklocs()
-            if (loc < datatop and loc >= ylo)
-        ]
-    )
+    ax_data.yaxis.set_ticks([loc for loc in ax_data.yaxis.get_majorticklocs() if (loc < datatop and loc >= ylo)])
 
     # Dataset label
     if datalabel is not None:
@@ -378,9 +357,7 @@ def makeplot(ppe_or_stability, ip=None, **kwds):
         # Create new x axis at bottom edge of compare inset
         ax_data.axis["top"].set_visible(False)
-        ax_data.axis["newtop"] = ax_data.new_floating_axis(
-            0, datatop, axis_direction="bottom"
-        )  # "top" bugged?
+        ax_data.axis["newtop"] = ax_data.new_floating_axis(0, datatop, axis_direction="bottom")  # "top" bugged?
         ax_data.axis["newtop"].toggle(all=False, ticks=True)
         ax_data.axis["newtop"].major_ticks.set_tick_out(True)
         ax_data.axis["newtop"].minor_ticks.set_tick_out(True)
@@ -399,9 +376,7 @@
             transform=ax_data.transAxes,
         )
         labeldict[fig] = newylabel  # so we can find the correct text object
-        fig.canvas.mpl_connect(
-            "draw_event", on_draw
-        )  # original label invisibility and updating
+        fig.canvas.mpl_connect("draw_event", on_draw)  # original label invisibility and updating
 
     # Compare extracted (and ideal, if provided) peak positions clearly.
     if cmp_height > 0:
@@ -610,15 +585,9 @@ def main():
         type="int",
         help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.",
     )
-    parser.add_option(
-        "--show", action="store_true", help="execute pylab.show() blocking call"
-    )
-    parser.add_option(
-        "-o", "--output", type="string", help="save plot to the specified file"
-    )
-    parser.add_option(
-        "--format", type="string", default="eps", help="output format for plot saving"
-    )
+    parser.add_option("--show", action="store_true", help="execute pylab.show() blocking call")
+    parser.add_option("-o", "--output", type="string", help="save plot to the specified file")
+    parser.add_option("--format", type="string", default="eps", help="output format for plot saving")
     parser.allow_interspersed_args = True
     opts, args = parser.parse_args(sys.argv[1:])
@@ -636,9 +605,7 @@ def main():
         try:
             toplot.load(filename)
         except Exception:
-            print(
-                "File '%s' is not a .srmise or PeakStability data file." % filename
-            )
+            print("File '%s' is not a .srmise or PeakStability data file." % filename)
             return
 
     if opts.model is not None:
diff --git a/diffpy/srmise/basefunction.py b/diffpy/srmise/basefunction.py
index 3d23670..f19e1f5 100644
--- a/diffpy/srmise/basefunction.py
+++ b/diffpy/srmise/basefunction.py
@@ -116,21 +116,12 @@ def __init__(
 
         # Check validity of default_formats
         self.default_formats = default_formats
-        if not (
-            "default_input" in self.default_formats
-            and "default_output" in self.default_formats
-        ):
-            emsg = (
-                "Argument default_formats must specify 'default_input' "
-                + "and 'default_output' as keys."
-            )
+        if not ("default_input" in self.default_formats and "default_output" in self.default_formats):
+            emsg = "Argument default_formats must specify 'default_input' " + "and 'default_output' as keys."
             raise ValueError(emsg)
         for f in self.default_formats.values():
             if not f in self.parformats:
-                emsg = (
-                    "Keys of argument default_formats must map to a "
-                    + "value within argument parformats."
-                )
+                emsg = "Keys of argument default_formats must map to a " + "value within argument parformats."
                 raise ValueError()
 
         # Set metadictionary
@@ -195,10 +186,7 @@ def jacobian(self, p, r, rng=None):
            previously calculated values instead.
         """
         if self is not p._owner:
-            emsg = (
-                "Argument 'p' must be evaluated by the BaseFunction "
-                + "subclass which owns it."
-            )
+            emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it."
             raise ValueError(emsg)
 
         # normally r will be a sequence, but also allow single numeric values
@@ -241,18 +229,12 @@ def transform_derivatives(self, pars, in_format=None, out_format=None):
             out_format = self.default_formats["default_input"]
 
         if not in_format in self.parformats:
-            raise ValueError(
-                "Argument 'in_format' must be one of %s." % self.parformats
-            )
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
         if not out_format in self.parformats:
-            raise ValueError(
-                "Argument 'out_format' must be one of %s." % self.parformats
-            )
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         if in_format == out_format:
             return np.identity(self.npars)
-        return self._transform_derivativesraw(
-            pars, in_format=in_format, out_format=out_format
-        )
+        return self._transform_derivativesraw(pars, in_format=in_format, out_format=out_format)
 
     def transform_parameters(self, pars, in_format=None, out_format=None):
         """Return new sequence with pars converted from in_format to out_format.
@@ -282,18 +264,12 @@ def transform_parameters(self, pars, in_format=None, out_format=None):
             out_format = self.default_formats["default_input"]
 
         if not in_format in self.parformats:
-            raise ValueError(
-                "Argument 'in_format' must be one of %s." % self.parformats
-            )
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
         if not out_format in self.parformats:
-            raise ValueError(
-                "Argument 'out_format' must be one of %s." % self.parformats
-            )
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         # if in_format == out_format:
         #     return pars
-        return self._transform_parametersraw(
-            pars, in_format=in_format, out_format=out_format
-        )
+        return self._transform_parametersraw(pars, in_format=in_format, out_format=out_format)
 
     def value(self, p, r, rng=None):
         """Calculate value of ModelPart over r, possibly restricted by range.
@@ -307,10 +283,7 @@ def value(self, p, r, rng=None):
            previously calculated values instead.
         """
         if self is not p._owner:
-            emsg = (
-                "Argument 'p' must be evaluated by the BaseFunction "
-                + "subclass which owns it."
-            )
+            emsg = "Argument 'p' must be evaluated by the BaseFunction " + "subclass which owns it."
             raise ValueError(emsg)
 
         # normally r will be a sequence, but also allow single numeric values
diff --git a/diffpy/srmise/baselines/arbitrary.py b/diffpy/srmise/baselines/arbitrary.py
index 2588740..d2bc487 100644
--- a/diffpy/srmise/baselines/arbitrary.py
+++ b/diffpy/srmise/baselines/arbitrary.py
@@ -17,12 +17,14 @@
 import numpy as np
 
 import diffpy.srmise.srmiselog
+from diffpy.srmise.baselines import Polynomial
 from diffpy.srmise.baselines.base import BaselineFunction
 from diffpy.srmise.srmiseerrors import SrMiseEstimationError
 
 logger = logging.getLogger("diffpy.srmise")
 
-class Arbitrary (BaselineFunction):
+
+class Arbitrary(BaselineFunction):
     """Methods for evaluating a baseline from an arbitrary function.
 
     Supports baseline calculations with arbitrary functions.  These functions,
@@ -64,10 +66,10 @@ def __init__(self, npars, valuef, jacobianf=None, estimatef=None, Cache=None):
         # Define parameterdict
         # e.g. {"a_0":0, "a_1":1, "a_2":2, "a_3":3} if npars is 4.
         parameterdict = {}
-        for d in range(self.testnpars+1):
-            parameterdict["a_"+str(d)] = d
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        for d in range(self.testnpars + 1):
+            parameterdict["a_" + str(d)] = d
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
 
         # Check that the provided functions are at least callable
         if valuef is None or callable(valuef):
@@ -114,9 +116,8 @@ def estimate_parameters(self, r, y):
         # TODO: check that estimatef returns something proper?
         try:
             return self.estimatef(r, y)
-        except Exception, e:
-            emsg = "Error within estimation routine provided to Arbitrary:\n"+\
-                   str(e)
+        except Exception as e:
+            emsg = "Error within estimation routine provided to Arbitrary:\n" + str(e)
             raise SrMiseEstimationError(emsg)
 
     def _jacobianraw(self, pars, r, free):
@@ -137,10 +138,10 @@ def _jacobianraw(self, pars, r, free):
             emsg = "No jacobian routine provided to Arbitrary."
             raise NotImplementedError(emsg)
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
 
         # Allow an arbitrary function without a Jacobian provided act as
@@ -149,7 +150,7 @@ def _jacobianraw(self, pars, r, free):
         # large performance implications if all other functions used while
         # fitting a function define a Jacobian.
         if nfree == 0:
-            return [None for p in range(len(par))]
+            return [None for p in range(len(pars))]
 
         # TODO: check that jacobianf returns something proper?
         return self.jacobianf(pars, r, free)
@@ -170,15 +171,13 @@ def _transform_parametersraw(self, pars, in_format, out_format):
         if in_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
 
         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         return temp
 
     def _valueraw(self, pars, r):
@@ -192,7 +191,7 @@ def _valueraw(self, pars, r):
            ...
         r: sequence or scalar over which pars is evaluated"""
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
 
         # TODO: check that valuef returns something proper?
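
[review note] Since only the test block below exercises this module, a minimal
usage sketch of the constructor touched above may help review. The signature
(npars, valuef, jacobianf, estimatef) comes from this file; the linear example
function itself is hypothetical:

    import numpy as np
    from diffpy.srmise.baselines.arbitrary import Arbitrary

    def linear(pars, r):
        # hypothetical baseline b(r) = a1*r + a0, in srmise's (pars, r) convention
        return pars[0] * r + pars[1]

    bl = Arbitrary(2, valuef=linear)  # two parameters, value function only
    print(bl._valueraw(np.array([1.0, -2.0]), np.arange(5.0)))
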
@@ -201,21 +200,22 @@
     def getmodule(self):
         return __name__
 
-#end of class Polynomial
+
+# end of class Arbitrary
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
 
-    f = Polynomial(degree = 3)
+    f = Polynomial(degree=3)
     r = np.arange(5)
     pars = np.array([3, 0, 1, 2])
     free = np.array([True, False, True, True])
-    print f._valueraw(pars, r)
-    print f._jacobianraw(pars, r, free)
+    print(f._valueraw(pars, r))
+    print(f._jacobianraw(pars, r, free))
 
-    f = Polynomial(degree = -1)
+    f = Polynomial(degree=-1)
     r = np.arange(5)
     pars = np.array([])
     free = np.array([])
-    print f._valueraw(pars, r)
-    print f._jacobianraw(pars, r, free)
+    print(f._valueraw(pars, r))
+    print(f._jacobianraw(pars, r, free))
diff --git a/diffpy/srmise/baselines/base.py b/diffpy/srmise/baselines/base.py
index d84160a..e71add5 100644
--- a/diffpy/srmise/baselines/base.py
+++ b/diffpy/srmise/baselines/base.py
@@ -18,6 +18,7 @@
 import diffpy.srmise.srmiselog
 from diffpy.srmise.basefunction import BaseFunction
 from diffpy.srmise.modelparts import ModelPart
+from diffpy.srmise.peaks import Peaks
 from diffpy.srmise.srmiseerrors import *
 
 logger = logging.getLogger("diffpy.srmise")
@@ -83,9 +84,7 @@ def __init__(
             additional functionality.
         Cache: A class (not instance) which implements caching of BaseFunction
                evaluations."""
-        BaseFunction.__init__(
-            self, parameterdict, parformats, default_formats, metadict, base, Cache
-        )
+        BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache)
 
     #### "Virtual" class methods ####
 
diff --git a/diffpy/srmise/baselines/fromsequence.py b/diffpy/srmise/baselines/fromsequence.py
index c06e2de..718051a 100644
--- a/diffpy/srmise/baselines/fromsequence.py
+++ b/diffpy/srmise/baselines/fromsequence.py
@@ -22,7 +22,8 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-class FromSequence (BaselineFunction):
+
+class FromSequence(BaselineFunction):
     """Methods for evaluation of a baseline from discrete data via interpolation.
 
     FromSequence uses cubic spline interpolation (no smoothing) on discrete
@@ -46,13 +47,13 @@ def __init__(self, *args, **kwds):
             or
         file: Name of file with column of x values and column of y values.
         """
-        if len(args)==1 and len(kwds)==0:
+        if len(args) == 1 and len(kwds) == 0:
             # load from file
             x, y = self.readxy(args[0])
         elif len(args) == 0 and ("file" in kwds and "x" not in kwds and "y" not in kwds):
             # load file
             x, y = self.readxy(kwds["file"])
-        elif len(args)==2 and len(kwds)==0:
+        elif len(args) == 2 and len(kwds) == 0:
             # Load x, y directly from arguments
             x = args[0]
             y = args[1]
@@ -69,8 +70,8 @@ def __init__(self, *args, **kwds):
             emsg = "Sequences x and y must have the same length."
             raise ValueError(emsg)
         parameterdict = {}
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
         self.spline = spi.InterpolatedUnivariateSpline(x, y)
         self.minx = x[0]
         self.maxx = x[-1]
@@ -102,10 +103,10 @@ def _jacobianraw(self, pars, r, free):
         r: sequence or scalar over which pars is evaluated
         free: Empty sequence."""
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         return []
 
@@ -125,15 +126,13 @@ def _transform_parametersraw(self, pars, in_format, out_format):
         if in_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
 
         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         return temp
 
     def _valueraw(self, pars, r):
@@ -143,18 +142,22 @@ def _valueraw(self, pars, r):
         pars: Empty sequence
         r: sequence or scalar over which pars is evaluated"""
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         try:
             if r[0] < self.minx or r[-1] > self.maxx:
-                logger.warn("Warning: Evaluating interpolating function over %s, outside safe range of %s.",
-                            [r[0], r[-1]],
-                            [self.minx, self.maxx])
-        except IndexError, TypeError:
+                logger.warn(
+                    "Warning: Evaluating interpolating function over %s, outside safe range of %s.",
+                    [r[0], r[-1]],
+                    [self.minx, self.maxx],
+                )
+        except (IndexError, TypeError):
             if r < self.minx or r > self.maxx:
-                logger.warn("Warning: Evaluating interpolating function at %s, outside safe range of %s.",
-                            r,
-                            [self.minx, self.maxx])
+                logger.warn(
+                    "Warning: Evaluating interpolating function at %s, outside safe range of %s.",
+                    r,
+                    [self.minx, self.maxx],
+                )
         return self.spline(r)
 
     def getmodule(self):
@@ -162,7 +165,7 @@ def getmodule(self):
 
     def xyrepr(self, var):
         """Safe string output of x and y, compatible with eval()"""
-        return "[%s]" %", ".join([repr(v) for v in var])
+        return "[%s]" % ", ".join([repr(v) for v in var])
 
     def readxy(self, filename):
         """ """
@@ -170,38 +173,40 @@ def readxy(self, filename):
 
         # TODO: Make this safer
         try:
-            datastring = open(filename,'rb').read()
-        except Exception, err:
+            datastring = open(filename, "r").read()
+        except Exception as err:
             raise err
 
         import re
+
         res = re.search(r"^[^#]", datastring, re.M)
         if res:
-            datastring = datastring[res.end():].strip()
+            datastring = datastring[res.end() :].strip()
 
-        x=[]
-        y=[]
+        x = []
+        y = []
         try:
             for line in datastring.split("\n"):
                 v = line.split()
                 x.append(float(v[0]))
                 y.append(float(v[1]))
-        except (ValueError, IndexError), err:
+        except (ValueError, IndexError) as err:
             raise SrMiseDataFormatError(str(err))
         return (np.array(x), np.array(y))
 
-#end of class FromSequence
+
+# end of class FromSequence
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
 
-    r = np.arange(0, 9.42413, .2)
-    b = -(np.tanh(.5*r) + np.sin(.5*r))
+    r = np.arange(0, 9.42413, 0.2)
+    b = -(np.tanh(0.5 * r) + np.sin(0.5 * r))
     f = FromSequence(r, b)
     pars = np.array([])
     free = np.array([])
 
-    r2 = np.arange(0, 9.42413, .5)
+    r2 = np.arange(0, 9.42413, 0.5)
     b2 = f._valueraw(pars, r2)
diff --git a/diffpy/srmise/baselines/nanospherical.py b/diffpy/srmise/baselines/nanospherical.py
index 04c1d43..d58999d 100644
--- a/diffpy/srmise/baselines/nanospherical.py
+++ b/diffpy/srmise/baselines/nanospherical.py
@@ -22,7 +22,8 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-class NanoSpherical (BaselineFunction):
+
+class NanoSpherical(BaselineFunction):
     """Methods for evaluation of baseline of spherical nanoparticle of uniform density.
 
     Allowed formats are
@@ -47,29 +48,29 @@ def __init__(self, Cache=None):
            evaluations.
         """
         # Define parameterdict
-        parameterdict = {'scale':0, 'radius':1}
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        parameterdict = {"scale": 0, "radius": 1}
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
         metadict = {}
         BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
 
     #### Methods required by BaselineFunction ####
 
-#    def estimate_parameters(self, r, y):
-#        """Estimate parameters for spherical baseline. (Not implemented!)
-#
-#        Parameters
-#        r - array along r from which to estimate
-#        y - array along y from which to estimate
-#
-#        Returns Numpy array of parameters in the default internal format.
-#        Raises NotImplementedError if estimation is not implemented for this
-#        degree, or SrMiseEstimationError if parameters cannot be estimated for
-#        any other reason.
-#        """
-#        if len(r) != len(y):
-#            emsg = "Arrays r, y must have equal length."
-#            raise ValueError(emsg)
+    # def estimate_parameters(self, r, y):
+    #     """Estimate parameters for spherical baseline. (Not implemented!)
+    #
+    #     Parameters
+    #     r - array along r from which to estimate
+    #     y - array along y from which to estimate
+    #
+    #     Returns Numpy array of parameters in the default internal format.
+    #     Raises NotImplementedError if estimation is not implemented for this
+    #     degree, or SrMiseEstimationError if parameters cannot be estimated for
+    #     any other reason.
+    #     """
+    #     if len(r) != len(y):
+    #         emsg = "Arrays r, y must have equal length."
+    #         raise ValueError(emsg)
 
     def _jacobianraw(self, pars, r, free):
         """Return the Jacobian of the spherical baseline.
@@ -83,22 +84,26 @@ def _jacobianraw(self, pars, r, free):
               needed.  True for evaluation, False for no evaluation.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         jacobian = [None for p in range(self.npars)]
         if (free == False).sum() == self.npars:
             return jacobian
 
         if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                if free[0]: jacobian[0] = 0.
-                if free[1]: jacobian[1] = 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                if free[0]:
+                    jacobian[0] = 0.0
+                if free[1]:
+                    jacobian[1] = 0.0
             else:
-                if free[0]: jacobian[0] = self._jacobianrawscale(pars, r)
-                if free[1]: jacobian[1] = self._jacobianrawradius(pars, r)
+                if free[0]:
+                    jacobian[0] = self._jacobianrawscale(pars, r)
+                if free[1]:
+                    jacobian[1] = self._jacobianrawradius(pars, r)
         else:
             s = self._getdomain(pars, r)
             if free[0]:
@@ -120,12 +125,12 @@ def _jacobianrawscale(self, pars, r):
         """
         s = np.abs(pars[0])
         R = np.abs(pars[1])
-        rdivR = r/R
+        rdivR = r / R
         # From abs'(s) in derivative, which is equivalent to sign(s) except at 0 where it
         # is undefined.  Since s=0 is equivalent to the absence of a nanoparticle, sign will
         # be fine.
         sign = np.sign(pars[1])
-        return -sign*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        return -sign * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)
 
     def _jacobianrawradius(self, pars, r):
         """Return partial Jacobian wrt radius without bounds checking.
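
[review note] The closed-form expressions reflowed above are easy to check
numerically. A self-contained finite-difference verification of the baseline
b(r) = -s*r*(1 - (3/4)(r/R) + (1/16)(r/R)^3) and its two partials, using plain
numpy only (values and tolerances are arbitrary):

    import numpy as np

    def b(s, R, r):
        x = r / R
        return -s * r * (1 - 0.75 * x + x**3 / 16.0)

    s, R = 1.3, 7.0
    r = np.linspace(0.5, 2 * R - 0.5, 25)
    h = 1e-6
    db_ds = -r * (1 - 0.75 * (r / R) + (r / R) ** 3 / 16.0)   # matches _jacobianrawscale for s, R > 0
    db_dR = s * (3 * r**2 * (r**2 - 4 * R**2)) / (16 * R**4)  # matches _jacobianrawradius for s, R > 0
    assert np.allclose(db_ds, (b(s + h, R, r) - b(s - h, R, r)) / (2 * h))
    assert np.allclose(db_dR, (b(s, R + h, r) - b(s, R - h, r)) / (2 * h), atol=1e-5)

Incidentally, _jacobianrawscale takes its sign from pars[1] although its comment
speaks of abs'(s), i.e. pars[0]; that predates this patch but may deserve a look.
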
@@ -141,7 +146,7 @@
         # From abs'(R) in derivative, which is equivalent to sign(R) except at 0 where it
         # is undefined.  Since R=0 is a singularity anyway, sign will be fine.
         sign = np.sign(pars[1])
-        return sign*s*(3*r**2*(r**2-4*R**2))/(16*R**4)
+        return sign * s * (3 * r**2 * (r**2 - 4 * R**2)) / (16 * R**4)
 
     def _transform_parametersraw(self, pars, in_format, out_format):
         """Convert parameter values from in_format to out_format.
@@ -162,15 +167,13 @@ def _transform_parametersraw(self, pars, in_format, out_format):
             temp[0] = np.abs(temp[0])
             temp[1] = np.abs(temp[1])
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
 
         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         return temp
 
     def _valueraw(self, pars, r):
@@ -185,11 +188,11 @@ def _valueraw(self, pars, r):
         r - sequence or scalar over which pars is evaluated.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if np.isscalar(r):
-            if r <= 0. or r >= 2.*pars[1]:
-                return 0.
+            if r <= 0.0 or r >= 2.0 * pars[1]:
+                return 0.0
             else:
                 return self._valueraw2(pars, r)
         else:
@@ -209,38 +212,48 @@ def _valueraw2(self, pars, r):
         """
         s = np.abs(pars[0])
         R = np.abs(pars[1])
-        rdivR = r/R
-        return -s*r*(1-(3./4.)*rdivR+(1./16.)*rdivR**3)
+        rdivR = r / R
+        return -s * r * (1 - (3.0 / 4.0) * rdivR + (1.0 / 16.0) * rdivR**3)
 
     def _getdomain(self, pars, r):
         """Return slice object for which r > 0 and r < twice the radius"""
-        low = r.searchsorted(0., side='right')
-        high = r.searchsorted(2.*pars[1], side='left')
+        low = r.searchsorted(0.0, side="right")
+        high = r.searchsorted(2.0 * pars[1], side="left")
         return slice(low, high)
 
     def getmodule(self):
         return __name__
 
-#end of class NanoSpherical
+
+# end of class NanoSpherical
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     f = NanoSpherical()
     r = np.arange(-5, 10)
-    pars = np.array([-1., 7.])
+    pars = np.array([-1.0, 7.0])
     free = np.array([False, True])
-    print "Testing nanoparticle spherical baseline"
-    print "Scale: %f, Radius: %f" %(pars[0], pars[1])
-    print "-----------------------------------------"
+    print("Testing nanoparticle spherical baseline")
+    print("Scale: %f, Radius: %f" % (pars[0], pars[1]))
+    print("-----------------------------------------")
 
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    outjac = [j if j is not None else [None]*len(r) for j in jac]
-    print "r".center(10), "value".center(10), "jac(scale)".center(10), "jac(radius)".center(10)
+    jac = f._jacobianraw(pars, r, free)
+    outjac = [j if j is not None else [None] * len(r) for j in jac]
+    print(
+        "r".center(10),
+        "value".center(10),
+        "jac(scale)".center(10),
+        "jac(radius)".center(10),
+    )
     for tup in zip(r, val, *outjac):
         for t in tup:
             if t is None:
-                print ("%s" %None).ljust(10),
+                print(("%s" % None).ljust(10), end=" ")
             else:
-                print ("% .3g" %t).ljust(10),
+                print(("% .3g" % t).ljust(10), end=" ")
-        print
+        print()
diff --git a/diffpy/srmise/baselines/polynomial.py b/diffpy/srmise/baselines/polynomial.py
index 4d95c4f..646e9bf 100644
--- a/diffpy/srmise/baselines/polynomial.py
+++ b/diffpy/srmise/baselines/polynomial.py
@@ -22,7 +22,8 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-class Polynomial (BaselineFunction):
+
+class Polynomial(BaselineFunction):
     """Methods for evaluation and parameter estimation of a polynomial baseline."""
 
     def __init__(self, degree, Cache=None):
@@ -41,14 +42,14 @@ def __init__(self, degree, Cache=None):
             emsg = "Argument degree must be an integer."
             raise ValueError(emsg)
         if self.degree < 0:
-            self.degree = -1 # interpreted as negative infinity
+            self.degree = -1  # interpreted as negative infinity
 
         # Define parameterdict
         # e.g. {"a_0":3, "a_1":2, "a_2":1, "a_3":0} if degree is 3.
         parameterdict = {}
-        for d in range(self.degree+1):
-            parameterdict["a_"+str(d)] = self.degree - d
-        formats = ['internal']
-        default_formats = {'default_input':'internal', 'default_output':'internal'}
+        for d in range(self.degree + 1):
+            parameterdict["a_" + str(d)] = self.degree - d
+        formats = ["internal"]
+        default_formats = {"default_input": "internal", "default_output": "internal"}
         metadict = {}
         metadict["degree"] = (degree, repr)
         BaselineFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache)
@@ -81,7 +82,7 @@ def estimate_parameters(self, r, y):
             return np.array([])
 
         if self.degree == 0:
-            return np.array([0.])
+            return np.array([0.0])
 
         if self.degree == 1:
             # Estimate degree=1 baseline.
@@ -90,15 +91,16 @@ def estimate_parameters(self, r, y):
             # lies above the baseline.
             # TODO: Make this more sophisticated.
             try:
-                cut = np.max([len(y)/10, 1])
+                cut = np.max([len(y) // 10, 1])
                 cut_idx = y.argsort()[:cut]
 
                 import numpy.linalg as la
+
                 A = np.array([r[cut_idx]]).T
                 slope = la.lstsq(A, y[cut_idx])[0][0]
-                return np.array([slope, 0.])
-            except Exception, e:
-                emsg = "Error during estimation -- "+str(e)
+                return np.array([slope, 0.0])
+            except Exception as e:
+                emsg = "Error during estimation -- " + str(e)
                 raise
                 raise SrMiseEstimationError(emsg)
@@ -116,10 +118,10 @@ def _jacobianraw(self, pars, r, free):
               needed.  True for evaluation, False for no evaluation.
         """
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         if len(free) != self.npars:
-            emsg = "Argument free must have "+str(self.npars)+" elements."
+            emsg = "Argument free must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         jacobian = [None for p in range(self.npars)]
         if (free == False).sum() == self.npars:
@@ -148,15 +150,13 @@ def _transform_parametersraw(self, pars, in_format, out_format):
         if in_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'in_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'in_format' must be one of %s." % self.parformats)
 
         # Convert to specified output format from "internal" format.
         if out_format == "internal":
             pass
         else:
-            raise ValueError("Argument 'out_format' must be one of %s." \
-                  % self.parformats)
+            raise ValueError("Argument 'out_format' must be one of %s." % self.parformats)
         return temp
 
     def _valueraw(self, pars, r):
@@ -171,51 +171,54 @@ def _valueraw(self, pars, r):
                If degree is negative infinity, pars is an empty sequence.
         r: sequence or scalar over which pars is evaluated"""
         if len(pars) != self.npars:
-            emsg = "Argument pars must have "+str(self.npars)+" elements."
+            emsg = "Argument pars must have " + str(self.npars) + " elements."
             raise ValueError(emsg)
         return np.polyval(pars, r)
 
     def getmodule(self):
         return __name__
 
-#end of class Polynomial
+
+# end of class Polynomial
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     # Test polynomial of degree 3
-    print "Testing degree 3 polynomial"
-    print "---------------------------"
-    f = Polynomial(degree = 3)
+    print("Testing degree 3 polynomial")
+    print("---------------------------")
+    f = Polynomial(degree=3)
     r = np.arange(5)
     pars = np.array([3, 0, 1, 2])
     free = np.array([True, False, True, True])
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)
 
     # Test polynomial of degree -oo
-    print "\nTesting degree -oo polynomial (== 0)"
-    print "------------------------------------"
-    f = Polynomial(degree = -1)
+    print("\nTesting degree -oo polynomial (== 0)")
+    print("------------------------------------")
+    f = Polynomial(degree=-1)
     r = np.arange(5)
     pars = np.array([])
     free = np.array([])
     val = f._valueraw(pars, r)
-    jac = f._jacobianraw(pars, r, free)
-    print "Value:\n", val
-    print "Jacobian: "
-    for j in jac: print " %s" %j
+    jac = f._jacobianraw(pars, r, free)
+    print("Value:\n", val)
+    print("Jacobian: ")
+    for j in jac:
+        print(" %s" % j)
 
     # Test linear estimation
-    print "\nTesting linear baseline estimation"
-    print "------------------------------------"
-    f = Polynomial(degree = 1)
+    print("\nTesting linear baseline estimation")
+    print("------------------------------------")
+    f = Polynomial(degree=1)
     pars = np.array([1, 0])
-    r = np.arange(0, 10, .1)
-    y = -r + 10*np.exp(-(r-5)**2) + np.random.rand(len(r))
+    r = np.arange(0, 10, 0.1)
+    y = -r + 10 * np.exp(-((r - 5) ** 2)) + np.random.rand(len(r))
     est = f.estimate_parameters(r, y)
-    print "Actual baseline:    ", np.array([-1, 0.])
-    print "Estimated baseline: ", est
+    print("Actual baseline:    ", np.array([-1, 0.0]))
+    print("Estimated baseline: ", est)
diff --git a/diffpy/srmise/dataclusters.py b/diffpy/srmise/dataclusters.py
index 6d4f87e..39ccf1b 100644
--- a/diffpy/srmise/dataclusters.py
+++ b/diffpy/srmise/dataclusters.py
@@ -169,9 +169,7 @@ def next(self):
         else:
             # insert right of nearest cluster
             self.lastcluster_idx = nearest_cluster[0] + 1
-            self.clusters = np.insert(
-                self.clusters, self.lastcluster_idx, [test_idx, test_idx], 0
-            )
+            self.clusters = np.insert(self.clusters, self.lastcluster_idx, [test_idx, test_idx], 0)
         return self
 
     def makeclusters(self):
@@ -266,10 +264,7 @@ def cluster_is_full(self, cluster_idx):
             high = self.clusters[cluster_idx + 1, 0] - 1
         else:
             high = len(self.data_order) - 1
-        return (
-            self.clusters[cluster_idx, 0] == low
-            and self.clusters[cluster_idx, 1] == high
-        )
+        return self.clusters[cluster_idx, 0] == low and self.clusters[cluster_idx, 1] == high
 
     def combine_clusters(self, combine):
         """Combine clusters specified by each subarray of cluster indices.
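
[review note] The first dataclusters.py hunk touches next(), which still follows
the Python 2 iterator protocol; if DataClusters instances are iterated with a
for loop, Python 3 looks up __next__ and would raise TypeError even after this
patch. A common minimal fix is an alias, sketched here on a stand-in class
rather than DataClusters itself:

    class Counter:
        """Stand-in for a py2-style iterator class."""

        def __init__(self, n):
            self.i, self.n = 0, n

        def __iter__(self):
            return self

        def next(self):  # Python 2 protocol name
            if self.i >= self.n:
                raise StopIteration
            self.i += 1
            return self.i

        __next__ = next  # Python 3 protocol name; one line, same method

    print(list(Counter(3)))  # [1, 2, 3]
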
@@ -442,9 +437,7 @@ def animate(self):
 
 # simple test code
 if __name__ == "__main__":
 
-    x = np.array(
-        [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
-    )
+    x = np.array([-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0])
     y = np.array(
         [
             0.0183156,
diff --git a/diffpy/srmise/modelcluster.py b/diffpy/srmise/modelcluster.py
index 1b4bfe0..2367297 100644
--- a/diffpy/srmise/modelcluster.py
+++ b/diffpy/srmise/modelcluster.py
@@ -58,9 +58,7 @@ class ModelCovariance(object):
     def __init__(self, *args, **kwds):
         """Intialize object."""
         self.cov = None  # The raw covariance matrix
-        self.model = (
-            None  # ModelParts instance, so both peaks and baseline (if present)
-        )
+        self.model = None  # ModelParts instance, so both peaks and baseline (if present)
 
         # Map i->[n1,n2,...] of the jth ModelPart to the n_i parameters in cov.
         self.mmap = {}
@@ -98,9 +96,7 @@ def setcovariance(self, model, cov):
             emsg = "Parameter 'cov' must be a square matrix."
             raise ValueError(emsg)
 
-        if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(
-            False
-        ):
+        if tempcov.shape[0] != model.npars(True) and tempcov.shape[0] != model.npars(False):
             emsg = [
                 "Parameter 'cov' must be an nxn matrix, where n is equal to the number of free ",
                 "parameters in the model, or the total number of parameters (fixed and free) of ",
@@ -248,9 +244,7 @@ def getcorrelation(self, i, j):
         if self.cov[i1, i1] == 0.0 or self.cov[j1, j1] == 0.0:
             return 0.0  # Avoiding undefined quantities is sensible in this context.
         else:
-            return self.cov[i1, j1] / (
-                np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1])
-            )
+            return self.cov[i1, j1] / (np.sqrt(self.cov[i1, i1]) * np.sqrt(self.cov[j1, j1]))
 
     def getvalue(self, i):
         """Return value of parameter i.
@@ -483,9 +477,7 @@ def writestr(self, **kwds):
         if self.peak_funcs is None:
             lines.append("peak_funcs=None")
         else:
-            lines.append(
-                "peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs])
-            )
+            lines.append("peak_funcs=%s" % repr([pfbaselist.index(p) for p in self.peak_funcs]))
         if self.error_method is None:
             lines.append("ModelEvaluator=None")
         else:
@@ -611,9 +603,7 @@ def factory(mcstr, **kwds):
     ### Instantiating baseline functions
     if readblf:
         blfbaselist = []
-        res = re.split(
-            r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions
-        )
+        res = re.split(r"(?m)^#+ BaselineFunction \d+\s*(?:#.*\s+)*", baselinefunctions)
         for s in res[1:]:
             blfbaselist.append(BaseFunction.factory(s, blfbaselist))
 
@@ -676,10 +666,7 @@ def factory(mcstr, **kwds):
         for line in start_data.split("\n"):
             l = line.split()
             if len(arrays) != len(l):
-                emsg = (
-                    "Number of value fields does not match that given by '%s'"
-                    % start_data_info
-                )
+                emsg = "Number of value fields does not match that given by '%s'" % start_data_info
             for a, v in zip(arrays, line.split()):
                 a.append(float(v))
     except (ValueError, IndexError) as err:
@@ -765,19 +752,11 @@ def join_adjacent(m1, m2):
         # border_x are removed.  The highly unlikely case of two peaks
         # exactly at the border is also handled.
         for i in reversed(range(len(new_model))):
-            if (
-                new_model[i]["position"] == border_x
-                and i > 0
-                and new_model[i - 1]["position"] == border_x
-            ):
+            if new_model[i]["position"] == border_x and i > 0 and new_model[i - 1]["position"] == border_x:
                 del new_model[i]
             elif new_ids[i] != i:
-                if (
-                    new_model[i]["position"] > border_x
-                    and new_ids[i] < len(left.model)
-                ) and (
-                    new_model[i]["position"] < border_x
-                    and new_ids[i] >= len(left.model)
+                if (new_model[i]["position"] > border_x and new_ids[i] < len(left.model)) and (
+                    new_model[i]["position"] < border_x and new_ids[i] >= len(left.model)
                 ):
                     del new_model[i]
 
@@ -826,15 +805,11 @@ def change_slice(self, new_slice):
         # check if slice has expanded on the left
         if self.never_fit and self.slice.start < old_slice.start:
             left_slice = slice(self.slice.start, old_slice.start)
-            self.never_fit = (
-                max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0
-            )
+            self.never_fit = max(y_data_nobl[left_slice] - self.y_error[left_slice]) < 0
 
         # check if slice has expanded on the right
         if self.never_fit and self.slice.stop > old_slice.stop:
             right_slice = slice(old_slice.stop, self.slice.stop)
-            self.never_fit = (
-                max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0
-            )
+            self.never_fit = max(y_data_nobl[right_slice] - self.y_error[right_slice]) < 0
 
         return
 
@@ -884,9 +859,7 @@ def estimatepeak(self):
             # throw some exception
             pass
         selected = self.peak_funcs[0]
-        estimate = selected.estimate_parameters(
-            self.r_cluster, self.y_cluster - self.valuebl()
-        )
+        estimate = selected.estimate_parameters(self.r_cluster, self.y_cluster - self.valuebl())
 
         if estimate is not None:
             newpeak = selected.actualize(estimate, "internal")
@@ -943,11 +916,7 @@ def fit(
         orig_baseline = self.baseline.copy()
         self.last_fit_size = self.size
 
-        if (
-            fitbaseline
-            and self.baseline is not None
-            and self.baseline.npars(count_fixed=False) > 0
-        ):
+        if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0:
             y_datafit = self.y_data
             fmodel = ModelParts(self.model)
             fmodel.append(self.baseline)
@@ -966,18 +935,12 @@ def fit(
                 cov_format,
             )
         except SrMiseFitError as e:
-            logger.debug(
-                "Error while fitting cluster: %s\nReverting to original model.", e
-            )
+            logger.debug("Error while fitting cluster: %s\nReverting to original model.", e)
             self.model = orig_model
             self.baseline = orig_baseline
             return None
 
-        if (
-            fitbaseline
-            and self.baseline is not None
-            and self.baseline.npars(count_fixed=False) > 0
-        ):
+        if fitbaseline and self.baseline is not None and self.baseline.npars(count_fixed=False) > 0:
             self.model = Peaks(fmodel[:-1])
             self.baseline = fmodel[-1]
         else:
@@ -1039,10 +1002,9 @@ def contingent_fit(self, minpoints, growth_threshold):
         """
         if self.never_fit:
             return None
-        if (
-            self.last_fit_size > 0
-            and float(self.size) / self.last_fit_size >= growth_threshold
-        ) or (self.last_fit_size == 0 and self.size >= minpoints):
+        if (self.last_fit_size > 0 and float(self.size) / self.last_fit_size >= growth_threshold) or (
+            self.last_fit_size == 0 and self.size >= minpoints
+        ):
             return self.fit(justify=True)
         return None
 
@@ -1063,10 +1025,7 @@ def cleanfit(self):
         outside_idx = [
             i
             for i in outside_idx
-            if (
-                self.model[i].removable
-                and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0
-            )
+            if (self.model[i].removable and max(self.model[i].value(self.r_cluster) - self.error_cluster) < 0)
         ]
 
         # TODO: Check for peaks that have blown up.
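
[review note] The contingent_fit condition reads much better after the reflow.
For review purposes, the same trigger logic as a standalone predicate with a
few illustrative checks (numbers invented):

    def should_refit(size, last_fit_size, minpoints, growth_threshold):
        """Refit if the cluster grew enough since the last fit, or if a
        never-fit cluster has just reached minpoints."""
        if last_fit_size > 0:
            return float(size) / last_fit_size >= growth_threshold
        return size >= minpoints

    assert should_refit(15, 10, 4, 1.4)      # grew 1.5x >= threshold 1.4
    assert not should_refit(11, 10, 4, 1.4)  # grew only 1.1x, keep waiting
    assert should_refit(5, 0, 4, 1.4)        # first fit once >= 4 points
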
@@ -1075,14 +1034,10 @@ def cleanfit(self):
 
         # NaN is too serious not to remove, even if removable is False, but I should look
         # into better handling anyway.
-        nan_idx = [
-            i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any()
-        ]
+        nan_idx = [i for i in range(len(self.model)) if np.isnan(self.model[i].pars).any()]
 
         if len(outside_idx) > 0:
-            msg = [
-                "Following peaks outside cluster made no contribution within it and were removed:"
-            ]
+            msg = ["Following peaks outside cluster made no contribution within it and were removed:"]
             msg.extend([str(self.model[i]) for i in outside_idx])
             logger.debug("\n".join(msg))
 
@@ -1147,9 +1102,7 @@ def value(self, r=None):
             return self.valuebl(r)
         else:
             if r is None:
-                return self.valuebl(r) + (
-                    self.model.value(self.r_data, self.slice)[self.slice]
-                )
+                return self.valuebl(r) + (self.model.value(self.r_data, self.slice)[self.slice])
             else:
                 return self.valuebl(r) + (self.model.value(r))
 
@@ -1360,18 +1313,12 @@ def prune(self):
             msg = ["len(check_models): %s", "len(best_model): %s", "i: %s"]
             logger.debug("\n".join(msg), len(check_models), len(best_model), i)
 
-            addpars = (
-                best_model.npars()
-                - check_models[i].npars()
-                - best_model[i].npars(count_fixed=False)
-            )
+            addpars = best_model.npars() - check_models[i].npars() - best_model[i].npars(count_fixed=False)
 
             # Remove contribution of (effectively) fixed peaks
             y = np.array(y_nobl)
             if lo > 0:
-                logger.debug(
-                    "len(sum): %s", len(np.sum(best_modely[:lo], axis=0))
-                )
+                logger.debug("len(sum): %s", len(np.sum(best_modely[:lo], axis=0)))
                 y -= np.sum(best_modely[:lo], axis=0)
             if hi < len(best_modely):
                 y -= np.sum(best_modely[hi:], axis=0)
@@ -1408,9 +1355,7 @@ def prune(self):
                 "check_qual: %s",
                 "sorted check_qual: %s",
             ]
-            logger.debug(
-                "\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg
-            )
+            logger.debug("\n".join(msg), best_qual.stat, [c.stat for c in check_qual], arg)
 
             arg = arg[-1]
             newbest_qual = check_qual[arg]
diff --git a/diffpy/srmise/modelevaluators/aic.py b/diffpy/srmise/modelevaluators/aic.py
index 915cace..729e556 100644
--- a/diffpy/srmise/modelevaluators/aic.py
+++ b/diffpy/srmise/modelevaluators/aic.py
@@ -21,7 +21,8 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-class AIC (ModelEvaluator):
+
+class AIC(ModelEvaluator):
     """Evaluate and compare models with the AIC statistic.
 
     Akaike's Information Criterion (AIC) is a method for comparing statistical
@@ -52,11 +53,11 @@ def __init__(self):
     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AIC (Akaike's Information Criterion).
 
-    Parameters
-    fit: A ModelCluster
-    count_fixed: Whether fixed parameters are considered.
-    kshift: (0) Treat the model has having this many additional
-            parameters.  Negative values also allowed.
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters.  Negative values also allowed."""
         # Number of parameters.  By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -77,7 +78,6 @@ def evaluate(self, fit, count_fixed=False, kshift=0):
 
         return self.stat
 
-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a
         model's quality."""
@@ -86,36 +86,37 @@ def minpoints(self, npars):
 
     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""
 
-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0
 
-        return (2*k)*fudgefactor
+        return (2 * k) * fudgefactor
 
     def growth_justified(self, fit, k_prime):
         """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified
         given the current quality of the fit.
 
-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost.  The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
+        if the cost of adding these parameters is less than the current chiSquared cost.  The validity of this assumption (which
+        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.
+        """
 
         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)
 
-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit
 
         # If there are too few points to calculate AIC with the requested number of parameter
         # then clearly that increase in parameters is not justified.
         if n < self.minpoints(k_test):
             return False
 
-        #assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
+        # assert n >= self.minPoints(kActual) #check that AIC is defined for the actual fit
         if n < self.minpoints(k_actual):
             logger.warn("AIC.growth_justified(): too few data to evaluate quality reliably.")
-            n=self.minpoints(k_actual)
+            n = self.minpoints(k_actual)
 
-        penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)
+        penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n)
 
         return penalty < self.chisq
 
@@ -125,24 +126,25 @@ def akaikeweights(aics):
         aic_stats = np.array([aic.stat for aic in aics])
         aic_min = min(aic_stats)
 
-        return np.exp(-(aic_stats-aic_min)/2.)
+        return np.exp(-(aic_stats - aic_min) / 2.0)
 
     @staticmethod
     def akaikeprobs(aics):
         """Return sequence of Akaike probabilities for sequence of AICs"""
         aic_weights = AIC.akaikeweights(aics)
-        return aic_weights/np.sum(aic_weights)
+        return aic_weights / np.sum(aic_weights)
+
 
 # end of class AIC
 
 # simple test code
-if __name__ == '__main__':
+if __name__ == "__main__":
 
-    m1=AIC()
-    m2=AIC()
+    m1 = AIC()
+    m2 = AIC()
     m1.stat = 20
     m2.stat = 30
-    print m2 > m1
+    print(m2 > m1)
diff --git a/diffpy/srmise/modelevaluators/aicc.py b/diffpy/srmise/modelevaluators/aicc.py
index 57f0713..1e31b90 100644
--- a/diffpy/srmise/modelevaluators/aicc.py
+++ b/diffpy/srmise/modelevaluators/aicc.py
@@ -21,7 +21,8 @@
 
 logger = logging.getLogger("diffpy.srmise")
 
-class AICc (ModelEvaluator):
+
+class AICc(ModelEvaluator):
     """Evaluate and compare models with the AICc statistic.
 
     Akaike's Information Criterion w/ 2nd order correction for small sample
@@ -50,13 +51,13 @@ def __init__(self):
 
     def evaluate(self, fit, count_fixed=False, kshift=0):
         """Return quality of fit for given ModelCluster using AICc (Akaike's Information Criterion
-    with 2nd order correction for small sample size).
+        with 2nd order correction for small sample size).
 
-    Parameters
-    fit: A ModelCluster
-    count_fixed: Whether fixed parameters are considered.
-    kshift: (0) Treat the model has having this many additional
-            parameters.  Negative values also allowed.
+        Parameters
+        fit: A ModelCluster
+        count_fixed: Whether fixed parameters are considered.
+        kshift: (0) Treat the model as having this many additional
+                parameters.  Negative values also allowed."""
         # Number of parameters.  By default, fixed parameters are ignored.
         k = fit.model.npars(count_fixed=count_fixed) + kshift
         if k < 0:
@@ -77,7 +78,6 @@ def evaluate(self, fit, count_fixed=False, kshift=0):
 
         return self.stat
 
-
     def minpoints(self, npars):
         """Calculates the minimum number of points required to make an estimate of a
         model's quality."""
@@ -88,36 +88,37 @@ def minpoints(self, npars):
 
     def parpenalty(self, k, n):
         """Returns the cost for adding k parameters to the current model cluster."""
 
-        #Weight the penalty for additional parameters.
-        #If this isn't 1 there had better be a good reason.
-        fudgefactor = 1.
+        # Weight the penalty for additional parameters.
+        # If this isn't 1 there had better be a good reason.
+        fudgefactor = 1.0
 
-        return (2*k+float(2*k*(k+1))/(n-k-1))*fudgefactor
+        return (2 * k + float(2 * k * (k + 1)) / (n - k - 1)) * fudgefactor
 
     def growth_justified(self, fit, k_prime):
         """Returns whether adding k_prime parameters to the given model (ModelCluster) is justified
         given the current quality of the fit.
 
-        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
-        if the cost of adding these parameters is less than the current chiSquared cost.  The validity of this assumption (which
-        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future."""
+        The assumption is that adding k_prime parameters will result in "effectively 0" chiSquared cost, and so adding it is justified
+        if the cost of adding these parameters is less than the current chiSquared cost.  The validity of this assumption (which
+        depends on an unknown chiSquared value) and the impact of the errors used should be examined more thoroughly in the future.
+        """
 
         if self.chisq is None:
             self.chisq = self.chi_squared(fit.value(), fit.y_cluster, fit.error_cluster)
 
-        k_actual = fit.model.npars(count_fixed=False) #parameters in current fit
-        k_test = k_actual + k_prime #parameters in prospective fit
-        n = fit.size #the number of data points included in the fit
+        k_actual = fit.model.npars(count_fixed=False)  # parameters in current fit
+        k_test = k_actual + k_prime  # parameters in prospective fit
+        n = fit.size  # the number of data points included in the fit
 
         # If there are too few points to calculate AICc with the requested number of parameter
         # then clearly that increase in parameters is not justified.
if n < self.minpoints(k_test): return False - #assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit + # assert n >= self.minPoints(kActual) #check that AICc is defined for the actual fit if n < self.minpoints(k_actual): logger.warn("AICc.growth_justified(): too few data to evaluate quality reliably.") - n=self.minpoints(k_actual) + n = self.minpoints(k_actual) - penalty=self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) + penalty = self.parpenalty(k_test, n) - self.parpenalty(k_actual, n) return penalty < self.chisq @@ -127,24 +128,25 @@ def akaikeweights(aics): aic_stats = np.array([aic.stat for aic in aics]) aic_min = min(aic_stats) - return np.exp(-(aic_stats-aic_min)/2.) + return np.exp(-(aic_stats - aic_min) / 2.0) @staticmethod def akaikeprobs(aics): """Return sequence of Akaike probabilities for sequence of AICs""" aic_weights = AICc.akaikeweights(aics) - return aic_weights/np.sum(aic_weights) + return aic_weights / np.sum(aic_weights) + # end of class AICc # simple test code -if __name__ == '__main__': +if __name__ == "__main__": - m1=AICc() - m2=AICc() + m1 = AICc() + m2 = AICc() m1.stat = 20 m2.stat = 30 - print m2 > m1 + print(m2 > m1) diff --git a/diffpy/srmise/modelevaluators/base.py b/diffpy/srmise/modelevaluators/base.py index eb28272..54c304a 100644 --- a/diffpy/srmise/modelevaluators/base.py +++ b/diffpy/srmise/modelevaluators/base.py @@ -68,9 +68,7 @@ def __lt__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must already be calculated if self.higher_is_better: return self.stat < other.stat @@ -81,9 +79,7 @@ def __le__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must already be calculated if self.higher_is_better: return self.stat <= other.stat @@ -94,9 +90,7 @@ def __eq__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must already be calculated return self.stat == other.stat @@ -104,9 +98,7 @@ def __ne__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must already be calculated return self.stat != other.stat @@ -114,9 +106,7 @@ def __gt__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must already be calculated if self.higher_is_better: return self.stat > other.stat @@ -127,9 +117,7 @@ def __ge__(self, other): """ """ assert self.method == other.method # Comparison between same types required - assert ( - self.stat != None and other.stat != None - ) # The statistic must already be calculated + assert self.stat != None and other.stat != None # The statistic must 
already be calculated if self.higher_is_better: return self.stat >= other.stat diff --git a/diffpy/srmise/modelparts.py b/diffpy/srmise/modelparts.py index 4943e0a..6409d20 100644 --- a/diffpy/srmise/modelparts.py +++ b/diffpy/srmise/modelparts.py @@ -173,11 +173,7 @@ def fit( y, r, (y - self.value(r, range=range)) - 1.1 * (max(y) - min(y)), - *[ - i - for sublist in [[r, p.value(r, range=range)] for p in self] - for i in sublist - ] + *[i for sublist in [[r, p.value(r, range=range)] for p in self] for i in sublist], ) plt.draw() @@ -193,9 +189,7 @@ def fit( # clean up parameters for p in self: - p.pars = p.owner().transform_parameters( - p.pars, in_format="internal", out_format="internal" - ) + p.pars = p.owner().transform_parameters(p.pars, in_format="internal", out_format="internal") # Supply estimated covariance matrix if requested. # The precise relationship between f[1] and estimated covariance matrix is a little unclear from @@ -282,8 +276,7 @@ def residual_jacobian(self, freepars, r, y_expected, y_error, range=None): """ if len(freepars) == 0: raise ValueError( - "Argument freepars has length 0. The Jacobian " - "is only defined with >=1 free parameters." + "Argument freepars has length 0. The Jacobian " "is only defined with >=1 free parameters." ) self.pack_freepars(freepars) @@ -340,10 +333,7 @@ def covariance(self, format="internal", **kwds): try: idx = int(k[1:]) except ValueError: - emsg = ( - "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." - % k - ) + emsg = "Invalid format keyword '%s'. They must be specified as 'f0', 'f1', etc." % k raise ValueError(emsg) formats[int(k[1:])] = v @@ -431,10 +421,7 @@ def __init__(self, owner, pars, free=None, removable=True, static_owner=False): self._owner = owner if len(pars) != owner.npars: - emsg = ( - "The length of pars must equal the number of parameters " - + "specified by the model part owner." - ) + emsg = "The length of pars must equal the number of parameters " + "specified by the model part owner." raise ValueError(emsg) self.pars = np.array(pars[:]) # pars[:] in case pars is a ModelPart @@ -466,10 +453,7 @@ def changeowner(self, owner): emsg = "Cannot change owner if static_owner is True." raise SrMiseStaticOwnerError(emsg) if self._owner.npars != owner.npars: - emsg = ( - "New owner specifies different number of parameters than " - + "original owner." - ) + emsg = "New owner specifies different number of parameters than " + "original owner." raise SrMiseStaticOwnerError(emsg) self._owner = owner @@ -524,9 +508,7 @@ def copy(self): The original and the copy are completely independent, except they both reference the same owner.""" - return type(self).__call__( - self._owner, self.pars, self.free, self.removable, self.static_owner - ) + return type(self).__call__(self._owner, self.pars, self.free, self.removable, self.static_owner) def __getitem__(self, key_or_idx): """Return parameter of peak corresponding with key_or_idx. 
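The comment in ModelParts.fit() above about the unclear relationship between f[1] and the estimated covariance matrix refers to a standard scipy.optimize.leastsq subtlety: the returned cov_x comes from the Jacobian alone and must be scaled by the reduced chi-squared of the fit. A self-contained sketch on a made-up straight-line problem (assumes scipy is installed):

import numpy as np
from scipy.optimize import leastsq

np.random.seed(0)
x = np.linspace(0.0, 10.0, 50)
y = 2.0 * x + 1.0 + 0.3 * np.random.randn(len(x))

def residual(p):
    return y - (p[0] * x + p[1])

p_opt, cov_x, infodict, mesg, ier = leastsq(residual, [1.0, 0.0], full_output=True)
s_sq = (residual(p_opt) ** 2).sum() / (len(x) - len(p_opt))  # reduced chi-squared
cov = cov_x * s_sq  # estimated covariance of [slope, intercept]
print(p_opt, np.sqrt(np.diag(cov)))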
@@ -580,11 +562,7 @@ def npars(self, count_fixed=True): def __str__(self): """Return string representation of ModelPart parameters.""" - return str( - self._owner.transform_parameters( - self.pars, in_format="internal", out_format="default_output" - ) - ) + return str(self._owner.transform_parameters(self.pars, in_format="internal", out_format="default_output")) def __eq__(self, other): """ """ diff --git a/diffpy/srmise/multimodelselection.py b/diffpy/srmise/multimodelselection.py index 01995e2..53773f1 100644 --- a/diffpy/srmise/multimodelselection.py +++ b/diffpy/srmise/multimodelselection.py @@ -54,9 +54,7 @@ def __init__(self): self.classweights = {} self.classprobs = {} self.sortedclassprobs = {} - self.sortedclasses = ( - {} - ) # dg->as self.classes, but with model indices sorted by best AIC + self.sortedclasses = {} # dg->as self.classes, but with model indices sorted by best AIC PeakStability.__init__(self) return @@ -71,9 +69,7 @@ def makeaics(self, dgs, dr, filename=None): nominal value. filename - Optional file to save pickled results """ - aics_out = ( - {} - ) # Version of self.aics that holds only the statistic, not the AIC object. + aics_out = {} # Version of self.aics that holds only the statistic, not the AIC object. self.dgs = np.array(dgs) for i, dg in enumerate(self.dgs): self.dgs_idx[dg] = i @@ -100,17 +96,13 @@ def makeaics(self, dgs, dr, filename=None): # modelevaluators subpackage are in need of a rewrite, and so it would be # best to do them all at once. dg0 = self.dgs[0] - mc = ModelCluster( - result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf - ) + mc = ModelCluster(result[1], result[2], r, y, dg0 * np.ones(len(r)), None, em, self.ppe.pf) em0 = mc.quality() for dg in self.dgs: em_instance = em() em_instance.chisq = em0.chisq * (dg0 / dg) ** 2 # rescale chi-square - em_instance.evaluate( - mc - ) # evaluate AIC without recalculating chi-square + em_instance.evaluate(mc) # evaluate AIC without recalculating chi-square self.aics[dg].append(em_instance) aics_out[dg].append(em_instance.stat) @@ -198,9 +190,7 @@ def animate_probs(self, step=False, duration=0.0, **kwds): best_idx = self.sortedprobs[self.dgs[0]][-1] (line,) = plt.plot(self.dgs, self.aicprobs[self.dgs[0]]) vline = plt.axvline(self.dgs[0]) - (dot,) = plt.plot( - self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro" - ) + (dot,) = plt.plot(self.dgs[best_idx], self.aicprobs[self.dgs[0]][best_idx], "ro") plt.subplot(212) self.setcurrent(best_idx) @@ -244,9 +234,7 @@ def animate_classprobs(self, step=False, duration=0.0, **kwds): arrow_left = len(self.classes) - 1 arrow_right = arrow_left + 0.05 * arrow_left (line,) = plt.plot(range(len(self.classes)), self.classprobs[self.dgs[0]]) - (dot,) = plt.plot( - self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro" - ) + (dot,) = plt.plot(self.dgs[best_idx], self.classprobs[self.dgs[0]][bestclass_idx], "ro") plt.axvline(arrow_left, color="k") ax2 = ax1.twinx() ax2.set_ylim(self.dgs[0], self.dgs[-1]) @@ -315,9 +303,7 @@ def classify(self, r, tolerance=0.05): self.classes_idx = {} self.class_tolerance = None - classes = ( - [] - ) # each element is a list of the models (result indices) in the class + classes = [] # each element is a list of the models (result indices) in the class classes_idx = {} # given an integer corresponding to a model, return its class epsqval = {} # holds the squared value of each class' exemplar peaks ebsqval = {} # holds the squared value of each class exemplar baselines @@ -358,9 +344,7 @@ def 
classify(self, r, tolerance=0.05): for p, ep in zip(psqval, epsqval[c]): basediff = np.abs(np.sum(p - ep)) # if basediff > tolerance*np.sum(ep): - if basediff > tolerance * np.sum( - ep - ) or basediff > tolerance * np.sum(p): + if basediff > tolerance * np.sum(ep) or basediff > tolerance * np.sum(p): badpeak = True break if badpeak: @@ -369,9 +353,7 @@ def classify(self, r, tolerance=0.05): # check baseline values basediff = np.abs(np.sum(bsqval - ebsqval[c])) # if basediff > tolerance*np.sum(ebsqval[c]): - if basediff > tolerance * np.sum( - ebsqval[c] - ) or basediff > tolerance * np.sum(bsqval): + if basediff > tolerance * np.sum(ebsqval[c]) or basediff > tolerance * np.sum(bsqval): continue # that's all the checks, add to current class @@ -419,9 +401,7 @@ def makeclassweights(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classweights[dg] = em.akaikeweights( - [self.aics[dg][b] for b in bestinclass] - ) + self.classweights[dg] = em.akaikeweights([self.aics[dg][b] for b in bestinclass]) def makeclassprobs(self): self.classprobs = {} @@ -429,9 +409,7 @@ def makeclassprobs(self): for dg in self.dgs: bestinclass = [cls[-1] for cls in self.sortedclasses[dg]] - self.classprobs[dg] = em.akaikeprobs( - [self.aics[dg][b] for b in bestinclass] - ) + self.classprobs[dg] = em.akaikeprobs([self.aics[dg][b] for b in bestinclass]) def makesortedclassprobs(self): self.sortedclassprobs = {} @@ -577,9 +555,7 @@ def plot3dclassprobs(self, **kwds): elif norm is "full": mcolor = len(self.results) if class_size is "number": - norm = colors.BoundaryNorm( - np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1 - ) + norm = colors.BoundaryNorm(np.linspace(0, mcolor + 1, mcolor + 2), mcolor + 1) if class_size is "fraction": norm = colors.Normalize(0.0, 1.0) @@ -637,18 +613,14 @@ def plot3dclassprobs(self, **kwds): cbaxis = fig.add_axes(rect) # Remove all colorbar.make_axes keywords except orientation - kwds = eatkwds( - "fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds - ) + kwds = eatkwds("fraction", "pad", "shrink", "aspect", "anchor", "panchor", **kwds) else: kwds.setdefault("shrink", 0.75) # In matplotlib 1.1.0 make_axes_gridspec ignores anchor and panchor keywords. # Eat these keywords for now. kwds = eatkwds("anchor", "panchor", **kwds) - cbaxis, kwds = colorbar.make_axes_gridspec( - ax, **kwds - ) # gridspec allows tight_layout + cbaxis, kwds = colorbar.make_axes_gridspec(ax, **kwds) # gridspec allows tight_layout plt.tight_layout() # do it after cbaxis, so colorbar isn't ignored cb = colorbar.ColorbarBase(cbaxis, cmap=cmap, norm=norm, **kwds) diff --git a/diffpy/srmise/pdfdataset.py b/diffpy/srmise/pdfdataset.py index feb64fa..034a93d 100644 --- a/diffpy/srmise/pdfdataset.py +++ b/diffpy/srmise/pdfdataset.py @@ -166,10 +166,10 @@ def read(self, filename): self.readStr(open(filename, "rb").read()) except PDFDataFormatError as err: basename = os.path.basename(filename) - emsg = ( - "Could not open '%s' due to unsupported file format " - + "or corrupted data. [%s]" - ) % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. 
[%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) self.filename = os.path.abspath(filename) return self @@ -350,10 +350,7 @@ def writeStr(self): lines.append("##### start data") lines.append("#L r(A) G(r) d_r d_Gr") for i in range(len(self.robs)): - lines.append( - "%g %g %g %g" - % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i]) - ) + lines.append("%g %g %g %g" % (self.robs[i], self.Gobs[i], self.drobs[i], self.dGobs[i])) # that should be it datastring = "\n".join(lines) + "\n" return datastring diff --git a/diffpy/srmise/pdfpeakextraction.py b/diffpy/srmise/pdfpeakextraction.py index 06bd2be..1971bce 100644 --- a/diffpy/srmise/pdfpeakextraction.py +++ b/diffpy/srmise/pdfpeakextraction.py @@ -234,8 +234,7 @@ def resampledata(self, dr, **kwds): # Not a robust epsilon test, but all physical Nyquist rates in same oom. if dr - dr_nyquist > eps: logger.warning( - "Resampling at %s, below Nyquist rate of %s. Information will be lost!" - % (dr, dr_nyquist) + "Resampling at %s, below Nyquist rate of %s. Information will be lost!" % (dr, dr_nyquist) ) r = np.arange(max(self.x[0], self.rng[0]), min(self.x[-1], self.rng[1]), dr) @@ -440,9 +439,7 @@ def extract(self, **kwds): break else: - ext = ModelCluster( - ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf - ) + ext = ModelCluster(ext.model, bl, r1, y1, y_error1, None, self.error_method, self.pf) ext.prune() logger.info("Model after resampling and termination ripples:\n%s", ext) @@ -457,9 +454,7 @@ def extract(self, **kwds): logger.info(str(cov)) # logger.info("Correlations > .8:\n%s", "\n".join(str(c) for c in cov.correlationwarning(.8))) except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + logger.warn("Covariance not defined for final model. Fit may not have converged.") logger.info(str(ext)) # Update calculated instance variables @@ -532,9 +527,7 @@ def fit(self, **kwds): try: logger.info(str(cov)) except SrMiseUndefinedCovarianceError as e: - logger.warn( - "Covariance not defined for final model. Fit may not have converged." - ) + logger.warn("Covariance not defined for final model. 
Fit may not have converged.") # Update calculated instance variables self.extraction_type = "fit" @@ -703,13 +696,8 @@ def writepwastr(self, comments): lines.append("## Model Quality") # Quality of fit - lines.append( - "# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat - ) - lines.append( - "# Free parameters in extracted peaks: %s" - % self.extracted.model.npars(count_fixed=False) - ) + lines.append("# Quality reported by ModelEvaluator: %s" % self.extracted.quality().stat) + lines.append("# Free parameters in extracted peaks: %s" % self.extracted.model.npars(count_fixed=False)) if self.baseline is not None: fblpars = self.baseline.npars(count_fixed=False) else: @@ -830,15 +818,11 @@ def resample(orig_r, orig_y, new_r): dr = (orig_r[-1] - orig_r[0]) / (n - 1) if new_r[0] < orig_r[0]: - logger.warning( - "Resampling outside original grid: %s (requested) < %s (original)" - % (new_r[0], orig_r[0]) - ) + logger.warning("Resampling outside original grid: %s (requested) < %s (original)" % (new_r[0], orig_r[0])) if new_r[-1] > orig_r[-1]: logger.warning( - "Resampling outside original grid: %s (requested) > %s (original)" - % (new_r[-1], orig_r[-1]) + "Resampling outside original grid: %s (requested) > %s (original)" % (new_r[-1], orig_r[-1]) ) new_y = new_r * 0.0 diff --git a/diffpy/srmise/peakextraction.py b/diffpy/srmise/peakextraction.py index 8f1d404..30235e6 100644 --- a/diffpy/srmise/peakextraction.py +++ b/diffpy/srmise/peakextraction.py @@ -179,9 +179,7 @@ def defaultvars(self, *args): self.effective_dy = self.dy else: # A terribly crude guess - self.effective_dy = ( - 0.05 * (np.max(self.y) - np.min(self.y)) * np.ones(len(self.x)) - ) + self.effective_dy = 0.05 * (np.max(self.y) - np.min(self.y)) * np.ones(len(self.x)) elif np.isscalar(self.effective_dy) and self.effective_dy > 0: self.effective_dy = self.effective_dy * np.ones(len(self.x)) @@ -207,9 +205,7 @@ def defaultvars(self, *args): self.baseline = self.baseline.actualize(epars, "internal") logger.info("Estimating baseline: %s" % self.baseline) except (NotImplementedError, SrMiseEstimationError): - logger.error( - "Could not estimate baseline from provided BaselineFunction, trying default values." - ) + logger.error("Could not estimate baseline from provided BaselineFunction, trying default values.") self.baseline = None if self.baseline is None or "baseline" in args: @@ -282,10 +278,10 @@ def read(self, filename): except SrMiseDataFormatError as err: logger.exception("") basename = os.path.basename(filename) - emsg = ( - "Could not open '%s' due to unsupported file format " - + "or corrupted data. [%s]" - ) % (basename, err) + emsg = ("Could not open '%s' due to unsupported file format " + "or corrupted data. 
[%s]") % ( + basename, + err, + ) raise SrMiseFileError(emsg) return self @@ -466,10 +462,7 @@ def readstr(self, datastring): for line in start_data.split("\n"): l = line.split() if len(arrays) != len(l): - emsg = ( - "Number of value fields does not match that given by '%s'" - % start_data_info - ) + emsg = "Number of value fields does not match that given by '%s'" % start_data_info for a, v in zip(arrays, line.split()): a.append(float(v)) except (ValueError, IndexError) as err: @@ -503,9 +496,7 @@ def readstr(self, datastring): if re.match(r"^None$", mc): self.extracted = None else: - self.extracted = ModelCluster.factory( - mc, pfbaselist=safepf, blfbaselist=safebf - ) + self.extracted = ModelCluster.factory(mc, pfbaselist=safepf, blfbaselist=safebf) def write(self, filename): """Write string representation of PeakExtraction instance to file. @@ -706,11 +697,7 @@ def estimate_peak(self, x, add=True): # Determine clusters using initial_peaks and pre-defined or estimated baseline rangeslice = self.getrangeslice() x1 = self.x[rangeslice] - y1 = ( - self.y[rangeslice] - - self.baseline.value(x1) - - self.initial_peaks.value(x1) - ) + y1 = self.y[rangeslice] - self.baseline.value(x1) - self.initial_peaks.value(x1) dy = self.effective_dy[rangeslice] if x < x1[0] or x > x1[-1]: @@ -730,9 +717,7 @@ def estimate_peak(self, x, add=True): y1 = y1[cslice] dy = dy[cslice] - mcluster = ModelCluster( - None, None, x1, y1, dy, None, self.error_method, self.pf - ) + mcluster = ModelCluster(None, None, x1, y1, dy, None, self.error_method, self.pf) mcluster.fit() if len(mcluster.model) > 0: @@ -798,11 +783,7 @@ def extract_single(self, recursion_depth=1): y = self.y[rangeslice] - ip.value(x) # List of ModelClusters containing extracted peaks. - mclusters = [ - ModelCluster( - None, bl, x, y, dy, dclusters.cut(0), self.error_method, self.pf - ) - ] + mclusters = [ModelCluster(None, bl, x, y, dy, dclusters.cut(0), self.error_method, self.pf)] # The minimum number of points required to make a valid fit, as # determined by the peak functions and error method used. This is a @@ -844,9 +825,7 @@ def extract_single(self, recursion_depth=1): ) else: # Update an existing cluster - mclusters[step.lastcluster_idx].change_slice( - step.cut(step.lastcluster_idx) - ) + mclusters[step.lastcluster_idx].change_slice(step.cut(step.lastcluster_idx)) # Find newly adjacent clusters adjacent = step.find_adjacent_clusters().ravel() @@ -937,9 +916,7 @@ def extract_single(self, recursion_depth=1): right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate( - [np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))] - ) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. 
near_peaks = Peaks([full_cluster.model[i] for i in near_peaks]) @@ -983,9 +960,7 @@ def extract_single(self, recursion_depth=1): self.error_method, self.pf, ) - recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified( - checkrec, min_npars - ) + recurse = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse and recursion_depth < 3: logger.info( @@ -1004,8 +979,7 @@ def extract_single(self, recursion_depth=1): rec_search.extract_single(recursion_depth + 1) rec = rec_search.extracted logger.info( - "*********ENDING RECURSION level %s (full boundary) ************\n" - % (recursion_depth + 1) + "*********ENDING RECURSION level %s (full boundary) ************\n" % (recursion_depth + 1) ) # Incorporate best peaks from recursive search. @@ -1024,9 +998,7 @@ def extract_single(self, recursion_depth=1): "%s", "---End of combining clusters---", ] - logger.debug( - "\n".join(msg), mclusters[step.lastcluster_idx], full_cluster - ) + logger.debug("\n".join(msg), mclusters[step.lastcluster_idx], full_cluster) mclusters[step.lastcluster_idx] = full_cluster ### End update cluster fits ### @@ -1043,9 +1015,7 @@ def extract_single(self, recursion_depth=1): cleft = step.clusters[idx - 1] cright = step.clusters[idx] - new_cluster = ModelCluster.join_adjacent( - mclusters[idx - 1], mclusters[idx] - ) + new_cluster = ModelCluster.join_adjacent(mclusters[idx - 1], mclusters[idx]) # Estimate coordinate where clusters combine. border_x = 0.5 * (x[cleft[1]] + x[cright[0]]) @@ -1089,9 +1059,7 @@ def extract_single(self, recursion_depth=1): right_data = min(len(x), x.searchsorted(peak_pos[pivot + 1]) + 1) near_peaks = np.append(near_peaks, pivot) - other_peaks = np.concatenate( - [np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))] - ) + other_peaks = np.concatenate([np.arange(0, pivot - 1), np.arange(pivot + 1, len(peak_pos))]) # Go from indices to lists of peaks. near_peaks = Peaks([new_cluster.model[i] for i in near_peaks]) @@ -1202,14 +1170,11 @@ def extract_single(self, recursion_depth=1): self.error_method, self.pf, ) - recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified( - checkrec, min_npars - ) + recurse2 = len(near_peaks) > 0 and checkrec.quality().growth_justified(checkrec, min_npars) if recurse2 and recursion_depth < 3: logger.info( - "\n*********STARTING RECURSION level %s (prefit)************" - % (recursion_depth + 1) + "\n*********STARTING RECURSION level %s (prefit)************" % (recursion_depth + 1) ) rec_search2 = PeakExtraction() rec_search2.setdata(rec_r2, rec_y2, None, rec_error2) @@ -1223,8 +1188,7 @@ def extract_single(self, recursion_depth=1): rec_search2.extract_single(recursion_depth + 1) rec2 = rec_search2.extracted logger.info( - "*********ENDING RECURSION level %s (prefit) ************\n" - % (recursion_depth + 1) + "*********ENDING RECURSION level %s (prefit) ************\n" % (recursion_depth + 1) ) # Incorporate best peaks from recursive search. 
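Both recursion guards in extract_single() (recurse above and recurse2 further down) reduce to the growth_justified() test from the modelevaluators diff earlier. A sketch of that decision rule, assuming the AICc cost and the package's premise that the prospective parameters would drive the residual to roughly zero; all numbers are hypothetical:

def aicc_cost(k, n):
    # AICc parameter penalty, as in AICc.parpenalty
    return 2 * k + float(2 * k * (k + 1)) / (n - k - 1)

def growth_justified(chisq, k_actual, k_prime, n):
    k_test = k_actual + k_prime
    if n < k_test + 2:  # AICc undefined once n - k - 1 <= 0
        return False
    return aicc_cost(k_test, n) - aicc_cost(k_actual, n) < chisq

# A 40-point cluster with chi-squared 50 justifies one more
# 3-parameter peak (extra cost ~7.9); one with chi-squared 5 does not.
print(growth_justified(50.0, 3, 3, 40))  # True
print(growth_justified(5.0, 3, 3, 40))   # False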
@@ -1255,9 +1219,7 @@ def extract_single(self, recursion_depth=1): "---End of combining clusters---", ] - logger.debug( - "\n".join(msg), mclusters[idx - 1], mclusters[idx], new_cluster - ) + logger.debug("\n".join(msg), mclusters[idx - 1], mclusters[idx], new_cluster) mclusters[idx - 1] = new_cluster del mclusters[idx] diff --git a/diffpy/srmise/peaks/base.py b/diffpy/srmise/peaks/base.py index d8e3469..6e87d83 100644 --- a/diffpy/srmise/peaks/base.py +++ b/diffpy/srmise/peaks/base.py @@ -22,6 +22,7 @@ logger = logging.getLogger("diffpy.srmise") + class PeakFunction(BaseFunction): """Base class for functions which represent peaks. @@ -60,7 +61,15 @@ class PeakFunction(BaseFunction): transform_parameters() """ - def __init__(self, parameterdict, parformats, default_formats, metadict, base=None, Cache=None): + def __init__( + self, + parameterdict, + parformats, + default_formats, + metadict, + base=None, + Cache=None, + ): """Set parameterdict defined by subclass parameterdict: A dictionary mapping string keys to their index in a @@ -82,24 +91,31 @@ def __init__(self, parameterdict, parformats, default_formats, metadict, base=No raise ValueError(emsg) BaseFunction.__init__(self, parameterdict, parformats, default_formats, metadict, base, Cache) - #### "Virtual" class methods #### def scale_at(self, peak, x, scale): emsg = "scale_at must be implemented in a PeakFunction subclass." raise NotImplementedError(emsg) - #### Methods required by BaseFunction #### - def actualize(self, pars, in_format="default_input", free=None, removable=True, static_owner=False): + def actualize( + self, + pars, + in_format="default_input", + free=None, + removable=True, + static_owner=False, + ): converted = self.transform_parameters(pars, in_format, out_format="internal") return Peak(self, converted, free, removable, static_owner) def getmodule(self): return __name__ -#end of class PeakFunction + +# end of class PeakFunction + class Peaks(ModelParts): """A collection for Peak objects.""" @@ -110,12 +126,12 @@ def __init__(self, *args, **kwds): def argsort(self, key="position"): """Return sequence of indices which sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) # In normal use the peaks will already be sorted, so check for it. - sorted=True - for i in range(len(keypars)-1): - if keypars[i] > keypars[i+1]: - sorted=False + sorted = True + for i in range(len(keypars) - 1): + if keypars[i] > keypars[i + 1]: + sorted = False break if not sorted: return keypars.argsort().tolist() @@ -142,14 +158,14 @@ def match_at(self, x, y): orig = self.copy() try: - scale = y/height + scale = y / height # First attempt at scaling peaks. Record which peaks, if any, # were not scaled in case a second attempt is required. scaled = [] all_scaled = True any_scaled = False - fixed_height = 0. + fixed_height = 0.0 for peak in self: scaled.append(peak.scale_at(x, scale)) all_scaled = all_scaled and scaled[-1] @@ -161,13 +177,13 @@ def match_at(self, x, y): if not all_scaled and fixed_height < y and fixed_height < height: self[:] = orig[:] any_scaled = False - scale = (y - fixed_height)/(height - fixed_height) + scale = (y - fixed_height) / (height - fixed_height) for peak, s in (self, scaled): if s: # "or" is short-circuited, so scale_at() must be first # to guarantee it is called. 
any_scaled = peak.scale_at(x, scale) or any_scaled - except Exception, e: + except Exception as e: logger.debug("An exception prevented matching -- %s", e) self[:] = orig[:] return False @@ -175,13 +191,15 @@ def match_at(self, x, y): def sort(self, key="position"): """Sort peaks in order specified by key.""" - keypars=np.array([p[key] for p in self]) + keypars = np.array([p[key] for p in self]) order = keypars.argsort() self[:] = [self[idx] for idx in order] return + # End of class Peaks + class Peak(ModelPart): """Represents a single peak associated with a PeakFunction subclass.""" @@ -225,7 +243,7 @@ def scale_at(self, x, scale): try: adj_pars = self._owner.scale_at(self.pars, x, scale) - except SrMiseScalingError, err: + except SrMiseScalingError as err: logger.debug("Cannot scale peak:", err) return False @@ -256,10 +274,10 @@ def factory(peakstr, ownerlist): try: pdict[l[0]] = eval(l[1]) except Exception: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) else: - emsg = ("Invalid parameter: %s" %d) + emsg = "Invalid parameter: %s" % d raise SrMiseDataFormatError(emsg) # Correctly initialize the base function, if one exists. @@ -271,10 +289,11 @@ def factory(peakstr, ownerlist): return Peak(**pdict) + # End of class Peak # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -283,26 +302,26 @@ def factory(peakstr, ownerlist): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import GaussianOverR - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf = GaussianOverR(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.actualize(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.actualize(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/gaussian.py b/diffpy/srmise/peaks/gaussian.py index 3eae227..3913b6c 100644 --- a/diffpy/srmise/peaks/gaussian.py +++ b/diffpy/srmise/peaks/gaussian.py @@ -22,20 +22,21 @@ logger = logging.getLogger("diffpy.srmise") -class Gaussian (PeakFunction): + +class Gaussian(PeakFunction): """Methods for evaluation and parameter estimation of width-limited Gaussian. - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. 
+ The internal parameterization is unconstrained, but is interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. - Note that all full width at half maximum values are for the - corresponding Gaussian. + Note that all full width at half maximum values are for the + corresponding Gaussian. """ # Possibly implement cutoff later, but low priority. @@ -46,9 +47,9 @@ class Gaussian (PeakFunction): def __init__(self, maxwidth, Cache=None): """maxwidth defined as full width at half maximum for the corresponding Gaussian, which is physically relevant.""" - parameterdict={'position':0,'width':1,'area':2} - formats=['internal','pwa','mu_sigma_area'] - default_formats={'default_input':'internal', 'default_output':'pwa'} + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} metadict = {} metadict["maxwidth"] = (maxwidth, repr) PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) @@ -59,16 +60,16 @@ def __init__(self, maxwidth, Cache=None): self.maxwidth = maxwidth ### Useful constants ### - #c1 and c2 help with function values - self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2))) - self.c2 = self.maxwidth**2/(8*np.log(2)) + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) - #c3 and c4 help with parameter estimation - self.c3 = .5*np.sqrt(np.pi/np.log(2)) - self.c4 = np.pi/(self.maxwidth*2) + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) - #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma - self.sigma2fwhm = 2*np.sqrt(2*np.log(2)) + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) return @@ -102,39 +103,49 @@ def estimate_parameters(self, r, y): raise SrMiseEstimationError(emsg) #### Estimation #### - guesspars = np.array([0., 0., 0.], dtype=float) + guesspars = np.array([0.0, 0.0, 0.0], dtype=float) min_y = use_y.min() max_y = use_y.max() center = use_r[use_y.argmax()] if min_y != max_y: - weights = (use_y-min_y)**2 - guesspars[0] = np.sum(use_r*weights)/sum(weights) + weights = (use_y - min_y) ** 2 + guesspars[0] = np.sum(use_r * weights) / sum(weights) # guesspars[0] = center if use_y[0] < max_y: - sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y)) + sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y)) else: - sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y)) - if use_y[-1] < max_y: - sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y)) - else: - sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y)) - guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm + sigma_left = np.sqrt( + -0.5 * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 / np.log(min_y / max_y) + ) + if use_y[-1] < max_y: + sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y)) + else: + sigma_right = np.sqrt( + -0.5 * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 / np.log(min_y / max_y) + ) + guesspars[1] = 0.5 * (sigma_left + sigma_right) * self.sigma2fwhm if guesspars[1] > self.maxwidth: - #account for width-limit - guesspars[2] = self.c3*max_y*self.maxwidth - guesspars[1] = np.pi/2 #parameterized in terms of sin + # account for width-limit + guesspars[2] = self.c3 * max_y * self.maxwidth + guesspars[1] = np.pi / 2 # parameterized in terms of sin else: - guesspars[2] = self.c3*max_y*guesspars[1] - guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.)
#parameterized in terms of sin + guesspars[2] = self.c3 * max_y * guesspars[1] + guesspars[1] = np.arcsin( + 2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0 + ) # parameterized in terms of sin return guesspars @@ -149,17 +160,17 @@ def scale_at(self, pars, x, scale): x: (float) Position of the border scale: (float > 0) Size of scaling at x.""" if scale <= 0: - emsg = ''.join(["Cannot scale by ", str(scale), "."]) + emsg = "".join(["Cannot scale by ", str(scale), "."]) raise SrMiseScalingError(emsg) if scale == 1: return pars else: - ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio + ratio = 1 / scale # Ugly: Equations orig. solved in terms of ratio tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") - #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) + # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) # 2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2) # 3. mu1=mu2=rmax (the maximum of a Gaussian occurs at r=mu) # for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end). @@ -168,51 +179,57 @@ def scale_at(self, pars, x, scale): # the semi-nasty algebra reduces to something nice mu2 = mu1 - area2 = np.sqrt(area1**2/(2*np.log(ratio)*sigma1**2/(x-mu1)**2+1)) - sigma2 = sigma1*area2/area1 + area2 = np.sqrt(area1**2 / (2 * np.log(ratio) * sigma1**2 / (x - mu1) ** 2 + 1)) + sigma2 = sigma1 * area2 / area1 tpars[0] = mu2 tpars[1] = sigma2 tpars[2] = area2 try: tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") - except SrMiseTransformationError, err: + except SrMiseTransformationError as err: raise SrMiseScalingError(str(err)) return tpars def _jacobianraw(self, pars, r, free): """Return Jacobian of width-limited Gaussian. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. """ - jacobian=[None, None, None] + jacobian = [None, None, None] if (free == False).sum() == self.npars: return jacobian - #Optimization - sin_p = np.sin(pars[1]) + 1. 
- p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(self.c1*np.sqrt(sin_p)) + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (self.c1 * np.sqrt(sin_p)) if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has + # non-zero area and the function is parameterized such that values of either sign represent equivalent + # curves, I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic + # parameter space should this improbable scenario occur. # jacobian[2] = sign(pars[2])*exp_p if pars[2] >= 0: jacobian[2] = exp_p @@ -223,18 +240,18 @@ def _jacobianraw(self, pars, r, free): def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. - - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result.
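The "preferred range" restoration named in this docstring is implemented a few lines below by the floor/mod arithmetic on temp[1]. The idea in isolation: the internal width parameter only enters the model through sin(p), so any value can be folded into [-pi/2, pi/2] without changing the peak:

import numpy as np

def fold_width(p):
    # Same arithmetic as _transform_parametersraw below: fold p into
    # [-pi/2, pi/2] while preserving sin(p).
    n = np.floor((p + np.pi / 2) / np.pi)
    return p - np.pi * n if np.mod(n, 2) == 0 else np.pi * n - p

for p in (0.3, 0.3 + 2 * np.pi, np.pi - 0.3):
    q = fold_width(p)
    assert -np.pi / 2 <= q <= np.pi / 2
    assert np.isclose(np.sin(q), np.sin(p))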
- Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ temp = np.array(pars) @@ -248,51 +265,58 @@ def _transform_parametersraw(self, pars, in_format, out_format): if in_format == "internal": # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) + n = np.floor((temp[1] + np.pi / 2) / np.pi) if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n + temp[1] = temp[1] - np.pi * n else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one elif in_format == "pwa": if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm + fwhm = temp[1] * self.sigma2fwhm if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): """Return value of width-limited Gaussian for the given parameters and r values. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. 
+ =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated """ - return np.abs(pars[2])/(self.c1*np.sqrt(np.sin(pars[1])+1.))* \ np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) + return ( + np.abs(pars[2]) + / (self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) def getmodule(self): return __name__ @@ -312,10 +336,11 @@ def max(self, pars): ymax = self._valueraw(pars, rmax) return np.array([rmax, ymax]) -#end of class Gaussian + +# end of class Gaussian # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -324,26 +349,26 @@ def max(self, pars): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import Peaks - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = Gaussian(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = Gaussian(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/gaussianoverr.py b/diffpy/srmise/peaks/gaussianoverr.py index f699d9f..d11467d 100644 --- a/diffpy/srmise/peaks/gaussianoverr.py +++ b/diffpy/srmise/peaks/gaussianoverr.py @@ -22,20 +22,21 @@ logger = logging.getLogger("diffpy.srmise") -class GaussianOverR (PeakFunction): + +class GaussianOverR(PeakFunction): """Methods for evaluation and parameter estimation of width-limited Gaussian/r. - Allowed formats are - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Allowed formats are + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] - The internal parameterization is unconstrained, but are interpreted - so that the width is between 0 and a user-provided maximum full width - at half maximum, and the area is positive. + The internal parameterization is unconstrained, but is interpreted + so that the width is between 0 and a user-provided maximum full width + at half maximum, and the area is positive. - Note that all full width at half maximum values are for the - corresponding Gaussian. + Note that all full width at half maximum values are for the + corresponding Gaussian. """ # Possibly implement cutoff later, but low priority.
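Before the GaussianOverR hunks continue, a round trip through the width transforms shared by Gaussian and GaussianOverR: the unconstrained internal parameter p maps to a FWHM in (0, maxwidth] via fwhm**2 = 0.5*(sin(p) + 1)*maxwidth**2, which is what keeps fits away from unphysical widths. The maxwidth and fwhm values below are arbitrary:

import numpy as np

maxwidth = 0.7
fwhm = 0.3
p = np.arcsin(2.0 * fwhm**2 / maxwidth**2 - 1.0)            # pwa -> internal
fwhm_back = np.sqrt(0.5 * (np.sin(p) + 1.0) * maxwidth**2)  # internal -> pwa
assert np.isclose(fwhm, fwhm_back)

# sigma <-> fwhm for a Gaussian: fwhm = 2*sqrt(2*ln 2)*sigma, the
# sigma2fwhm constant set in __init__ above (~2.3548).
print(2 * np.sqrt(2 * np.log(2)))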
@@ -46,9 +47,9 @@ class GaussianOverR (PeakFunction): def __init__(self, maxwidth, Cache=None): """maxwidth defined as full width at half maximum for the corresponding Gaussian, which is physically relevant.""" - parameterdict={'position':0,'width':1,'area':2} - formats=['internal','pwa','mu_sigma_area'] - default_formats={'default_input':'internal', 'default_output':'pwa'} + parameterdict = {"position": 0, "width": 1, "area": 2} + formats = ["internal", "pwa", "mu_sigma_area"] + default_formats = {"default_input": "internal", "default_output": "pwa"} metadict = {} metadict["maxwidth"] = (maxwidth, repr) PeakFunction.__init__(self, parameterdict, formats, default_formats, metadict, None, Cache) @@ -59,16 +60,16 @@ def __init__(self, maxwidth, Cache=None): self.maxwidth = maxwidth ### Useful constants ### - #c1 and c2 help with function values - self.c1 = self.maxwidth*np.sqrt(np.pi/(8*np.log(2))) - self.c2 = self.maxwidth**2/(8*np.log(2)) + # c1 and c2 help with function values + self.c1 = self.maxwidth * np.sqrt(np.pi / (8 * np.log(2))) + self.c2 = self.maxwidth**2 / (8 * np.log(2)) - #c3 and c4 help with parameter estimation - self.c3 = .5*np.sqrt(np.pi/np.log(2)) - self.c4 = np.pi/(self.maxwidth*2) + # c3 and c4 help with parameter estimation + self.c3 = 0.5 * np.sqrt(np.pi / np.log(2)) + self.c4 = np.pi / (self.maxwidth * 2) - #convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma - self.sigma2fwhm = 2*np.sqrt(2*np.log(2)) + # convert sigma to fwhm: fwhm = 2 sqrt(2 log 2) sigma + self.sigma2fwhm = 2 * np.sqrt(2 * np.log(2)) return @@ -102,39 +103,49 @@ def estimate_parameters(self, r, y): raise SrMiseEstimationError(emsg) #### Estimation #### - guesspars = np.array([0., 0., 0.], dtype=float) + guesspars = np.array([0.0, 0.0, 0.0], dtype=float) min_y = use_y.min() max_y = use_y.max() center = use_r[use_y.argmax()] if min_y != max_y: - weights = (use_y-min_y)**2 - guesspars[0] = np.sum(use_r*weights)/sum(weights) + weights = (use_y - min_y) ** 2 + guesspars[0] = np.sum(use_r * weights) / sum(weights) # guesspars[0] = center if use_y[0] < max_y: - sigma_left = np.sqrt(-.5*(use_r[0]-guesspars[0])**2/np.log(use_y[0]/max_y)) + sigma_left = np.sqrt(-0.5 * (use_r[0] - guesspars[0]) ** 2 / np.log(use_y[0] / max_y)) else: - sigma_left = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y)) - if use_y[-1] < max_y: - sigma_right = np.sqrt(-.5*(use_r[-1]-guesspars[0])**2/np.log(use_y[-1]/max_y)) - else: - sigma_right = np.sqrt(-.5*np.mean(np.abs(np.array([use_r[0]-guesspars[0], use_r[-1]-guesspars[0]])))**2/np.log(min_y/max_y)) - guesspars[1] = .5*(sigma_left+sigma_right)*self.sigma2fwhm + sigma_left = np.sqrt( + -0.5 * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 / np.log(min_y / max_y) + ) + if use_y[-1] < max_y: + sigma_right = np.sqrt(-0.5 * (use_r[-1] - guesspars[0]) ** 2 / np.log(use_y[-1] / max_y)) + else: + sigma_right = np.sqrt( + -0.5 * np.mean(np.abs(np.array([use_r[0] - guesspars[0], use_r[-1] - guesspars[0]]))) ** 2 / np.log(min_y / max_y) + ) + guesspars[1] = 0.5 * (sigma_left + sigma_right) * self.sigma2fwhm if guesspars[1] > self.maxwidth: - #account for width-limit - guesspars[2] = self.c3*max_y*guesspars[0]*self.maxwidth - guesspars[1] = np.pi/2 #parameterized in terms of sin + # account for width-limit + guesspars[2] = self.c3 * max_y * guesspars[0] * self.maxwidth + guesspars[1] = np.pi / 2 # parameterized in terms of sin else: - guesspars[2] = self.c3*max_y*guesspars[0]*guesspars[1] - guesspars[1] = np.arcsin(2*guesspars[1]**2/self.maxwidth**2-1.) #parameterized in terms of sin + guesspars[2] = self.c3 * max_y * guesspars[0] * guesspars[1] + guesspars[1] = np.arcsin( + 2 * guesspars[1] ** 2 / self.maxwidth**2 - 1.0 + ) # parameterized in terms of sin return guesspars @@ -149,17 +160,17 @@ def scale_at(self, pars, x, scale): x: (float) Position of the border scale: (float > 0) Size of scaling at x.""" if scale <= 0: - emsg = ''.join(["Cannot scale by ", str(scale), "."]) + emsg = "".join(["Cannot scale by ", str(scale), "."]) raise SrMiseScalingError(emsg) if scale == 1: return pars else: - ratio = 1/scale # Ugly: Equations orig. solved in terms of ratio + ratio = 1 / scale # Ugly: Equations orig.
solved in terms of ratio tpars = self.transform_parameters(pars, in_format="internal", out_format="mu_sigma_area") - #solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) + # solves 1. f(rmax;mu1,sigma1,area1)=f(rmax;mu2,sigma2,area2) # 2. f(x;mu1,sigma1,area1)=ratio*f(x;mu1,sigma2,area2) # 3. 1/2*(mu1+sqrt(mu1^2+sigma1^2))=1/2*(mu2+sqrt(mu2^2+sigma2^2))=rmax # for mu2, sigma2, area2 (with appropriate unit conversions to fwhm at the end). @@ -169,59 +180,70 @@ def scale_at(self, pars, x, scale): # position of the peak maximum try: rmax = self.max(pars)[0] - except ValueError, err: + except ValueError as err: raise SrMiseScalingError(str(err)) # lhs of eqn1/eqn2 multiplied by ratio. Then take the log. - log_ratio_prime = np.log(ratio)+(x-rmax)*(x-2*mu1+rmax)/(2*sigma1**2) + log_ratio_prime = np.log(ratio) + (x - rmax) * (x - 2 * mu1 + rmax) / (2 * sigma1**2) # the semi-nasty algebra reduces to something nice - sigma2 = np.sqrt(.5*rmax*(x-rmax)**2/(x-rmax+rmax*log_ratio_prime)) - mu2 = (sigma2**2+rmax**2)/rmax - area2 = area1*(sigma2/sigma1)*np.exp(-(rmax-mu1)**2/(2*sigma1**2))/np.exp(-(rmax-mu2)**2/(2*sigma2**2)) + sigma2 = np.sqrt(0.5 * rmax * (x - rmax) ** 2 / (x - rmax + rmax * log_ratio_prime)) + mu2 = (sigma2**2 + rmax**2) / rmax + area2 = ( + area1 + * (sigma2 / sigma1) + * np.exp(-((rmax - mu1) ** 2) / (2 * sigma1**2)) + / np.exp(-((rmax - mu2) ** 2) / (2 * sigma2**2)) + ) tpars[0] = mu2 tpars[1] = sigma2 tpars[2] = area2 try: tpars = self.transform_parameters(tpars, in_format="mu_sigma_area", out_format="internal") - except SrMiseTransformationError, err: + except SrMiseTransformationError as err: raise SrMiseScalingError(str(err)) return tpars def _jacobianraw(self, pars, r, free): """Return Jacobian of width-limited Gaussian/r. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation. + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation. """ - jacobian=[None, None, None] + jacobian = [None, None, None] if (free == False).sum() == self.npars: return jacobian - #Optimization - sin_p = np.sin(pars[1]) + 1. 
- p0minusr = pars[0]-r - exp_p = np.exp(-(p0minusr)**2/(self.c2*sin_p))/(np.abs(r)*self.c1*np.sqrt(sin_p)) + # Optimization + sin_p = np.sin(pars[1]) + 1.0 + p0minusr = pars[0] - r + exp_p = np.exp(-((p0minusr) ** 2) / (self.c2 * sin_p)) / (np.abs(r) * self.c1 * np.sqrt(sin_p)) if free[0]: - #derivative with respect to peak position - jacobian[0] = -2.*exp_p*p0minusr*np.abs(pars[2])/(self.c2*sin_p) + # derivative with respect to peak position + jacobian[0] = -2.0 * exp_p * p0minusr * np.abs(pars[2]) / (self.c2 * sin_p) if free[1]: - #derivative with respect to reparameterized peak width - jacobian[1] = -exp_p*np.abs(pars[2])*np.cos(pars[1])*(self.c2*sin_p-2*p0minusr**2)/(2.*self.c2*sin_p**2) + # derivative with respect to reparameterized peak width + jacobian[1] = ( + -exp_p + * np.abs(pars[2]) + * np.cos(pars[1]) + * (self.c2 * sin_p - 2 * p0minusr**2) + / (2.0 * self.c2 * sin_p**2) + ) if free[2]: - #derivative with respect to peak area - #abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has - #non-zero area and the function is paramaterized such that values of either sign represent equivalent - #curves I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic - #parameter space should this improbable scenario occur. + # derivative with respect to peak area + # abs'(x)=sign(x) for real x except at 0 where it is undetermined. Since any real peak necessarily has + # non-zero area and the function is parameterized such that values of either sign represent equivalent + # curves, I arbitrarily choose positive sign for pars[2]==0 in order to push the system back into a realistic + # parameter space should this improbable scenario occur. # jacobian[2] = sign(pars[2])*exp_p if pars[2] >= 0: jacobian[2] = exp_p @@ -232,15 +254,15 @@ def _jacobianraw(self, pars, r, free): def _transform_derivativesraw(self, pars, in_format, out_format): """Return gradient matrix for the pars converted from in_format to out_format. - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ # With these three formats only the width-related parameter changes. # Therefore the gradient matrix is the identity matrix with the possible @@ -252,49 +274,50 @@ def _transform_derivativesraw(self, pars, in_format, out_format): if in_format == "internal": if out_format == "pwa": - g[1,1] = self.maxwidth/(2*np.sqrt(2))*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) + g[1, 1] = self.maxwidth / (2 * np.sqrt(2)) * np.cos(pars[1]) / np.sqrt(1 + np.sin(pars[1])) elif out_format == "mu_sigma_area": - g[1,1] = self.maxwidth/(2*np.sqrt(2)*self.sigma2fwhm)*np.cos(pars[1])/np.sqrt(1+np.sin(pars[1])) + g[1, 1] = ( + self.maxwidth + / (2 * np.sqrt(2) * self.sigma2fwhm) + * np.cos(pars[1]) + / np.sqrt(1 + np.sin(pars[1])) + ) else: - raise ValueError("Argument 'out_format' must be one of %s."
% self.parformats) elif in_format == "pwa": if out_format == "internal": - g[1,1] = 2/np.sqrt(self.maxwidth**2-pars[1]**2) + g[1, 1] = 2 / np.sqrt(self.maxwidth**2 - pars[1] ** 2) elif out_format == "mu_sigma_area": - g[1,1] = 1/self.sigma2fwhm + g[1, 1] = 1 / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) elif in_format == "mu_sigma_area": if out_format == "internal": - g[1,1] = 2*self.sigma2fwhm/np.sqrt(self.maxwidth**2-(self.sigma2fwhm*pars[1])**2) + g[1, 1] = 2 * self.sigma2fwhm / np.sqrt(self.maxwidth**2 - (self.sigma2fwhm * pars[1]) ** 2) elif out_format == "pwa": - g[1,1] = self.sigma2fwhm + g[1, 1] = self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) return g def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Also restores parameters to a preferred range if it permits multiple - values that correspond to the same physical result. + Also restores parameters to a preferred range if it permits multiple + values that correspond to the same physical result. - Parameters - pars: Sequence of parameters - in_format: A format defined for this class - out_format: A format defined for this class - - Defined Formats - internal: [position, parameterized width-squared, area] - pwa: [position, full width at half maximum, area] - mu_sigma_area: [mu, sigma, area] + Parameters + pars: Sequence of parameters + in_format: A format defined for this class + out_format: A format defined for this class + + Defined Formats + internal: [position, parameterized width-squared, area] + pwa: [position, full width at half maximum, area] + mu_sigma_area: [mu, sigma, area] """ temp = np.array(pars) @@ -308,51 +331,58 @@ def _transform_parametersraw(self, pars, in_format, out_format): if in_format == "internal": # put the parameter for width in the "physical" quadrant [-pi/2,pi/2], # where .5*(sin(p)+1) covers fwhm = [0, maxwidth] - n = np.floor((temp[1]+np.pi/2)/np.pi) + n = np.floor((temp[1] + np.pi / 2) / np.pi) if np.mod(n, 2) == 0: - temp[1] = temp[1] - np.pi*n + temp[1] = temp[1] - np.pi * n else: - temp[1] = np.pi*n - temp[1] - temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one + temp[1] = np.pi * n - temp[1] + temp[2] = np.abs(temp[2]) # map negative area to equivalent positive one elif in_format == "pwa": if temp[1] > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(temp[1], self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + temp[1], + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*temp[1]**2/self.maxwidth**2-1.) 
+ temp[1] = np.arcsin(2.0 * temp[1] ** 2 / self.maxwidth**2 - 1.0) elif in_format == "mu_sigma_area": - fwhm = temp[1]*self.sigma2fwhm + fwhm = temp[1] * self.sigma2fwhm if fwhm > self.maxwidth: - emsg = "Width %s (FWHM) greater than maximum allowed width %s" %(fwhm, self.maxwidth) + emsg = "Width %s (FWHM) greater than maximum allowed width %s" % ( + fwhm, + self.maxwidth, + ) raise SrMiseTransformationError(emsg) - temp[1] = np.arcsin(2.*fwhm**2/self.maxwidth**2-1.) + temp[1] = np.arcsin(2.0 * fwhm**2 / self.maxwidth**2 - 1.0) else: - raise ValueError("Argument 'in_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'in_format' must be one of %s." % self.parformats) # Convert to specified output format from "internal" format. if out_format == "internal": pass elif out_format == "pwa": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2) + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) elif out_format == "mu_sigma_area": - temp[1] = np.sqrt(.5*(np.sin(temp[1])+1.)*self.maxwidth**2)/self.sigma2fwhm + temp[1] = np.sqrt(0.5 * (np.sin(temp[1]) + 1.0) * self.maxwidth**2) / self.sigma2fwhm else: - raise ValueError("Argument 'out_format' must be one of %s." \ - % self.parformats) + raise ValueError("Argument 'out_format' must be one of %s." % self.parformats) return temp def _valueraw(self, pars, r): """Return value of width-limited Gaussian/r for the given parameters and r values. - pars: Sequence of parameters for a single width-limited Gaussian - pars[0]=peak position - pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. - =tan(pi/2*fwhm/maxwidth) - pars[2]=multiplicative constant a, equivalent to peak area - r: sequence or scalar over which pars is evaluated + pars: Sequence of parameters for a single width-limited Gaussian + pars[0]=peak position + pars[1]=effective width, up to fwhm=maxwidth as par[1] -> inf. + =tan(pi/2*fwhm/maxwidth) + pars[2]=multiplicative constant a, equivalent to peak area + r: sequence or scalar over which pars is evaluated """ - return np.abs(pars[2])/(np.abs(r)*self.c1*np.sqrt(np.sin(pars[1])+1.))* \ - np.exp(-(r-pars[0])**2/(self.c2*(np.sin(pars[1])+1.))) + return ( + np.abs(pars[2]) + / (np.abs(r) * self.c1 * np.sqrt(np.sin(pars[1]) + 1.0)) + * np.exp(-((r - pars[0]) ** 2) / (self.c2 * (np.sin(pars[1]) + 1.0))) + ) def getmodule(self): return __name__ @@ -371,18 +401,19 @@ def max(self, pars): # The Gaussian/r only has a local maximum under this condition. # Physically realistic peaks will always meet this condition, but # trying to fit a signal down to r=0 could conceivably lead to issues. 
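A quick way to sanity-check the width reparameterization handled above: the internal parameter p maps to fwhm = sqrt(0.5*(sin(p)+1))*maxwidth, which stays in [0, maxwidth] for every real p, and arcsin(2*fwhm**2/maxwidth**2 - 1) inverts it on the principal branch. A minimal standalone numpy sketch with illustrative values (not part of the patch itself):

    import numpy as np

    maxwidth = 0.7  # same bound as GaussianOverR(0.7) in the test code below
    p = 0.3  # any internal width parameter in [-pi/2, pi/2]
    fwhm = np.sqrt(0.5 * (np.sin(p) + 1.0) * maxwidth**2)  # internal -> pwa
    p_back = np.arcsin(2.0 * fwhm**2 / maxwidth**2 - 1.0)  # pwa -> internal
    assert np.isclose(p, p_back)  # round trip recovers p
    assert 0.0 <= fwhm <= maxwidth  # the width is bounded for any real p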
- if tpars[0]**2 <= 4*tpars[1]**2: - emsg = ''.join(["No local maximum with parameters\n", str(pars)]) + if tpars[0] ** 2 <= 4 * tpars[1] ** 2: + emsg = "".join(["No local maximum with parameters\n", str(pars)]) raise ValueError(emsg) - rmax = .5*(tpars[0]+np.sqrt(tpars[0]**2-4*tpars[1]**2)) + rmax = 0.5 * (tpars[0] + np.sqrt(tpars[0] ** 2 - 4 * tpars[1] ** 2)) ymax = self._valueraw(pars, rmax) return np.array([rmax, ymax]) -#end of class GaussianOverR + +# end of class GaussianOverR # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn @@ -391,26 +422,26 @@ def max(self, pars): from diffpy.srmise.modelevaluators import AICc from diffpy.srmise.peaks import Peaks - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) # default unknown errors - pf = GaussianOverR(.7) + res = 0.01 + r = np.arange(2, 4, res) + err = np.ones(len(r)) # default unknown errors + pf = GaussianOverR(0.7) evaluator = AICc() - pars = [[3, .2, 10], [3.5, .2, 10]] + pars = [[3, 0.2, 10], [3.5, 0.2, 10]] ideal_peaks = Peaks([pf.createpeak(p, "pwa") for p in pars]) - y = ideal_peaks.value(r) + .1*randn(len(r)) + y = ideal_peaks.value(r) + 0.1 * randn(len(r)) - guesspars = [[2.7, .15, 5], [3.7, .3, 5]] + guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]] guess_peaks = Peaks([pf.createpeak(p, "pwa") for p in guesspars]) cluster = ModelCluster(guess_peaks, r, y, err, None, AICc, [pf]) qual1 = cluster.quality() - print qual1.stat + print(qual1.stat) cluster.fit() yfit = cluster.calc() qual2 = cluster.quality() - print qual2.stat + print(qual2.stat) plt.figure(1) plt.plot(r, y, r, yfit) diff --git a/diffpy/srmise/peaks/terminationripples.py b/diffpy/srmise/peaks/terminationripples.py index cdce773..3f12272 100644 --- a/diffpy/srmise/peaks/terminationripples.py +++ b/diffpy/srmise/peaks/terminationripples.py @@ -21,10 +21,11 @@ logger = logging.getLogger("diffpy.srmise") -class TerminationRipples (PeakFunction): + +class TerminationRipples(PeakFunction): """Methods for evaluation and parameter estimation of a peak function with termination ripples.""" - def __init__(self, base, qmax, extension=4., supersample=5., Cache=None): + def __init__(self, base, qmax, extension=4.0, supersample=5.0, Cache=None): """Peak function which adds termination ripples to existing function. Unlike other peak functions, TerminationRipples can only be evaluated @@ -73,7 +74,6 @@ def estimate_parameters(self, r, y): reason.""" return self.base.estimate_parameters(r, y) - # TODO: Can this be implemented sanely for termination ripples? def scale_at(self, pars, x, scale): """Change parameters so value(x)->scale*value(x) for the base function. @@ -90,36 +90,36 @@ def scale_at(self, pars, x, scale): def _jacobianraw(self, pars, r, free): """Return Jacobian of base function with termination ripples. - Parameters - pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated - free: sequence of booleans which determines which derivatives are - needed. True for evaluation, False for no evaluation.""" + Parameters + pars: Sequence of parameters for a single peak + r: sequence or scalar over which pars is evaluated + free: sequence of booleans which determines which derivatives are + needed. True for evaluation, False for no evaluation.""" return self.base._jacobianraw(pars, r, free) def _transform_derivativesraw(self, pars, in_format, out_format): """Return gradient matrix for the pars converted from in_format to out_format. 
- Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" + Parameters + pars: Sequence of parameters + in_format: A format defined for base peak function + out_format: A format defined for base peak function""" return self.base._transform_derivativesraw(pars, in_format, out_format) def _transform_parametersraw(self, pars, in_format, out_format): """Convert parameter values from in_format to out_format. - Parameters - pars: Sequence of parameters - in_format: A format defined for base peak function - out_format: A format defined for base peak function""" + Parameters + pars: Sequence of parameters + in_format: A format defined for base peak function + out_format: A format defined for base peak function""" return self.base._transform_parametersraw(pars, in_format, out_format) def _valueraw(self, pars, r): """Return value of base peak function for the given parameters and r values. - pars: Sequence of parameters for a single peak - r: sequence or scalar over which pars is evaluated""" + pars: Sequence of parameters for a single peak + r: sequence or scalar over which pars is evaluated""" return self.base._valueraw(pars, r) #### Overridden PeakFunction functions #### @@ -137,17 +137,19 @@ def jacobian(self, peak, r, rng=None): a default value of 0. If caching is enabled these may be previously calculated values instead.""" if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) # normally r will be a sequence, but also allow single numeric values try: if len(r) > 1: - dr = (r[-1]-r[0])/(len(r)-1) + dr = (r[-1] - r[0]) / (len(r) - 1) else: # dr is ad hoc if r is a single point - dr = 2*np.pi/(self.supersample*self.qmax) + dr = 2 * np.pi / (self.supersample * self.qmax) if rng is None: rng = slice(0, len(r)) @@ -158,12 +160,12 @@ def jacobian(self, peak, r, rng=None): for idx in range(len(output)): if jac[idx] is not None: jac[idx] = self.cut_freq(jac[idx], dr) - output[idx] = r * 0. + output[idx] = r * 0.0 output[idx][rng] = jac[idx][ext_slice] return output - except (TypeError): + except TypeError: # dr is ad hoc if r is a single point. - dr = 2*np.pi/(self.supersample*self.qmax) + dr = 2 * np.pi / (self.supersample * self.qmax) (ext_r, ext_slice) = self.extend_grid(np.array([r]), dr) jac = self._jacobianraw(peak.pars, ext_r, peak.free) for idx in range(len(output)): @@ -171,7 +173,6 @@ def jacobian(self, peak, r, rng=None): jac[idx] = self.cut_freq(jac[idx], dr)[ext_slice][0] return jac - def value(self, peak, r, rng=None): """Calculate (rippled) value of peak, possibly restricted by range. @@ -187,13 +188,15 @@ def value(self, peak, r, rng=None): previously calculated values instead. """ if self is not peak._owner: - raise ValueError("Argument 'peak' must be evaluated by the " - "PeakFunction subclass instance with which " - "it is associated.") + raise ValueError( + "Argument 'peak' must be evaluated by the " + "PeakFunction subclass instance with which " + "it is associated." + ) # normally r will be a sequence, but also allow single numeric values - dr_super = 2*np.pi/(self.supersample*self.qmax) + dr_super = 2 * np.pi / (self.supersample * self.qmax) if np.isscalar(r): # dr is ad hoc if r is a single point. 
(ext_r, ext_slice) = self.extend_grid(np.array([r]), dr_super) @@ -204,7 +207,7 @@ def value(self, peak, r, rng=None): if rng is None: rng = slice(0, len(r)) - output = r * 0. + output = r * 0.0 # Make sure the actual dr used for finding termination ripples # is at least as fine as dr_super, while still calculating the @@ -215,13 +218,13 @@ def value(self, peak, r, rng=None): # of sampling needed to avoid the worst of these discretization # issues is difficult to determine without detailed knowledge # of the underlying function. - dr = (r[-1]-r[0])/(len(r)-1) - segments = np.ceil(dr/dr_super) - dr_segmented = dr/segments + dr = (r[-1] - r[0]) / (len(r) - 1) + segments = np.ceil(dr / dr_super) + dr_segmented = dr / segments rpart = r[rng] if segments > 1: - rpart = np.arange(rpart[0], rpart[-1] + dr_segmented/2, dr_segmented) + rpart = np.arange(rpart[0], rpart[-1] + dr_segmented / 2, dr_segmented) (ext_r, ext_slice) = self.extend_grid(rpart, dr_segmented) value = self._valueraw(peak.pars, ext_r) @@ -244,64 +247,65 @@ def cut_freq(self, sequence, delta): Parameters sequence: (numpy array) The sequence to alter. delta: The spacing between elements in sequence.""" - padlen = int(2**np.ceil(np.log2(len(sequence)))) + padlen = int(2 ** np.ceil(np.log2(len(sequence)))) padseq = fp.fft(sequence, padlen) - dq = 2*np.pi/((padlen-1)*delta) - lowidx = int(np.ceil(self.qmax/dq)) - hiidx = padlen+1-lowidx + dq = 2 * np.pi / ((padlen - 1) * delta) + lowidx = int(np.ceil(self.qmax / dq)) + hiidx = padlen + 1 - lowidx # Remove hi-frequency components - padseq[lowidx:hiidx]=0 + padseq[lowidx:hiidx] = 0 padseq = fp.ifft(padseq) - return np.real(padseq[0:len(sequence)]) + return np.real(padseq[0 : len(sequence)]) def extend_grid(self, r, dr): """Return (extended r, slice giving original range).""" - ext = self.extension*2*np.pi/self.qmax - left_ext = np.arange(r[0]-dr, max(0., r[0]-ext-dr), -dr)[::-1] - right_ext = np.arange(r[-1]+dr, r[-1]+ext+dr, dr) + ext = self.extension * 2 * np.pi / self.qmax + left_ext = np.arange(r[0] - dr, max(0.0, r[0] - ext - dr), -dr)[::-1] + right_ext = np.arange(r[-1] + dr, r[-1] + ext + dr, dr) ext_r = np.concatenate((left_ext, r, right_ext)) - ext_slice = slice(len(left_ext), len(ext_r)-len(right_ext)) + ext_slice = slice(len(left_ext), len(ext_r) - len(right_ext)) return (ext_r, ext_slice) -#end of class TerminationRipples + +# end of class TerminationRipples # simple test code -if __name__ == '__main__': +if __name__ == "__main__": import matplotlib.pyplot as plt from numpy.random import randn from diffpy.srmise.modelcluster import ModelCluster - from diffpy.srmise.modelevaluator import AICc - from diffpy.srmise.peakfunctions.gaussianoverr import GaussianOverR - from diffpy.srmise.peakfunctions.peaks import Peaks - from diffpy.srmise.peakfunctions.terminationripples import TerminationRipples - - res = .01 - r = np.arange(2,4,res) - err = np.ones(len(r)) #default unknown errors - pf1 = GaussianOverR(.7) - pf2 = TerminationRipples(pf1, 20.) 
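The cut_freq method reformatted above is a low-pass filter: pad the sequence to a power-of-two length, take the FFT, zero every component whose frequency exceeds qmax, and invert. A standalone restatement in plain numpy (the function and signal names here are illustrative, not from the package):

    import numpy as np

    def lowpass(seq, delta, qmax):
        # Zero all Fourier components above qmax, mirroring cut_freq.
        padlen = int(2 ** np.ceil(np.log2(len(seq))))
        padseq = np.fft.fft(seq, padlen)
        dq = 2 * np.pi / ((padlen - 1) * delta)
        lowidx = int(np.ceil(qmax / dq))
        padseq[lowidx : padlen + 1 - lowidx] = 0
        return np.real(np.fft.ifft(padseq)[: len(seq)])

    r = np.arange(0.0, 10.0, 0.01)
    noisy = np.sin(2 * r) + 0.05 * np.sin(40 * r)  # 40 rad/unit term lies above qmax
    smooth = lowpass(noisy, 0.01, qmax=20.0)  # high-frequency term is removed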
+    from diffpy.srmise.modelevaluators import AICc
+    from diffpy.srmise.peaks.base import Peaks
+    from diffpy.srmise.peaks.gaussianoverr import GaussianOverR
+    from diffpy.srmise.peaks.terminationripples import TerminationRipples
+
+    res = 0.01
+    r = np.arange(2, 4, res)
+    err = np.ones(len(r))  # default unknown errors
+    pf1 = GaussianOverR(0.7)
+    pf2 = TerminationRipples(pf1, 20.0)
     evaluator = AICc()

-    pars = [[3, .2, 10], [3.5, .2, 10]]
+    pars = [[3, 0.2, 10], [3.5, 0.2, 10]]
     ideal_peaks = Peaks([pf1.createpeak(p, "pwa") for p in pars])
     ripple_peaks = Peaks([pf2.createpeak(p, "pwa") for p in pars])
     y_ideal = ideal_peaks.value(r)
-    y_ripple = ripple_peaks.value(r) + .1*randn(len(r))
+    y_ripple = ripple_peaks.value(r) + 0.1 * randn(len(r))

-    guesspars = [[2.7, .15, 5], [3.7, .3, 5]]
+    guesspars = [[2.7, 0.15, 5], [3.7, 0.3, 5]]
     guess_peaks = Peaks([pf2.createpeak(p, "pwa") for p in guesspars])
     cluster = ModelCluster(guess_peaks, r, y_ripple, err, None, AICc, [pf2])

     qual1 = cluster.quality()
-    print qual1.stat
+    print(qual1.stat)
     cluster.fit()
     yfit = cluster.calc()
     qual2 = cluster.quality()
-    print qual2.stat
+    print(qual2.stat)

     plt.figure(1)
     plt.plot(r, y_ideal, r, y_ripple, r, yfit)
diff --git a/diffpy/srmise/peakstability.py b/diffpy/srmise/peakstability.py
index fcf5e31..d704b41 100644
--- a/diffpy/srmise/peakstability.py
+++ b/diffpy/srmise/peakstability.py
@@ -127,9 +127,7 @@ def setcurrent(self, idx):
         self.current = idx
         if idx is not None:
             result = self.results[idx]
-            self.ppe.setvars(
-                quiet=True, effective_dy=result[0] * np.ones(len(self.ppe.x))
-            )
+            self.ppe.setvars(quiet=True, effective_dy=result[0] * np.ones(len(self.ppe.x)))
             (r, y, dr, dy) = self.ppe.resampledata(result[3])
             self.ppe.extracted = ModelCluster(
                 result[1], result[2], r, y, dy, None, self.ppe.error_method, self.ppe.pf
@@ -178,12 +176,10 @@ def run(self, err, savecovs=False):
                 covs.append(self.ppe.extract())
             else:
                 self.ppe.extract()
-            dr = (
-                self.ppe.extracted.r_cluster[-1] - self.ppe.extracted.r_cluster[0]
-            ) / (len(self.ppe.extracted.r_cluster) - 1)
-            self.results.append(
-                [e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr]
+            dr = (self.ppe.extracted.r_cluster[-1] - self.ppe.extracted.r_cluster[0]) / (
+                len(self.ppe.extracted.r_cluster) - 1
             )
+            self.results.append([e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr])

         for e, r, bl, dr in self.results:
             print("---- Results for uncertainty %s ----" % e)
diff --git a/diffpy/srmise/srmiselog.py b/diffpy/srmise/srmiselog.py
index d971b73..f93ac2e 100644
--- a/diffpy/srmise/srmiselog.py
+++ b/diffpy/srmise/srmiselog.py
@@ -242,10 +242,10 @@ def read(self, filename):
         except SrMiseDataFormatError as err:
             logger.exception("")
             basename = os.path.basename(filename)
-            emsg = (
-                "Could not open '%s' due to unsupported file format "
-                + "or corrupted data. [%s]"
-            ) % (basename, err)
+            emsg = ("Could not open '%s' due to unsupported file format " "or corrupted data. [%s]") % (
+                basename,
+                err,
+            )
             raise SrMiseFileError(emsg)
         return None
@@ -336,9 +336,7 @@ def reset_trace(self):

     # filter property
     def setfilter(self, filter):
-        self.__filter = compile(
-            " and ".join(["(%s)" % f for f in filter]), "", "eval"
-        )
+        self.__filter = compile(" and ".join(["(%s)" % f for f in filter]), "", "eval")

     def getfilter(self):
         return self.__filter
diff --git a/doc/examples/multimodel_known_dG2.py b/doc/examples/multimodel_known_dG2.py
index d061981..7b3d8ee 100644
--- a/doc/examples/multimodel_known_dG2.py
+++ b/doc/examples/multimodel_known_dG2.py
@@ -39,8 +39,23 @@
 from diffpy.srmise.applications.plot import makeplot

 # distances from ideal Ag (refined to PDF)
-dcif = np.array([11.2394, 11.608, 11.9652, 12.3121, 12.6495, 12.9781, 13.2986,
-                 13.6116, 13.9175, 14.2168, 14.51, 14.7973])
+dcif = np.array(
+    [
+        11.2394,
+        11.608,
+        11.9652,
+        12.3121,
+        12.6495,
+        12.9781,
+        13.2986,
+        13.6116,
+        13.9175,
+        14.2168,
+        14.51,
+        14.7973,
+    ]
+)
+

 def run(plot=True):
@@ -56,8 +71,8 @@ def run(plot=True):
     # Standard AIC analysis assumes the data have independent uncertainties.
     # Nyquist sampling minimizes correlations in the PDF, which is the closest
     # approximation to independence possible for the PDF.
-    dr = np.pi/ms.ppe.qmax
-    (r,y,dr2,dy) = ms.ppe.resampledata(dr)
+    dr = np.pi / ms.ppe.qmax
+    (r, y, dr2, dy) = ms.ppe.resampledata(dr)

     ## Classify models
     # All models are placed into classes. Models in the same class
@@ -78,11 +93,11 @@ def run(plot=True):
     ## Summarize various facts about the analysis.
     num_models = len(ms.results)
     num_classes = len(ms.classes)
-    print "------- Multimodeling Summary --------"
-    print "Models: %i" %num_models
-    print "Classes: %i (tol=%s)" %(num_classes, tolerance)
-    print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1])
-    print "Nyquist-sampled data points: %i" %len(r)
+    print("------- Multimodeling Summary --------")
+    print("Models: %i" % num_models)
+    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
+    print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1]))
+    print("Nyquist-sampled data points: %i" % len(r))

     ## Get dG usable as key in analysis.
     # The Akaike probabilities were calculated for many assumed values of the
@@ -101,8 +116,8 @@ def run(plot=True):
     #
     # The present PDF satisfies these conditions, so the rankings below reflect
     # an AIC-based estimate of which of the tested models the data best support.
-    print "\n--------- Model Rankings for dG = %f ---------" %dG
-    print "Rank Model Class Free AIC Prob File"
+    print("\n--------- Model Rankings for dG = %f ---------" % dG)
+    print("Rank Model Class Free AIC Prob File")

     for i in range(len(ms.classes)):
         ## Generate information about best model in ith best class.
@@ -117,23 +132,25 @@ def run(plot=True):
         # "prob" -> The AIC probability given uncertainty dG
         # These all have dedicated getter functions. For example, the model
         # index can also be obtained using get_model(dG, corder=i)
-        (model, cls, nfree, aic, prob) = \
-            ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i)
+        (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i)

-        filename_base = "output/known_dG_m"+str(model)
+        filename_base = "output/known_dG_m" + str(model)

         # Print info for this model
-        print "%4i %5i %5i %4i %10.4e %6.3f %s" \
-            %(i+1, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        print(
+            "%4i %5i %5i %4i %10.4e %6.3f %s" % (i + 1, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        )

         # A message added as a comment to saved .pwa file.
- msg = ["Multimodeling Summary", - "---------------------", - "Evaluated at dG: %s" %dG, - "Model: %i (of %i)" %(model, num_models), - "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance), - "Akaike probability: %g" %prob, - "Rank: %i" %(i+1),] + msg = [ + "Multimodeling Summary", + "---------------------", + "Evaluated at dG: %s" % dG, + "Model: %i (of %i)" % (model, num_models), + "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance), + "Akaike probability: %g" % prob, + "Rank: %i" % (i + 1), + ] msg = "\n".join(msg) # Make this the active model @@ -146,12 +163,10 @@ def run(plot=True): if plot: plt.figure() makeplot(ms.ppe, dcif) - plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" \ - %(model, cls, i+1, prob)) + plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" % (model, cls, i + 1, prob)) # Uncomment line below to save figures. # plt.savefig(filename_base + ".png", format="png") - ## 3D plot of Akaike probabilities # This plot shows the Akaike probabilities of all classes as a function # of assumed uncertainty dG. This gives a rough sense of how the models @@ -161,13 +176,14 @@ def run(plot=True): # are highlighted. if plot: plt.figure() - ms.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG]) + ms.plot3dclassprobs(probfilter=[0.0, 1.0], highlight=[dG]) plt.tight_layout() # Uncomment line below to save figure. - #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight") + # plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight") if plot: plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/examples/multimodel_unknown_dG2.py b/doc/examples/multimodel_unknown_dG2.py index 1bc9f60..4e68898 100644 --- a/doc/examples/multimodel_unknown_dG2.py +++ b/doc/examples/multimodel_unknown_dG2.py @@ -46,11 +46,32 @@ from diffpy.srmise.applications.plot import makeplot # distances from ideal (unrefined) C60 -dcif = np.array([1.44, 2.329968944, 2.494153163, 2.88, 3.595985339, - 3.704477734, 4.132591264, 4.520339129, 4.659937888, - 4.877358006, 5.209968944, 5.405310018, 5.522583786, - 5.818426502, 6.099937888, 6.164518388, 6.529777754, - 6.686673127, 6.745638756, 6.989906831, 7.136693738]) +dcif = np.array( + [ + 1.44, + 2.329968944, + 2.494153163, + 2.88, + 3.595985339, + 3.704477734, + 4.132591264, + 4.520339129, + 4.659937888, + 4.877358006, + 5.209968944, + 5.405310018, + 5.522583786, + 5.818426502, + 6.099937888, + 6.164518388, + 6.529777754, + 6.686673127, + 6.745638756, + 6.989906831, + 7.136693738, + ] +) + def run(plot=True): @@ -66,8 +87,8 @@ def run(plot=True): # Standard AIC analysis assumes the data have independent uncertainties. # Nyquist sampling minimizes correlations in the PDF, which is the closest # approximation to independence possible for the PDF. - dr = np.pi/ms.ppe.qmax - (r,y,dr2,dy) = ms.ppe.resampledata(dr) + dr = np.pi / ms.ppe.qmax + (r, y, dr2, dy) = ms.ppe.resampledata(dr) ## Classify models # All models are placed into classes. Models in the same class @@ -88,11 +109,11 @@ def run(plot=True): ## Summarize various facts about the analysis. 
     num_models = len(ms.results)
     num_classes = len(ms.classes)
-    print "------- Multimodeling Summary --------"
-    print "Models: %i" %num_models
-    print "Classes: %i (tol=%s)" %(num_classes, tolerance)
-    print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1])
-    print "Nyquist-sampled data points: %i" %len(r)
+    print("------- Multimodeling Summary --------")
+    print("Models: %i" % num_models)
+    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
+    print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1]))
+    print("Nyquist-sampled data points: %i" % len(r))

     ## Find "best" models.
     # In short, models with greatest Akaike probability. Akaike probabilities
@@ -115,13 +136,12 @@ def run(plot=True):
     best_classes = np.unique([ms.get_class(dG) for dG in ms.dgs])
     best_dGs = []
     for cls in best_classes:
-        cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 \
-                     for dG in ms.dgs]
+        cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 for dG in ms.dgs]
         dG = ms.dgs[np.argmax(cls_probs)]
         best_dGs.append(dG)

-    print "\n--------- Best models for at least one dG ---------" %dG
-    print "  Best dG Model Class Free AIC Prob File"
+    print("\n--------- Best models for at least one dG ---------")
+    print("  Best dG Model Class Free AIC Prob File")

     for dG in best_dGs:
         ## Generate information about best model.
@@ -135,24 +155,26 @@ def run(plot=True):
         # "aic" -> The AIC for this model given uncertainty dG
         # "prob" -> The AIC probability given uncertainty dG
         # These all have dedicated getter functions.
-        (model, cls, nfree, aic, prob) = \
-            ms.get(dG, "model", "class", "nfree", "aic", "prob")
+        (model, cls, nfree, aic, prob) = ms.get(dG, "model", "class", "nfree", "aic", "prob")

-        filename_base = "output/unknown_dG_m"+str(model)
+        filename_base = "output/unknown_dG_m" + str(model)

         # Print info for this model
-        print "%10.4e %5i %5i %4i %10.4e %6.3f %s" \
-            %(dG, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        print(
+            "%10.4e %5i %5i %4i %10.4e %6.3f %s" % (dG, model, cls, nfree, aic, prob, filename_base + ".pwa")
+        )

         # A message added as a comment to saved .pwa file.
         best_from = [dg for dg in ms.dgs if ms.get_class(dg) == cls]
-        msg = ["Multimodeling Summary",
-               "---------------------",
-               "Model: %i (of %i)" %(model, num_models),
-               "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance),
-               "Best model from dG: %s-%s" %(best_from[0], best_from[-1]),
-               "Evaluated at dG: %s" %dG,
-               "Akaike probability: %g" %prob]
+        msg = [
+            "Multimodeling Summary",
+            "---------------------",
+            "Model: %i (of %i)" % (model, num_models),
+            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
+            "Best model from dG: %s-%s" % (best_from[0], best_from[-1]),
+            "Evaluated at dG: %s" % dG,
+            "Akaike probability: %g" % prob,
+        ]
         msg = "\n".join(msg)

         # Make this the active model
@@ -165,12 +187,10 @@ def run(plot=True):
         if plot:
             plt.figure()
             makeplot(ms.ppe, dcif)
-            plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" \
-                %(model, cls, dG, prob))
+            plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" % (model, cls, dG, prob))
         # Uncomment line below to save figures.
         # plt.savefig(filename_base + ".png", format="png")

-
     ## 3D plot of Akaike probabilities
     # This plot shows the Akaike probabilities of all classes as a function
     # of assumed uncertainty dG. This gives a rough sense of how the models
@@ -179,13 +199,14 @@ def run(plot=True):
     # are highlighted at the various dG values found above.
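The Akaike probabilities ranked in these examples are the standard Akaike weights: with di = AICi - min(AIC), the probability of model i is exp(-di/2) normalized over all candidate models. A sketch with hypothetical AIC values (the package computes these internally; the numbers here are illustrative only):

    import numpy as np

    aics = np.array([1020.3, 1022.1, 1030.8])  # hypothetical AIC values
    delta = aics - aics.min()
    weights = np.exp(-delta / 2)
    probs = weights / weights.sum()  # lowest-AIC model carries the largest probability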
if plot: plt.figure() - ms.plot3dclassprobs(probfilter=[0.1, 1.], highlight=best_dGs) + ms.plot3dclassprobs(probfilter=[0.1, 1.0], highlight=best_dGs) plt.tight_layout() # Uncomment line below to save figure. - #plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") + # plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight") if plot: plt.show() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/doc/examples/query_results.py b/doc/examples/query_results.py index c861ee5..57c4346 100644 --- a/doc/examples/query_results.py +++ b/doc/examples/query_results.py @@ -53,7 +53,7 @@ def run(plot=True): # Peaks are extracted between 2 and 10 angstroms, using the baseline # from the isolated peak example. kwds = {} - kwds["rng"] = [2.0, 10.] + kwds["rng"] = [2.0, 10.0] kwds["baseline"] = baseline # Apply peak extraction parameters. @@ -63,8 +63,7 @@ def run(plot=True): # model and the full covariance matrix. cov = ppe.extract() - - print "\n======= Accessing SrMise Results ========" + print("\n======= Accessing SrMise Results ========") ## Accessing results of extraction # # Model parameters are organized using a nested structure, with a list @@ -90,43 +89,39 @@ def run(plot=True): # peak. Thus, this parameter can be referenced as (1,2). Several examples # are presented below. - - print "\n------ Parameter values and uncertainties ------" + print("\n------ Parameter values and uncertainties ------") # ModelCovariance.get() returns a (value, uncertainty) tuple for a given # parameter. These are the results for the nearest-neighbor peak. - p0 = cov.get((0,0)) - w0 = cov.get((0,1)) - a0 = cov.get((0,2)) - print "Nearest-neighbor peak: " - print " position = %f +/- %f" %p0 - print " width = %f +/- %f" %w0 - print " area = %f +/- %f" %a0 - print " Covariance(width, area) = ", cov.getcovariance((0,1),(0,2)) + p0 = cov.get((0, 0)) + w0 = cov.get((0, 1)) + a0 = cov.get((0, 2)) + print("Nearest-neighbor peak: ") + print(" position = %f +/- %f" % p0) + print(" width = %f +/- %f" % w0) + print(" area = %f +/- %f" % a0) + print(" Covariance(width, area) = ", cov.getcovariance((0, 1), (0, 2))) # Baseline parameters. By convention, baseline is final element in cov. (slope, intercept) = cov.model[-1] - print "\nThe linear baseline B(r)=%f*r + %f" \ - % tuple(par for par in cov.model[-1]) - + print("\nThe linear baseline B(r)=%f*r + %f" % tuple(par for par in cov.model[-1])) - print "\n ------ Uncertainties from a Saved File --------" + print("\n ------ Uncertainties from a Saved File --------") # A .srmise file does not save the full covariance matrix, so it must be # recalculated when loading from these files. For example, here is the # nearest-neighbor peak in the file which we used to define the initial # baseline. 
     cov2 = ModelCovariance()
     ppebl.extracted.fit(fitbaseline=True, cov=cov2, cov_format="default_output")
-    p0_saved = cov2.get((0,0))
-    w0_saved = cov2.get((0,1))
-    a0_saved = cov2.get((0,2))
-    print "Nearest-neighbor peak:"
-    print "  position = %f +/- %f" %p0_saved
-    print "  width == %f +/- %f" %w0_saved
-    print "  area = = %f +/- %f" %a0_saved
-    print "  Covariance(width, area) = ", cov2.getcovariance((0,1),(0,2))
-
-
-    print "\n ---------- Alternate Parameterizations ---------"
+    p0_saved = cov2.get((0, 0))
+    w0_saved = cov2.get((0, 1))
+    a0_saved = cov2.get((0, 2))
+    print("Nearest-neighbor peak:")
+    print("  position = %f +/- %f" % p0_saved)
+    print("  width = %f +/- %f" % w0_saved)
+    print("  area = %f +/- %f" % a0_saved)
+    print("  Covariance(width, area) = ", cov2.getcovariance((0, 1), (0, 2)))
+
+    print("\n ---------- Alternate Parameterizations ---------")
     ## Different Parameterizations
     # Peaks and baselines may have equivalent parameterizations that are useful
     # in different situations. For example, the types defined by the
@@ -151,26 +146,24 @@ def run(plot=True):
     # would transform the second, third, and fourth peaks). If the keyword
     # is omitted, the transformation is attempted for all parts of the fit.
     cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks")
-    print "Width (sigma) of nearest-neighbor peak: %f +/- %f" %cov.get((0,1))
+    print("Width (sigma) of nearest-neighbor peak: %f +/- %f" % cov.get((0, 1)))

-
-    print "\n ------------ Highly Correlated Parameters ------------"
+    print("\n ------------ Highly Correlated Parameters ------------")
     # Highly-correlated parameters can indicate difficulties constraining the
     # fit. This function lists all pairs of parameters with an absolute value
     # of correlation which exceeds a given threshold.
-    print "|Correlation| > 0.9:"
-    print "par1 par2 corr(par1, par2)"
-    print "\n".join(str(c) for c in cov.correlationwarning(.9))
-
+    print("|Correlation| > 0.9:")
+    print("par1 par2 corr(par1, par2)")
+    print("\n".join(str(c) for c in cov.correlationwarning(0.9)))

-    print "\n-------- Estimate coordination shell occupancy ---------"
+    print("\n-------- Estimate coordination shell occupancy ---------")
     # Estimate the scale factor and its uncertainty from first peak's intensity.
     # G_normalized = scale * G_observed
     # dscale = scale * dG_observed/G_observed
-    scale = 12./a0[0]
-    dscale = scale * a0[1]/a0[0]
-    print "Estimate scale factor assuming nearest-neighbor intensity = 12"
-    print "Scale factor is %f +/- %f" %(scale, dscale)
+    scale = 12.0 / a0[0]
+    dscale = scale * a0[1] / a0[0]
+    print("Estimate scale factor assuming nearest-neighbor intensity = 12")
+    print("Scale factor is %f +/- %f" % (scale, dscale))

     # Reference for number of atoms in coordination shells for FCC.
     # http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html
@@ -178,35 +171,33 @@ def run(plot=True):
     # Calculated the scaled intensities and uncertainties.
     intensity = []
-    for i in range(0, len(cov.model)-1):
-        (area, darea) = cov.get((i,2))
+    for i in range(0, len(cov.model) - 1):
+        (area, darea) = cov.get((i, 2))
         area *= scale
-        darea = area*np.sqrt((dscale/scale)**2 + (darea/area)**2)
+        darea = area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2)
         intensity.append((ideal_intensity[i], area, darea))

-    print "\nIntensity"
-    print "Ideal: Estimated"
+    print("\nIntensity")
+    print("Ideal: Estimated")
     for i in intensity:
-        print "%i: %f +/- %f" %i
+        print("%i: %f +/- %f" % i)

-    print "\nTotal intensity"
+    print("\nTotal intensity")
     # It is possible to iterate over peaks directly without using indices.
     # In addition, peak parameters can be accessed using string keys. For the
     # Gaussian over r all of "position", "width", and "area" are valid.
     total_observed_intensity = 0
     total_ideal_intensity = 0
     for peak, ii in zip(cov.model[:-1], ideal_intensity):
-        total_observed_intensity += scale*peak["area"]
+        total_observed_intensity += scale * peak["area"]
         total_ideal_intensity += ii
-    print "Ideal: Observed (using estimated scale factor)"
-    print "%i: %f" %(total_ideal_intensity, total_observed_intensity)
-
+    print("Ideal: Observed (using estimated scale factor)")
+    print("%i: %f" % (total_ideal_intensity, total_observed_intensity))

     ## Save output
     ppe.write("output/query_results.srmise")
     ppe.writepwa("output/query_results.pwa")

-
     ## Evaluating a model.
     # Although the ModelCovariance object is useful, the model used for fitting
     # can be directly accessed through PDFPeakExtraction as well, albeit
@@ -217,14 +208,15 @@ def run(plot=True):
     # peaks are kept separate.
     if plot:
         plt.figure()
-        grid = np.arange(2, 10, .01)
+        grid = np.arange(2, 10, 0.01)
         bl = ppe.extracted.baseline
         everysecondpeak = ppe.extracted.model[::2]
-        plt.plot(ppe.x, ppe.y, 'o')
+        plt.plot(ppe.x, ppe.y, "o")
         for peak in everysecondpeak:
             plt.plot(grid, bl.value(grid) + peak.value(grid))
         plt.xlim(2, 10)
         plt.show()

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     run()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4c13f11
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,77 @@
+[build-system]
+requires = ["setuptools>=62.0", "setuptools-git-versioning<2"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "diffpy.srmise"
+dynamic=['version']
+authors = [
+  { name="Simon J.L. Billinge group", email="simon.billinge@gmail.com" },
+  {name="Luke Granlund", email="granlund@pa.msu.edu"}
+]
+maintainers = [
+  { name="Simon J.L. Billinge group", email="simon.billinge@gmail.com" },
+]
+description = "Peak extraction/fitting tool for pair distribution functions"
+keywords = ['peak extraction fitting PDF AIC multimodeling']
+readme = "README.rst"
+requires-python = ">=3.10"
+classifiers = [
+    # List of possible values at
+    # http://pypi.python.org/pypi?:action=list_classifiers
+    'Development Status :: 3 - Alpha',
+    'Environment :: Console',
+    'Intended Audience :: Developers',
+    'Intended Audience :: Education',
+    'Intended Audience :: Science/Research',
+    'License :: OSI Approved :: BSD License',
+    'Operating System :: MacOS :: MacOS X',
+    'Operating System :: Microsoft :: Windows',
+    'Operating System :: POSIX',
+    'Operating System :: Unix',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
+    'Topic :: Scientific/Engineering :: Physics',
+    'Topic :: Scientific/Engineering :: Chemistry',
+    'Topic :: Software Development :: Libraries',
+]
+
+[project.urls]
+Homepage = "https://github.com/diffpy/diffpy.srmise/"
+Issues = "https://github.com/diffpy/diffpy.srmise/issues/"
+
+[tool.setuptools-git-versioning]
+enabled = true
+template = "{tag}"
+dev_template = "{tag}"
+dirty_template = "{tag}"
+
+[tool.setuptools.packages.find]
+where = ["src"]  # list of folders that contain the packages (["."] by default)
+include = ["*"]  # package names should match these glob patterns (["*"] by default)
+exclude = ["diffpy.srmise.tests*"]  # exclude packages matching these glob patterns (empty by default)
+namespaces = false  # to disable scanning PEP 420 namespaces (true by default)
+
+[tool.black]
+line-length = 115
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | \.rst
+  | \.txt
+  | _build
+  | buck-out
+  | build
+  | dist
+
+  # The following are specific to Black, you probably don't want those.
+  | blib2to3
+  | tests/data
+)/
+'''
diff --git a/setup.py b/setup.py
index daae8c3..18e25d1 100755
--- a/setup.py
+++ b/setup.py
@@ -77,10 +77,7 @@ def getversioncfg():
     },
     author="Luke Granlund",
     author_email="luke.r.granlund@gmail.com",
-    description=(
-        "Peak extraction and peak fitting tool for atomic "
-        "pair distribution functions."
-    ),
+    description=("Peak extraction and peak fitting tool for atomic " "pair distribution functions."),
     license="BSD-style license",
     url="https://github.com/diffpy/diffpy.srmise/",
     keywords="peak extraction fitting PDF AIC multimodeling",
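For reference, the scale-factor estimate and the intensity error propagation used in query_results.py above reduce to a few lines of numpy; a sketch with hypothetical fitted values (the numbers are illustrative, not output of the example):

    import numpy as np

    area, darea = 10.9, 0.4  # hypothetical fitted area of the first peak and its uncertainty
    scale = 12.0 / area  # an FCC nearest-neighbor shell holds 12 atoms
    dscale = scale * darea / area
    scaled_area = scale * area  # equals 12 by construction for this peak
    dscaled = scaled_area * np.sqrt((dscale / scale) ** 2 + (darea / area) ** 2)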