Source code for sofia_redux.toolkit.fitting.fitpeaks1d

# Licensed under a 3-clause BSD style license - see LICENSE.rst

from inspect import isclass
import warnings

from astropy import log
from astropy.modeling import models
from astropy.modeling import fitting, Fittable1DModel
from astropy.modeling.parameters import Parameter
from astropy.utils.exceptions import AstropyWarning
import numpy as np

from sofia_redux.toolkit.utilities.func import recursive_dict_update
from sofia_redux.toolkit.stats.stats import find_outliers
from sofia_redux.toolkit.interpolate.interpolate import tabinv

__all__ = ['parse_width_arg', 'get_fitter', 'dofit', 'box_convolve',
           'get_search_model', 'initial_search', 'get_background_fit',
           'get_final_model', 'fitpeaks1d', 'medabs_baseline',
           'guess_xy_mad']

try:
    from astropy.modeling.utils import _ConstraintsDict as astropy_dict
except ImportError:
    astropy_dict = dict


def robust_masking(data, threshold=5):
    """Default outlier identification for robust fitting"""
    if not isinstance(data, np.ma.MaskedArray):
        data = np.ma.array(data)
    mask = ~find_outliers(data, threshold)
    data.mask = mask
    return data


def medabs_baseline(_, y):
    """
    Default data preparation for `initial_search` and baseline function.

    Note that the baseline function is also used to prepare an initial
    estimate of the background parameters (if a background model is
    supplied as well).

    Parameters
    ----------
    _ : unused
    y : array-like
        Dependent data values

    Returns
    -------
    baseline_removed_y, baseline : numpy.ndarray, numpy.ndarray
        The de-baselined `y` data and baseline, both of type numpy.float64.
    """
    y = np.array(y)
    baseline = np.full(y.shape, float(np.median(y)))
    return np.abs(y - baseline), baseline

def guess_xy_mad(x, y):
    """
    Default peak guess function for `initial_search`.

    This function should take in x (independent) and y (dependent) values
    and return an (x, y) tuple estimate of the most prominent peak
    location.

    Parameters
    ----------
    x : array_like
        Independent values
    y : array_like
        Dependent values

    Returns
    -------
    x_peak, y_peak : 2-tuple
        The x and y location of the most prominent peak.
    """
    xind = np.argmax(np.abs(y - np.median(y)))
    return x[xind], y[xind]

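# Usage sketch (illustrative; not part of the original module): together,
# the two default helpers above produce the first guess used by the search
# phase, e.g.:
#
#     >>> import numpy as np
#     >>> x = np.linspace(0, 10, 101)
#     >>> y = 2 + np.exp(-0.5 * ((x - 4) / 0.3) ** 2)
#     >>> ysearch, baseline = medabs_baseline(x, y)  # abs(y - median), median
#     >>> x0, y0 = guess_xy_mad(x, ysearch)          # x0 is close to 4
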
def get_model_name(model, base=False):
    if isinstance(model, str):
        return model
    if base:
        if isclass(model):
            thing = model()
        else:
            thing = model
        if get_n_submodels(thing) > 1:
            thing = model[0]
    else:
        thing = model

    if isinstance(thing, Fittable1DModel):
        name = thing.__class__.name
    elif hasattr(thing, 'name'):
        name = thing.name
        if name is None:
            name = thing.__class__.name
    else:
        name = None
        log.warning("unable to determine name of %s" % thing)
    return name


def update_model(model, kwargs):
    for k, v in kwargs.items():
        if hasattr(model, k):
            val = getattr(model, k)
            if (isinstance(val, astropy_dict) or isinstance(val, dict)) \
                    and isinstance(v, dict):
                recursive_dict_update(val, v)
            elif isinstance(val, Parameter) and isinstance(v, (int, float)):
                val.value = v
                setattr(model, k, val)
            else:
                val = v
                setattr(model, k, val)


def get_amplitude_parname(model_name, base=True):
    """The standard amplitude parameter of most common models"""
    name = get_model_name(model_name, base=base)
    return 'amplitude_L' if name == 'Voigt1D' else 'amplitude'


def get_x_parname(model_name, base=True):
    """The standard x-center parameter of most common models"""
    name = get_model_name(model_name, base=base)
    if name == 'Gaussian1D':
        x_name = 'mean'
    elif name == 'Sine1D':
        x_name = 'phase'
    else:
        x_name = 'x_0'
    return x_name


def get_width_parname(model_name, base=True):
    """The standard width parameter of most common models"""
    name = get_model_name(model_name, base=base)
    names = {'Moffat1D': 'gamma',
             'Box1D': 'width',
             'Gaussian1D': 'stddev',
             'Lorentz1D': 'fwhm',
             'MexicanHat1D': 'sigma',
             'Sersic1D': 'r_eff',
             'Trapezoid1D': 'width',
             'Voigt1D': 'fwhm_G',
             'Sine1D': 'frequency'}
    return names.get(name)


def get_n_submodels(model):
    """Determine the number of submodels in a model

    Required due to significant changes to astropy.modeling in
    Astropy v4.0.
    """
    if not hasattr(model, 'n_submodels'):
        return 1
    if callable(model.n_submodels):
        return model.n_submodels()
    result = model.n_submodels
    if isinstance(result, int):
        return result
    else:
        return 1

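# Usage sketch (illustrative; not part of the original module): the lookup
# helpers above map common astropy models to their amplitude, center, and
# width parameter names, e.g.:
#
#     >>> from astropy.modeling import models
#     >>> get_x_parname(models.Gaussian1D)       # 'mean'
#     >>> get_width_parname(models.Lorentz1D)    # 'fwhm'
#     >>> get_amplitude_parname(models.Voigt1D)  # 'amplitude_L'
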
def parse_width_arg(model_class, width_arg):
    """
    Simple convenience lookup to get the width parameter name for various
    models.

    Parameters
    ----------
    model_class : astropy.modeling.Fittable1DModel
    width_arg : None or float or int or ((str or None), (float or int))

    Returns
    -------
    None or (int or float) or (str, (int or float))
        If None:
            Do not apply max_region.
        If (int or float):
            Apply fixed max_region.
        If (str, float) or (str, int):
            Apply max_region as max_region[1] multiple of model parameter
            max_region[0].

    Raises
    ------
    ValueError, TypeError
    """
    name = get_model_name(model_class)
    if name is None:
        raise TypeError("model_class is not %s" % Fittable1DModel)

    if width_arg is None:
        return
    elif isinstance(width_arg, (int, float)):
        return width_arg
    elif not hasattr(width_arg, '__len__') or len(width_arg) != 2:
        raise ValueError("invalid max_region format")

    width_param, width = width_arg
    if not isinstance(width, (int, float)):
        raise ValueError("second element of max_region is not %s" % float)
    elif isinstance(width_param, str):
        if width_param not in model_class.param_names:
            raise ValueError("%s is not a parameter of %s"
                             % (width_param, model_class))
        else:
            return width_param, width
    elif width_param is not None:
        raise ValueError("first element of max_region must be str or None")

    parname = get_width_parname(model_class)
    if parname is None:
        raise ValueError(
            'Cannot autodetect width parameter name for "%s" model' % name)
    return parname, width

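# Usage sketch (illustrative; not part of the original module): the three
# accepted width argument forms, e.g.:
#
#     >>> from astropy.modeling import models
#     >>> parse_width_arg(models.Gaussian1D, None)       # None
#     >>> parse_width_arg(models.Gaussian1D, 4)          # 4
#     >>> parse_width_arg(models.Gaussian1D, (None, 6))  # ('stddev', 6)
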
def get_min_width(model_class, min_width, x=None):
    if hasattr(min_width, '__len__') and len(min_width) == 2:
        if min_width[1] is None:
            if x is None:
                raise ValueError(
                    "Require x coordinates to determine dx")
            dx = np.unique(x)
            dx = float(np.median(np.abs(dx[1:] - dx[:-1])))
            min_width = min_width[0], dx / 2
    if isinstance(min_width, (float, int)):
        min_width = None, min_width
    return parse_width_arg(model_class, min_width)

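# Usage sketch (illustrative; not part of the original module): when the
# minimum width value is None, half of the median x spacing is used:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models
#     >>> x = np.linspace(0, 10, 1001)  # spacing of 0.01
#     >>> get_min_width(models.Gaussian1D, (None, None), x=x)
#     ...     # -> ('stddev', 0.005)
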
def get_fitter(fitter_class, robust=None, outlier_func=robust_masking,
               outlier_iter=None, **kwargs):
    """
    Creates the object fitting a model to data.

    Parameters
    ----------
    fitter_class : class of fitting object
        Typically a "solver" from scipy.optimize or astropy.modeling that,
        when instantiated, will create a solver such that:

            fitted_model = solver(model, x, y)

    robust : dict or bool, optional
        If supplied, sets the outlier rejection threshold during fitting.
        The outlier function itself may be specified via `outlier_func`.
    outlier_func : function, optional
        A function of the form data_out = outlier_func(data_in).  You may
        do anything at all to the data such as clipping, but perhaps the
        simplest option is to convert the data to a np.ma.MaskedArray and
        set the mask (True=bad) while returning the original data
        untouched.
    outlier_iter : int, optional
        The maximum number of iterations if rejecting outliers.  Defaults
        to 3.
    kwargs : dict, optional
        Additional keywords to pass into the solver during initialization.

    Returns
    -------
    instance of fitting object or function
    """
    fitter = fitter_class(**kwargs)
    if robust is None:
        return fitter
    if isinstance(robust, dict):
        opts = robust.copy()
    else:
        opts = {}
    if outlier_iter is not None:
        opts.update({'niter': outlier_iter})
    return fitting.FittingWithOutlierRemoval(fitter, outlier_func, **opts)

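# Usage sketch (illustrative; not part of the original module): a plain
# least-squares fitter, and one wrapped with outlier rejection:
#
#     >>> from astropy.modeling import fitting
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> robust_fitter = get_fitter(fitting.LevMarLSQFitter,
#     ...                            robust={'threshold': 10},
#     ...                            outlier_iter=5)
#     >>> isinstance(robust_fitter,
#     ...            fitting.FittingWithOutlierRemoval)  # True
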
def dofit(fitter, model, x, y, **kwargs):
    """
    A simple wrapper to fit a model to the data.

    This is a convenience function that throws out the masking array
    returned by `astropy.fitting.FittingWithOutlierRemoval` and just
    returns the actual fit.

    Parameters
    ----------
    fitter : fitting object
    model : astropy.modeling.Fittable1DModel
        The model that the solver will use to fit the data.
    x : array_like of float
        (N,) array of independent data to fit.
    y : array_like of float
        (N,) array of dependent data to fit.

    Returns
    -------
    astropy.modeling.Fittable1DModel
        A new model containing the fitted parameters.
    """
    try:
        result = fitter(model, x, y, **kwargs)
    except Exception as err:
        log.error("fitting failed: %s" % err)
        result = model.copy()
        result.parameters = np.nan

    if isinstance(fitter, fitting.FittingWithOutlierRemoval):
        result = result[0]
        fit_info = getattr(fitter.fitter, 'fit_info', {})
    else:
        fit_info = getattr(fitter, 'fit_info', {})

    result.solver = fitter
    result.fit_info = fit_info
    result.fit_info['x'] = x.copy()
    result.fit_info['y'] = y.copy()
    return result

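# Usage sketch (illustrative; not part of the original module):
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models, fitting
#     >>> x = np.linspace(-5, 5, 101)
#     >>> y = models.Gaussian1D(amplitude=2, mean=0, stddev=0.5)(x)
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> fit = dofit(fitter, models.Gaussian1D(), x, y)
#     >>> fit.parameters  # approximately [2, 0, 0.5]
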
def box_convolve(model, box_class, box_width=None, box_x=None,
                 model_x=None, box_var=None, box_params=None, **kwargs):
    """
    Convolve a model with a box (or another model).

    Standard astropy models are easy to convolve (multiply).
    Unfortunately, during fitting we want the result of this convolution
    to be treated as a single model, not have the box fitted as well as
    the original model.  `box_convolve` anchors the center of the
    `box_class` to the center of the `model`.  All other box parameters
    are held constant.

    As a convenience, `box_width` is supplied so that the user can define
    a fixed width for the box (which can also be defined using
    `box_params`).  Generally, all astropy models have a fixed amplitude
    of 1, so in simple cases only the width should be specified.
    However, if using a more complex model then additional parameters
    should be specified via `box_params`.

    In addition, the user may also easily select a single parameter of
    the `box_class` to be equal to a single parameter of the `model`
    multiplied by some scaling factor.  `fitpeaks1d` uses this by default
    to convolve a Gaussian function with a step function whose width is
    always a fixed multiple of the Gaussian width.  This is incredibly
    useful when trying to get a good fit on a peak by only using data in
    the range of the peak's significance, not the entire set of data,
    which may contain artifacts that will interfere with the fit.  To do
    this, set `box_width` to (parameter, factor) of the model.  For
    example, ('stddev', 6) would center a box function with a width of 6
    times the standard deviation of a Gaussian over the original
    Gaussian.

    Note though, that `box_width` is simply a convenience argument.  If
    you know what you are doing then you can tie or fix multiple
    parameters between the two models.  Look at the code in
    `box_convolve` for an example of how one should use the 'tied',
    'bounds', and 'fixed' keywords in the astropy.modeling documentation.
    These may be supplied to `box_convolve` via kwargs.  kwargs is
    applied to the model at the very end of the algorithm, so anything
    here will override any previous logic.

    Parameters
    ----------
    model : astropy.modeling.Fittable1DModel (class or instance)
        Model on which to apply the box function.
    box_class : astropy.modeling.Fittable1DModel (class)
        The box or model to convolve `model` with.
    box_width : (float or int) or (2-tuple of (str, float)), optional
        If int or float, specifies the fixed width of the box.  Using the
        2-tuple form scales the width of the box to be equal to
        box_width[1] * (the parameter box_width[0]) in `model`.  If set
        to None, the default values of the box model or those set in
        `box_params` will define a constant box width.
    box_x : str, optional
        Determines the `box_class` parameter to always be set equal to
        the `model` `model_x` parameter.  If omitted, an attempt will be
        made to determine what the x-centering parameter is for the box.
    model_x : str, optional
        Determines which `model` parameter the `box_x` parameter of the
        `box_class` will always be set equal to.  If omitted, an attempt
        will be made to determine what the x-centering parameter is for
        the `model`.
    box_var : str, optional
        Determines what the "width" parameter of the box is in relation
        to the `box_width` parameter and its uses.  If omitted, an
        attempt will be made to determine the "width" parameter name of
        the box model.
    box_params : dict, optional
        Used to set initial `box_class` parameters.  All parameters set
        here will be fixed unless overridden by other keyword options.
        Please see `astropy.modeling.models` for a list of keywords that
        can be supplied.
    kwargs : dict, optional
        Keyword values to be applied that will override everything else.
        Keys should be the names of the output model attributes and
        values should be the values you wish to set.  Dictionaries are
        applied recursively.

    Returns
    -------
    result : instance of astropy.modeling.Fittable1DModel
        The resulting `model` * box_model
    """
    if isclass(box_class) and not issubclass(box_class, Fittable1DModel):
        raise TypeError(
            "box_class must be %s" % Fittable1DModel)  # pragma: no cover

    is_compound = get_n_submodels(model) > 1

    if box_x is None:
        box_x = get_x_parname(box_class)
    if model_x is None:
        model_x = get_x_parname(model)
        if is_compound:
            model_x += '_0'

    if box_x not in box_class.param_names:
        raise ValueError('box_x parameter name "%s" not available for %s'
                         % (box_x, get_model_name(box_class)))
    if model_x not in model.param_names:
        raise ValueError('model_x parameter name "%s" not available for %s'
                         % (model_x, get_model_name(model)))

    if box_var is None and box_width is not None:
        box_var = get_width_parname(box_class)
        if box_var not in box_class.param_names:
            raise ValueError(
                'box_var parameter name "%s" not available for %s'
                % (box_var, get_model_name(box_class)))  # pragma: no cover

    if hasattr(box_width, '__len__') and len(box_width) == 2:
        scale_par, scale = box_width
        if is_compound:
            scale_par += '_0'
        if scale_par not in model.param_names:
            raise ValueError('"%s" is not a parameter of %s'
                             % (scale_par, get_model_name(model)))
        if not isinstance(scale, (int, float)):
            raise ValueError("scaling parameter is not int or float")
    else:
        scale_par = None
        scale = box_width if isinstance(box_width, (float, int)) else None

    if box_params is None:
        box_params = {}
    if 'fixed' not in box_params:
        box_params['fixed'] = {}
    if 'tied' not in box_params:
        box_params['tied'] = {}

    # Fix every parameter in the box model, we'll change it later
    for box_par in box_class.param_names:
        if box_par not in box_params['fixed']:
            box_params['fixed'][box_par] = True

    n = get_n_submodels(model)
    if scale_par is None and scale is not None:
        box_params[box_var] = scale
        box_params['fixed'][box_var] = True
    elif scale_par is not None:
        box_params['fixed'][box_var] = False
        box_params['tied'][box_var] = lambda m: scale * getattr(
            m[:n], scale_par)

    # tie box_x to model_x
    box_params['fixed'][box_x] = False
    box_params['tied'][box_x] = lambda m: getattr(m[:n], model_x)

    # create the model
    name = '(%s)*%s' % (get_model_name(model), get_model_name(box_class))
    m = model() if isclass(model) else model
    boxed = (m * box_class(**box_params)).rename(name)

    try:
        # in case we were passed an initialized model
        tmp = boxed.parameters
        tmp[:model.parameters.size] = model.parameters.copy()
        boxed.parameters = tmp
    except AttributeError:
        pass

    # If not fitting, ensure the box is centered over model
    setattr(boxed[-1], box_x, getattr(boxed[:n], model_x))

    # And ensure scaling rules are followed (if there are any)
    if scale_par is not None:
        setattr(boxed[-1], box_var, boxed[-1].tied[box_var](boxed))

    # finally override with kwargs
    update_model(boxed, kwargs)
    return boxed

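# Usage sketch (illustrative; not part of the original module): a Gaussian
# multiplied by a box whose center tracks the Gaussian mean and whose width
# is tied to 6 times the Gaussian stddev:
#
#     >>> from astropy.modeling import models
#     >>> boxed = box_convolve(models.Gaussian1D, models.Box1D,
#     ...                      box_width=('stddev', 6))
#     >>> boxed.name  # '(Gaussian1D)*Box1D'
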
def get_search_model(peak_model, box_class=None, box_width=None,
                     min_width=None, xrange=None, **kwargs):
    """
    Create the `initial_search` peak fitting model.

    This function is used to create the peak model that will be used
    during the initial peak search.  This is a fairly simple function
    which adds an optional filtering (box) model to the peak model and
    instantiates it with the necessary options.

    Parameters
    ----------
    peak_model : astropy.modeling.Fittable1DModel
        Class for the peak model.
    box_class : astropy.modeling.Fittable1DModel, optional
        Convolve the peak model with this model, generally to reduce the
        range over which the model is fit.
    box_width : (float or int) or (2-tuple of (str, float)), optional
        Used in conjunction with `box_class`.  See `box_convolve` for
        full details and usage.
    min_width : 2-tuple of (str, (int or float)), optional
        If supplied, will set the minimum bound of the width parameter
        min_width[0] to min_width[1].  Overrides kwargs.
    xrange : 2-tuple of float, optional
        (minimum, maximum) x range over which the model is applicable.
        Set to avoid extrapolation.
    kwargs : dict, optional
        Extra keywords to pass into either the `peak_model` or
        `box_convolve`.

    Returns
    -------
    result : instance of astropy.modeling.Fittable1DModel
        The model to be used for an initial peak search.
    """
    if box_width is not None and box_class is not None:
        search_model = box_convolve(
            peak_model, box_class, box_width=box_width, **kwargs)
        pmodel = search_model[:][0]
    else:
        search_model = peak_model(**kwargs)
        pmodel = search_model

    if min_width is not None:
        if isinstance(min_width, (int, float)):
            min_width = get_width_parname(peak_model), min_width
        if min_width[0] is None:
            raise ValueError(
                "could not determine width parameter for %s model"
                % get_model_name(peak_model))
        wbounds = pmodel.bounds[min_width[0]]
        wbounds = min_width[1], wbounds[1]
        pmodel.bounds[min_width[0]] = wbounds

    model_x = get_x_parname(peak_model)
    xbounds = pmodel.bounds.get(model_x)
    if xbounds == (None, None) and xrange is not None:
        pmodel.bounds[model_x] = xrange

    return search_model

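# Usage sketch (illustrative; not part of the original module): the default
# search model used by `fitpeaks1d` is a Gaussian limited by a box tied to
# 6 times its stddev:
#
#     >>> from astropy.modeling import models
#     >>> search = get_search_model(models.Gaussian1D,
#     ...                           box_class=models.Box1D,
#     ...                           box_width=('stddev', 6),
#     ...                           min_width=('stddev', 0.01),
#     ...                           xrange=(0, 10))
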
def get_background_fit(fitter, peak_model, background_class, x, y, pinit,
                       fitopts=None, bg_args=None, **bg_kwargs):
    """
    Return a background model with initialized parameters.

    Subtract peaks from `y`, then fit the background on the residual.

    Parameters
    ----------
    fitter : object
    peak_model : astropy.modeling.Fittable1DModel instance
    background_class : astropy.modeling.Fittable1DModel
    x : array_like of float
    y : array_like of float
    pinit : numpy.ndarray
        (npeaks, n_peak_parameters) array giving the peak parameters for
        `peak_model`.
    fitopts : dict, optional
        Optional keywords to pass into the solver.
    bg_args : tuple, optional
        If the background class requires positional arguments, they
        should be supplied here.
    bg_kwargs : dict, optional
        Optional keywords to pass into the background_class to initialize
        the background model.

    Returns
    -------
    numpy.ndarray
        (n_background_parameters,) array of background parameters based
        on the fit of (x, y - peaks).
    """
    y = y.copy()
    if not isclass(background_class):
        bmodel = background_class.copy()
        update_model(bmodel, bg_kwargs)
    else:
        if bg_args is None:
            bmodel = background_class(**bg_kwargs)
        else:
            if hasattr(bg_args, '__len__'):
                bmodel = background_class(*bg_args, **bg_kwargs)
            else:
                bmodel = background_class(bg_args, **bg_kwargs)

    init_params = peak_model.parameters.copy()
    if fitopts is None:
        fitopts = {}

    for params in pinit:
        peak_model.parameters = params
        y -= peak_model(x)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', AstropyWarning)
        bfit = dofit(fitter, bmodel, x, y, **fitopts)

    peak_model.parameters = init_params
    return bfit.parameters

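# Usage sketch (illustrative; not part of the original module): estimate
# constant-background parameters after removing a single fitted peak:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models, fitting
#     >>> x = np.linspace(0, 10, 101)
#     >>> y = models.Gaussian1D(3, 5, 0.5)(x) + 10
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> pinit = np.array([[3.0, 5.0, 0.5]])  # amplitude, mean, stddev
#     >>> get_background_fit(fitter, models.Gaussian1D(), models.Const1D,
#     ...                    x, y, pinit)  # approximately [10]
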
def get_final_model(peak_class, pinit, background_class=None, bg_args=None,
                    binit=None, min_width=None, xbounds=None, **kwargs):
    """
    Refine the initial fit and return a set of models.

    Parameters
    ----------
    peak_class : astropy.modeling.Fittable1DModel instance
    pinit : array_like of float
    background_class : astropy.modeling.Fittable1DModel, optional
    binit : array_like of float, optional
        (n_background_parameters,) array of initial background parameter
        estimates.
    min_width : 2-tuple of (str, float), optional
    xbounds : 2-tuple of float, optional
    kwargs : dict, optional

    Returns
    -------
    list of astropy.modeling.Fittable1DModel
    """
    # This supports old serial implementation.  May be useful one day
    # pinit = np.array(pinit)
    # npeaks = pinit.shape[0]
    # min_width = get_min_width(peak_class, min_width)
    #
    # model_class, build = None, kwargs.copy()
    # xpar_name = get_x_parname(peak_class)
    #
    # compound = npeaks > 1 or background_class is not None
    #
    # build['bounds'] = build.get('bounds', {})
    #
    # if compound:
    #     for i, params in enumerate(pinit):
    #         if model_class is None:
    #             model_class = peak_class
    #         else:
    #             model_class += peak_class
    #         for j, parname in enumerate(peak_class.param_names):
    #             build['%s_%i' % (parname, i)] = params[j]
    #         if min_width is not None:
    #             key = '%s_%i' % (min_width[0], i)
    #             wlim = build['bounds'].get(key, (None, None))
    #             build['bounds'][key] = min_width[1], wlim[1]
    #         if xbounds is not None:
    #             build['bounds']['%s_%i' % (xpar_name, i)] = xbounds
    # else:
    #     model_class = peak_class
    #     for j, parname in enumerate(peak_class.param_names):
    #         build['%s' % parname] = pinit[0, j]
    #     if min_width is not None:
    #         wlim = build['bounds'].get(min_width[0], (None, None))
    #         build['bounds'][min_width[0]] = min_width[1], wlim[1]
    #     if xbounds is not None:
    #         build['bounds'][xpar_name] = xbounds
    #
    # if background_class is not None:
    #     model_class += background_class
    #     if binit is not None:
    #         binit = np.array(binit)
    #         for parname, parval in zip(background_class.param_names, binit):
    #             build['%s_%i' % (parname, npeaks)] = parval
    #
    # if compound:
    #     name = '%i_peak%s' % (npeaks, '' if npeaks == 1 else 's')
    #     if background_class is not None:
    #         name += '_with_background'
    #     model_class = model_class.rename(name)
    # model = model_class(**build)
    # if not compound:
    #     model.name = '1_peak'
    #
    # return model

    pinit = np.array(pinit)
    npeaks = pinit.shape[0]
    min_width = get_min_width(peak_class, min_width)

    build = {}
    xpar_name = get_x_parname(peak_class)

    compound = npeaks > 1  # or background_class is not None

    build['bounds'] = build.get('bounds', {})
    model = None
    if compound:
        for i, params in enumerate(pinit):
            if model is None:
                model = peak_class()
            else:
                model += peak_class()
            for j, parname in enumerate(peak_class.param_names):
                build['%s_%i' % (parname, i)] = params[j]
            if min_width is not None:
                key = '%s_%i' % (min_width[0], i)
                wlim = build['bounds'].get(key, (None, None))
                build['bounds'][key] = min_width[1], wlim[1]
            if xbounds is not None:
                build['bounds']['%s_%i' % (xpar_name, i)] = xbounds
    else:
        model = peak_class()
        for j, parname in enumerate(peak_class.param_names):
            build['%s' % parname] = pinit[0, j]
        if min_width is not None:
            wlim = build['bounds'].get(min_width[0], (None, None))
            build['bounds'][min_width[0]] = min_width[1], wlim[1]
        if xbounds is not None:
            build['bounds'][xpar_name] = xbounds

    update_model(model, build)
    name = '%i_peak%s' % (npeaks, '' if npeaks == 1 else 's')

    if background_class is not None:
        if isclass(background_class):
            if bg_args is None:
                background = background_class()
            elif not hasattr(bg_args, '__len__'):
                background = background_class(bg_args)
            else:
                background = background_class(*bg_args)
        else:
            background = background_class.copy()
        if binit is not None:
            background.parameters = binit

        # if binit is None:
        #     background = background_class()
        # else:
        #     binit = np.array(binit)
        #     bg_kwargs = {}
        #     for parname, parval in zip(background_class.param_names, binit):
        #         bg_kwargs[parname] = parval
        #     background = background_class(**bg_kwargs)

        model += background
        name += '_with_background'

    model = model.rename(name)
    if len(kwargs) > 0:
        update_model(model, kwargs)
    return model

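# Usage sketch (illustrative; not part of the original module): build the
# refined compound model (two Gaussian peaks plus a constant background)
# from initial parameter estimates:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models
#     >>> pinit = np.array([[3.0, 5.0, 0.5], [4.0, 4.0, 0.2]])
#     >>> model = get_final_model(models.Gaussian1D, pinit,
#     ...                         background_class=models.Const1D,
#     ...                         binit=[10.0], min_width=('stddev', 0.01))
#     >>> model.name  # '2_peaks_with_background'
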
def fitpeaks1d(x, y, npeaks=1, xrange=None, min_width=(None, None),
               peak_class=models.Gaussian1D, box_class=models.Box1D,
               box_width=(None, 6), background_class=models.Const1D,
               binit=None, bg_args=None, bg_kwargs=None,
               fitting_class=fitting.LevMarLSQFitter, maxiter=None,
               outlier_func=robust_masking, robust=None, outlier_iter=3,
               baseline_func=medabs_baseline, guess=None,
               guess_func=guess_xy_mad, optional_func=None,
               fitter_kwargs=None, fitopts=None, search_kwargs=None,
               **kwargs):
    """
    Fit peaks (and optionally background) to a 1D set of data.

    The user may fit for one or multiple peaks in the input data using an
    array of different peak models, and may also fit for a model of the
    background.  There are a large number of standard models available
    for both, or the user may define their own models if necessary.  By
    default, we fit for Gaussian peaks and a constant background.

    The intention of `fitpeaks1d` is to provide a highly customizable
    peak fitting utility for one dimensional data.  The basic procedure
    is split into two phases: the search phase and the refinement phase.

    During the search phase an attempt is made to get a good first
    estimate of where the most prominent peaks lie in the data along with
    estimates of their properties.  Initial positions can be supplied by
    the user via `guess`, but may also be derived via the `guess_func`
    algorithm which is also definable by the user.  By default, candidate
    positions are determined as max(abs(y - median(y))).  The basic
    procedure is as follows:

        1. Remove a baseline from the y data via the `baseline_func`
           function.  At this early stage, we are only really interested
           in peak characteristics, or even just where they roughly are.
           The default baseline removal function is to subtract the
           median, although it is completely reasonable to use many other
           methods depending on your data.  This `baseline_func` may also
           transform the initial data set to something a little more
           suitable for finding peaks.  Users may wish to smooth or
           filter the data, or perform some other kind of transform.  By
           default the data is set to y = abs(y - median(y)), as we do
           not know if we're searching for positive or negative peaks.

        2. Guess where peaks may lie along the x-axis.  These estimates
           can be provided directly by the user, or derived via the
           `guess_func` algorithm.  This algorithm should take the (x, y)
           data set and return a single (x0, y0) coordinate giving the x
           position and amplitude of the most prominent peak candidate in
           the data.  In the default case this is simply y0 = max(y) and
           x0 = where(y0).

        3. Fit a peak using (x0, y0) as an initial condition.  Store the
           peak properties, then subtract a model of the peak from the
           data and go back to step 2.  Only break this loop once the
           cycle has completed `npeaks` times.  This will hopefully yield
           the `npeaks` most prominent peaks in the data.  After fitting
           we should have a more exact (x1, y1) position for each peak.

        4. Take the x1 position of each peak derived in step 3 and
           interpolate it back onto the original data set, before it was
           changed in step 1.  Subtract the derived baseline from this
           value and we should then have a good estimate of the (x, y)
           position for each peak in relation to the real dataset.

        5. If the user wishes to fit a background to the data, get an
           estimate of the background fit parameters by subtracting
           models of each peak from the original dataset and then fitting
           the desired background model to the residual.

    During the refinement phase, a final model is created from all
    initial peak fits and an optional background.  If a background is
    requested, an initial estimate of its parameters is derived by
    fitting the background model on y - fit(peak_estimates).

    Parameters
    ----------
    x : array_like of float
        (N,) array of independent values.
    y : array_like of float
        (N,) array of dependent values.
    npeaks : int, optional
        The number of peaks to find.  Set to None to find a single (most
        prominent) peak.  Peaks are returned in the order most prominent
        -> least prominent.
    xrange : 2-tuple of (int or float), optional
        The (minimum, maximum) range of values in x where a fitted peak
        will be valid.  Set to None to use the full range of valid
        (finite) data.
    peak_class : astropy.modeling.Fittable1DModel, optional
        Class of peak model.  The default is
        `astropy.modeling.models.Gaussian1D`.
    box_width : float or int or (2-tuple of (str, float)), optional
        If int or float, specifies the width of the box model
        (`box_class`).  If specified as a 2-tuple of (str, float), the
        first element specifies a parameter of the model that will be
        multiplied by the second element (float) to define the width of
        the box.  For example, ('stddev', 3) for the Gaussian1D model
        would limit the range of the model to 3 * stddev.  This is highly
        recommended when fitting continuous or wide functions.  Note that
        this will not be applied when the refined model is fitted in
        parallel, since that would negate the whole point of fitting in
        parallel in the first place.
    min_width : 2-tuple of (str or None, float or None), optional
        Set a lower limit on the width of each peak.  Set to None for no
        lower limit.  min_width[0] is the name of the parameter in the
        peak model governing width (str).  Set this to None to autodetect
        the parameter name if using a standard model.  min_width[1] is
        the value to set.  If set to None, half of the median separation
        of sorted unique `x` samples will be used.
    box_class : astropy.modeling.Fittable1DModel, optional
        Class for filtering the peak model.  Will be applied as
        peak_model * box_model.  This is only applied during the initial
        search stage.  The width of the box is defined by `box_width`.
        The default is astropy.modeling.models.Box1D.
    background_class : astropy.modeling.Fittable1DModel, optional
        Class for describing the background.  Will be applied as
        peak_model + background in the final refined fitting.  Note that
        it will only be fitted to the area of interest around each peak
        (`box_width`).  Disable background fitting by supplying `None`.
        The default is to fit a constant background using
        astropy.modeling.models.Const1D.  NOTE: you can supply a model
        instance rather than a class in order to fit polynomial
        backgrounds, as the number of polynomial parameters is dependent
        on the polynomial order.  For example, in order to fit a 3rd
        order polynomial, set background_class=Polynomial1D(3).
    binit : array_like of float, optional
        A user guess at background parameters.
    bg_args : tuple, optional
        If the background class requires arguments, they should be
        supplied here as a tuple.
    bg_kwargs : dict, optional
        Optional keyword arguments to pass into the background model
        during initialization.
    fitting_class : class of fitting object
        Typically a "solver" from scipy.optimize or astropy.modeling
        that, when instantiated, will create a solver such that:

            fitted_model = solver(model, x, y)

    maxiter : int, optional
        Maximum number of iterations for the solver to attempt before
        quitting.  Set to None to use the default for each solver class.
    outlier_func : function, optional
        A function of the form data_out = outlier_func(data_in).  You may
        do anything at all to the data such as clipping, but perhaps the
        simplest option is to convert the data to a np.ma.MaskedArray and
        set the mask (True=bad) while returning the original data
        untouched.  The default is to use `robust_masking`, which is a
        basic wrapper for `mc.find_outliers`.
    robust : dict or anything, optional
        Supplying anything other than `None` will turn on outlier
        rejection for the solver.  If `robust` is a dict, then it should
        contain optional keyword arguments that would be supplied to
        `outlier_func`.  For example, the default `robust_masking`
        outlier rejection function takes the `threshold` keyword.  To
        turn on outlier rejection you could just set robust=True, but if
        you wanted to change threshold to 10, then set
        robust={'threshold': 10}.  Setting robust=None turns off outlier
        rejection (default).
    outlier_iter : int, optional
        The maximum number of iterations for the solver when rejecting
        outliers.  Defaults to 3.
    guess : array_like of float, optional
        An array where each element gives a guess at an initial `x`
        position for a peak.  If there are fewer guesses than `npeaks`,
        `guess_func` will be used after the initial guesses have been
        taken.  If there are more guesses than `npeaks`, then only
        guess[:npeaks] will be used.
    baseline_func : function, optional
        A function used during the initial search phase, of the form:

            ysearch, baseline = baseline_func(x, y)

        This function should remove a baseline from the data so that we
        mitigate effects from the background as much as possible while
        keeping the structure of the peaks.  In the default case, the
        baseline is taken as the median in y.  This baseline should be
        subtracted from y, and then the user may transform y as they see
        fit to allow `guess_func` and the solver to get the most reliable
        guess possible at each peak's model parameters.  The default
        `baseline_func` returns ysearch as ysearch = abs(y - median(y)).
        Note that the most important parameter to guess at during this
        stage is the x center position of the peak.  Amplitude is not
        important, but we do want to get fairly accurate representations
        of the rest of the fit parameters such as width or FWHM.
    guess_func : function
        A function of the form:

            (x0, y0) = guess_func(x, y_modified)

        (x0, y0) is the peak (x_center, y_amplitude) of the most
        prominent peak in the data after `baseline_func` has been
        applied.  These results are used to initialize peak parameters
        during the search phase.  The default function returns the (x, y)
        coordinate of the datum that satisfies
        y0 = max(abs(y - median(y))).
    optional_func : function, optional
        An optional function applied to the data prior to the final fit.
        If fitting a background, then no attempt should be made to remove
        the background unless it is being accounted for in some way.  If
        you are not fitting a background then feel free to go to town on
        removing said background.  Preserve whatever properties of the
        peak you wish to have appear in the output results.  Good choices
        may involve filtering the data.  The default is to do nothing and
        fit the final model on the original data.
    fitter_kwargs : dict, optional
        Additional keywords to pass into the solver during
        initialization.
    fitopts : dict, optional
        Optional keywords to pass into the solver at runtime.
    search_kwargs : dict, optional
        Additional keyword arguments to supply during model instantiation
        during the initial search stage (peak * optional(box)).  Note
        that the model parameter names are suffixed with "_0" and box
        parameter names are suffixed with "_1", but only if a box is
        being applied.  So for example, to say that you want parameter
        "p" of the model to remain fixed:

            kwargs = {"fixed": {"p_0": True}}

        As another example, to set parameter "a" of the box equal to
        parameter "b" of the model:

            kwargs = {"tied": {"a_1": lambda x: getattr(x, "b_0")}}

        There are lots of other things you could do here, and I've tried
        to include as many hooks as possible.  You don't even need to
        treat this thing as a box function.  It could be any kind of
        filtering function etc., and you could use kwargs to supply it
        with the necessary parameters or behaviours as you wish.
    kwargs : dict, optional
        Additional keyword arguments to supply during model instantiation
        during the final model refinement stage.  Exactly the same type
        of thing as `search_kwargs`.  However, the following suffix rules
        apply for parallel fitting (default):

            1. If fitting only a single peak and no background, do not
               suffix any parameters.  "amplitude" remains "amplitude".
            2. If fitting multiple peaks, begin suffixing at _0 for the
               first peak up to _(npeaks-1) for the last.  i.e. the
               amplitude of the 5th peak should be referred to as
               "amplitude_4".
            3. If fitting a single peak with a background, then peak
               parameters should be suffixed with "_0" and background
               parameters with "_1".
            4. If fitting multiple peaks with a single background, then
               peak parameters should be suffixed according to step 2,
               while background parameters should be suffixed with
               _(npeaks).  i.e. if there were 5 peaks, the background
               "intercept" for a linear background fit should be referred
               to as "intercept_5".

        and for serial fitting:

            1. If fitting a single peak with no background, do not suffix
               any parameters.  "amplitude" remains "amplitude".
            2. If fitting one or more peaks with a background, then there
               is no distinction available between peaks.  Each peak
               parameter rule must be applied to all peaks.  For example,
               setting {'bounds': {'amplitude_0': (0, 5)}} will limit the
               amplitude of all peaks to within the range 0->5.  The
               background parameters are suffixed with "_1".

    Returns
    -------
    model : astropy.modeling.Fittable1DModel instance
        The final fitted model.  If more than one peak or a background
        was fitted for, then the returned model will be a compound model.
        All peaks are located in model[:npeaks].  If a background was
        included, it can be found at model[npeaks].  If a single peak and
        no background was fitted, then an instance of the peak_class will
        be returned containing the fit parameters.  No indexing will be
        possible.

    Examples
    --------
    Create compound peaks on a sloped background

    >>> import numpy as np
    >>> from astropy.modeling import models
    >>> from sofia_redux.toolkit.fitting.fitpeaks1d import fitpeaks1d
    >>> x = np.linspace(0, 10, 1001)
    >>> model1 = models.Gaussian1D(amplitude=3, mean=5, stddev=0.5)
    >>> model2 = models.Gaussian1D(amplitude=4, mean=4, stddev=0.2)
    >>> y = model1(x) + model2(x) + 0.05 * x + 100
    >>> rand = np.random.RandomState(42)
    >>> y += rand.normal(0, 0.01, y.size)
    >>> fit = fitpeaks1d(x, y, background_class=models.Linear1D,
    ...                  npeaks=2, min_width=0.1)
    >>> rmse = np.sqrt(np.mean(fit.fit_info['fvec'] ** 2))
    >>> assert np.allclose(fit[0].parameters, [3, 5, 0.5], atol=0.02)  # peak 1
    >>> assert np.allclose(fit[1].parameters, [4, 4, 0.2], atol=0.02)  # peak 2
    >>> assert np.allclose(fit[2].parameters, [0.1, 100], atol=0.2)  # bg
    >>> assert np.isclose(rmse, 0.01, rtol=1)  # residuals
    """
    if not isclass(peak_class) or not issubclass(
            peak_class, Fittable1DModel):
        raise TypeError("peak model is not %s" % Fittable1DModel)

    x, y = np.array(x), np.array(y)
    shape = x.shape
    if y.shape != shape:
        raise ValueError("x and y array shape mismatch")

    if fitter_kwargs is None:
        fitter_kwargs = {}
    if search_kwargs is None:
        search_kwargs = {}
    if fitopts is None:
        fitopts = {}
    if fitting_class in [fitting.LevMarLSQFitter, fitting.SLSQPLSQFitter,
                         fitting.SimplexLSQFitter]:
        fitopts['maxiter'] = 1000
    if bg_kwargs is None:
        bg_kwargs = {}
    if maxiter is not None:
        fitopts['maxiter'] = maxiter

    mask = np.isfinite(x) & np.isfinite(y)
    if not mask.any():
        raise ValueError("no finite data")
    x, y = x[mask], y[mask]
    if np.unique(x).size < 5:
        raise ValueError("not enough valid unique points")

    if xrange is None:
        xrange = x.min(), x.max()
    elif not hasattr(xrange, '__len__') or len(xrange) != 2:
        raise ValueError("xrange must be of the form (xmin, xmax)")

    box_width = parse_width_arg(peak_class, box_width)
    min_width = get_min_width(peak_class, min_width, x)

    fitter = get_fitter(fitting_class, robust=robust,
                        outlier_func=outlier_func,
                        outlier_iter=outlier_iter, **fitter_kwargs)
    search_model = get_search_model(peak_class, box_class=box_class,
                                    box_width=box_width,
                                    min_width=min_width, xrange=xrange,
                                    **search_kwargs)
    pinit = initial_search(fitter, search_model, x, y, npeaks=npeaks,
                           guess=guess, guess_func=guess_func,
                           baseline_func=baseline_func, fitopts=fitopts)

    y = y.copy() if optional_func is None else optional_func(x, y)

    if background_class is not None and binit is None:
        if get_n_submodels(search_model) > 1:
            search_peak = search_model[0]
        else:
            search_peak = search_model
        binit = get_background_fit(fitter, search_peak, background_class,
                                   x, y, pinit, bg_args=bg_args,
                                   **bg_kwargs)

    model = get_final_model(peak_class, pinit,
                            background_class=background_class,
                            bg_args=bg_args, binit=binit,
                            min_width=min_width, xbounds=xrange, **kwargs)
    fit = dofit(fitter, model, x, y, **fitopts)
    return fit

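# Usage sketch (illustrative; not part of the original module): reusing the
# x and y arrays from the Examples section above, the same fit can be run
# with outlier rejection enabled via the `robust` keyword:
#
#     >>> fit = fitpeaks1d(x, y, npeaks=2, min_width=0.1,
#     ...                  background_class=models.Linear1D,
#     ...                  robust={'threshold': 10})
#     >>> yfit = fit(x)  # evaluate the fitted compound model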