Source code for sofia_redux.toolkit.fitting.fitpeaks1d

# Licensed under a 3-clause BSD style license - see LICENSE.rst

from inspect import isclass
import warnings

from astropy import log
from astropy.modeling import models
from astropy.modeling import fitting, Fittable1DModel
from astropy.modeling.parameters import Parameter
from astropy.utils.exceptions import AstropyWarning
import numpy as np

from sofia_redux.toolkit.utilities.func import recursive_dict_update
from sofia_redux.toolkit.stats.stats import find_outliers
from sofia_redux.toolkit.interpolate.interpolate import tabinv

__all__ = ['parse_width_arg', 'get_fitter', 'dofit', 'box_convolve',
           'get_search_model', 'initial_search', 'get_background_fit',
           'get_final_model', 'fitpeaks1d', 'medabs_baseline',
           'guess_xy_mad']

try:
    from astropy.modeling.utils import _ConstraintsDict as astropy_dict
except ImportError:
    astropy_dict = dict


def robust_masking(data, threshold=5):
    """Default outlier identification for robust fitting"""
    if not isinstance(data, np.ma.MaskedArray):
        data = np.ma.array(data)
    mask = ~find_outliers(data, threshold)
    data.mask = mask
    return data


def medabs_baseline(_, y):
    """
    Default data preparation for `initial_search` and baseline function.

    Note that the baseline function is also used to prepare an initial
    estimate of the background parameters (if a background model is
    supplied as well).

    Parameters
    ----------
    _ : unused
    y : array-like
        Dependent data values

    Returns
    -------
    baseline_removed_y, baseline : numpy.ndarray, numpy.ndarray
        The de-baselined `y` data and baseline, both of type numpy.float64.
    """
    y = np.array(y)
    baseline = np.full(y.shape, float(np.median(y)))
    return np.abs(y - baseline), baseline

def guess_xy_mad(x, y):
    """
    Default peak guess function for `initial_search`.

    This function should take in x (independent) and y (dependent) values
    and return an (x, y) tuple estimate of the most prominent peak
    location.

    Parameters
    ----------
    x : array_like
        Independent values
    y : array_like
        Dependent values

    Returns
    -------
    x_peak, y_peak : 2-tuple
        The x and y location of the most prominent peak.
    """
    xind = np.argmax(np.abs(y - np.median(y)))
    return x[xind], y[xind]

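# Usage sketch (illustrative; not part of the original module): together,
# the two default helpers above produce the first guess used by the search
# phase, e.g.:
#
#     >>> import numpy as np
#     >>> x = np.linspace(0, 10, 101)
#     >>> y = 2 + np.exp(-0.5 * ((x - 4) / 0.3) ** 2)
#     >>> ysearch, baseline = medabs_baseline(x, y)  # abs(y - median), median
#     >>> x0, y0 = guess_xy_mad(x, ysearch)          # x0 is close to 4
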
def get_model_name(model, base=False):
    if isinstance(model, str):
        return model
    if base:
        if isclass(model):
            thing = model()
        else:
            thing = model
        if get_n_submodels(thing) > 1:
            thing = model[0]
    else:
        thing = model

    if isinstance(thing, Fittable1DModel):
        name = thing.__class__.name
    elif hasattr(thing, 'name'):
        name = thing.name
        if name is None:
            name = thing.__class__.name
    else:
        name = None
        log.warning("unable to determine name of %s" % thing)
    return name


def update_model(model, kwargs):
    for k, v in kwargs.items():
        if hasattr(model, k):
            val = getattr(model, k)
            if (isinstance(val, astropy_dict) or isinstance(val, dict)) \
                    and isinstance(v, dict):
                recursive_dict_update(val, v)
            elif isinstance(val, Parameter) and isinstance(v, (int, float)):
                val.value = v
                setattr(model, k, val)
            else:
                val = v
                setattr(model, k, val)


def get_amplitude_parname(model_name, base=True):
    """The standard amplitude parameter of most common models"""
    name = get_model_name(model_name, base=base)
    return 'amplitude_L' if name == 'Voigt1D' else 'amplitude'


def get_x_parname(model_name, base=True):
    """The standard x-center parameter of most common models"""
    name = get_model_name(model_name, base=base)
    if name == 'Gaussian1D':
        x_name = 'mean'
    elif name == 'Sine1D':
        x_name = 'phase'
    else:
        x_name = 'x_0'
    return x_name


def get_width_parname(model_name, base=True):
    """The standard width parameter of most common models"""
    name = get_model_name(model_name, base=base)
    names = {'Moffat1D': 'gamma',
             'Box1D': 'width',
             'Gaussian1D': 'stddev',
             'Lorentz1D': 'fwhm',
             'MexicanHat1D': 'sigma',
             'Sersic1D': 'r_eff',
             'Trapezoid1D': 'width',
             'Voigt1D': 'fwhm_G',
             'Sine1D': 'frequency'}
    return names.get(name)


def get_n_submodels(model):
    """Determine the number of submodels in a model

    Required due to significant changes to astropy.modeling in
    Astropy v4.0.
    """
    if not hasattr(model, 'n_submodels'):
        return 1
    if callable(model.n_submodels):
        return model.n_submodels()
    result = model.n_submodels
    if isinstance(result, int):
        return result
    else:
        return 1

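# Usage sketch (illustrative; not part of the original module): the lookup
# helpers above map common astropy models to their amplitude, center, and
# width parameter names, e.g.:
#
#     >>> from astropy.modeling import models
#     >>> get_x_parname(models.Gaussian1D)       # 'mean'
#     >>> get_width_parname(models.Lorentz1D)    # 'fwhm'
#     >>> get_amplitude_parname(models.Voigt1D)  # 'amplitude_L'
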
def parse_width_arg(model_class, width_arg):
    """
    Simple convenience lookup to get the width parameter name for various
    models.

    Parameters
    ----------
    model_class : astropy.modeling.Fittable1DModel
    width_arg : None or float or int or ((str or None), (float or int))

    Returns
    -------
    None or (int or float) or (str, (int or float))
        If None:
            Do not apply max_region.
        If (int or float):
            Apply fixed max_region.
        If (str, float) or (str, int):
            Apply max_region as max_region[1] multiple of model parameter
            max_region[0].

    Raises
    ------
    ValueError, TypeError
    """
    name = get_model_name(model_class)
    if name is None:
        raise TypeError("model_class is not %s" % Fittable1DModel)

    if width_arg is None:
        return
    elif isinstance(width_arg, (int, float)):
        return width_arg
    elif not hasattr(width_arg, '__len__') or len(width_arg) != 2:
        raise ValueError("invalid max_region format")

    width_param, width = width_arg
    if not isinstance(width, (int, float)):
        raise ValueError("second element of max_region is not %s" % float)
    elif isinstance(width_param, str):
        if width_param not in model_class.param_names:
            raise ValueError("%s is not a parameter of %s"
                             % (width_param, model_class))
        else:
            return width_param, width
    elif width_param is not None:
        raise ValueError("first element of max_region must be str or None")

    parname = get_width_parname(model_class)
    if parname is None:
        raise ValueError(
            'Cannot autodetect width parameter name for "%s" model' % name)
    return parname, width

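# Usage sketch (illustrative; not part of the original module): the three
# accepted width argument forms, e.g.:
#
#     >>> from astropy.modeling import models
#     >>> parse_width_arg(models.Gaussian1D, None)       # None
#     >>> parse_width_arg(models.Gaussian1D, 4)          # 4
#     >>> parse_width_arg(models.Gaussian1D, (None, 6))  # ('stddev', 6)
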
def get_min_width(model_class, min_width, x=None):
    if hasattr(min_width, '__len__') and len(min_width) == 2:
        if min_width[1] is None:
            if x is None:
                raise ValueError(
                    "Require x coordinates to determine dx")
            dx = np.unique(x)
            dx = float(np.median(np.abs(dx[1:] - dx[:-1])))
            min_width = min_width[0], dx / 2
    if isinstance(min_width, (float, int)):
        min_width = None, min_width
    return parse_width_arg(model_class, min_width)

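# Usage sketch (illustrative; not part of the original module): when the
# minimum width value is None, half of the median x spacing is used:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models
#     >>> x = np.linspace(0, 10, 1001)  # spacing of 0.01
#     >>> get_min_width(models.Gaussian1D, (None, None), x=x)
#     ...     # -> ('stddev', 0.005)
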
def get_fitter(fitter_class, robust=None, outlier_func=robust_masking,
               outlier_iter=None, **kwargs):
    """
    Creates the object fitting a model to data.

    Parameters
    ----------
    fitter_class : class of fitting object
        Typically a "solver" from scipy.optimize or astropy.modeling that,
        when instantiated, will create a solver such that:

            fitted_model = solver(model, x, y)

    robust : dict or bool, optional
        If supplied, sets the outlier rejection threshold during fitting.
        The outlier function itself may be specified via `outlier_func`.
    outlier_func : function, optional
        A function of the form data_out = outlier_func(data_in).  You may
        do anything at all to the data such as clipping, but perhaps the
        simplest option is to convert the data to a np.ma.MaskedArray and
        set the mask (True=bad) while returning the original data
        untouched.
    outlier_iter : int, optional
        The maximum number of iterations if rejecting outliers.  Defaults
        to 3.
    kwargs : dict, optional
        Additional keywords to pass into the solver during initialization.

    Returns
    -------
    instance of fitting object or function
    """
    fitter = fitter_class(**kwargs)
    if robust is None:
        return fitter
    if isinstance(robust, dict):
        opts = robust.copy()
    else:
        opts = {}
    if outlier_iter is not None:
        opts.update({'niter': outlier_iter})
    return fitting.FittingWithOutlierRemoval(fitter, outlier_func, **opts)

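# Usage sketch (illustrative; not part of the original module): a plain
# least-squares fitter, and one wrapped with outlier rejection:
#
#     >>> from astropy.modeling import fitting
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> robust_fitter = get_fitter(fitting.LevMarLSQFitter,
#     ...                            robust={'threshold': 10},
#     ...                            outlier_iter=5)
#     >>> isinstance(robust_fitter,
#     ...            fitting.FittingWithOutlierRemoval)  # True
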
def dofit(fitter, model, x, y, **kwargs):
    """
    A simple wrapper to fit a model to the data.

    This is a convenience function that throws out the masking array
    returned by `astropy.fitting.FittingWithOutlierRemoval` and just
    returns the actual fit.

    Parameters
    ----------
    fitter : fitting object
    model : astropy.modeling.Fittable1DModel
        The model that the solver will use to fit the data.
    x : array_like of float
        (N,) array of independent data to fit.
    y : array_like of float
        (N,) array of dependent data to fit.

    Returns
    -------
    astropy.modeling.Fittable1DModel
        A new model containing the fitted parameters.
    """
    try:
        result = fitter(model, x, y, **kwargs)
    except Exception as err:
        log.error("fitting failed: %s" % err)
        result = model.copy()
        result.parameters = np.nan

    if isinstance(fitter, fitting.FittingWithOutlierRemoval):
        result = result[0]
        fit_info = getattr(fitter.fitter, 'fit_info', {})
    else:
        fit_info = getattr(fitter, 'fit_info', {})

    result.solver = fitter
    result.fit_info = fit_info
    result.fit_info['x'] = x.copy()
    result.fit_info['y'] = y.copy()
    return result

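# Usage sketch (illustrative; not part of the original module):
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models, fitting
#     >>> x = np.linspace(-5, 5, 101)
#     >>> y = models.Gaussian1D(amplitude=2, mean=0, stddev=0.5)(x)
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> fit = dofit(fitter, models.Gaussian1D(), x, y)
#     >>> fit.parameters  # approximately [2, 0, 0.5]
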
def box_convolve(model, box_class, box_width=None, box_x=None,
                 model_x=None, box_var=None, box_params=None, **kwargs):
    """
    Convolve a model with a box (or another model).

    Standard astropy models are easy to convolve (multiply).
    Unfortunately, during fitting we want the result of this convolution
    to be treated as a single model, not have the box fitted as well as
    the original model.  `box_convolve` anchors the center of the
    `box_class` to the center of the `model`.  All other box parameters
    are held constant.

    As a convenience, `box_width` is supplied so that the user can define
    a fixed width for the box (which can also be defined using
    `box_params`).  Generally, all astropy models have a fixed amplitude
    of 1, so in simple cases only the width should be specified.
    However, if using a more complex model then additional parameters
    should be specified via `box_params`.

    In addition, the user may also easily select a single parameter of
    the `box_class` to be equal to a single parameter of the `model`
    multiplied by some scaling factor.  `fitpeaks1d` uses this by default
    to convolve a Gaussian function with a step function whose width is
    always a fixed multiple of the Gaussian width.  This is incredibly
    useful when trying to get a good fit on a peak by only using data in
    the range of the peak's significance, not the entire set of data,
    which may contain artifacts that will interfere with the fit.  To do
    this, set `box_width` to (parameter, factor) of the model.  For
    example, ('stddev', 6) would center a box function with a width of 6
    times the standard deviation of a Gaussian over the original
    Gaussian.

    Note though, that `box_width` is simply a convenience argument.  If
    you know what you are doing then you can tie or fix multiple
    parameters between the two models.  Look at the code in
    `box_convolve` for an example of how one should use the 'tied',
    'bounds', and 'fixed' keywords in the astropy.modeling documentation.
    These may be supplied to `box_convolve` via kwargs.  kwargs is
    applied to the model at the very end of the algorithm, so anything
    here will override any previous logic.

    Parameters
    ----------
    model : astropy.modeling.Fittable1DModel (class or instance)
        Model on which to apply the box function.
    box_class : astropy.modeling.Fittable1DModel (class)
        The box or model to convolve `model` with.
    box_width : (float or int) or (2-tuple of (str, float)), optional
        If int or float, specifies the fixed width of the box.  Using the
        2-tuple form scales the width of the box to be equal to
        box_width[1] * (the parameter box_width[0]) in `model`.  If set
        to None, the default values of the box model or those set in
        `box_params` will define a constant box width.
    box_x : str, optional
        Determines the `box_class` parameter to always be set equal to
        the `model` `model_x` parameter.  If omitted, an attempt will be
        made to determine what the x-centering parameter is for the box.
    model_x : str, optional
        Determines which `model` parameter the `box_x` parameter of the
        `box_class` will always be set equal to.  If omitted, an attempt
        will be made to determine what the x-centering parameter is for
        the `model`.
    box_var : str, optional
        Determines what the "width" parameter of the box is in relation
        to the `box_width` parameter and its uses.  If omitted, an
        attempt will be made to determine the "width" parameter name of
        the box model.
    box_params : dict, optional
        Used to set initial `box_class` parameters.  All parameters set
        here will be fixed unless overridden by other keyword options.
        Please see `astropy.modeling.models` for a list of keywords that
        can be supplied.
    kwargs : dict, optional
        Keyword values to be applied that will override everything else.
        Keys should be the names of the output model attributes and
        values should be the values you wish to set.  Dictionaries are
        applied recursively.

    Returns
    -------
    result : instance of astropy.modeling.Fittable1DModel
        The resulting `model` * box_model
    """
    if isclass(box_class) and not issubclass(box_class, Fittable1DModel):
        raise TypeError(
            "box_class must be %s" % Fittable1DModel)  # pragma: no cover

    is_compound = get_n_submodels(model) > 1

    if box_x is None:
        box_x = get_x_parname(box_class)
    if model_x is None:
        model_x = get_x_parname(model)
        if is_compound:
            model_x += '_0'

    if box_x not in box_class.param_names:
        raise ValueError('box_x parameter name "%s" not available for %s'
                         % (box_x, get_model_name(box_class)))
    if model_x not in model.param_names:
        raise ValueError('model_x parameter name "%s" not available for %s'
                         % (model_x, get_model_name(model)))

    if box_var is None and box_width is not None:
        box_var = get_width_parname(box_class)
        if box_var not in box_class.param_names:
            raise ValueError(
                'box_var parameter name "%s" not available for %s'
                % (box_var, get_model_name(box_class)))  # pragma: no cover

    if hasattr(box_width, '__len__') and len(box_width) == 2:
        scale_par, scale = box_width
        if is_compound:
            scale_par += '_0'
        if scale_par not in model.param_names:
            raise ValueError('"%s" is not a parameter of %s'
                             % (scale_par, get_model_name(model)))
        if not isinstance(scale, (int, float)):
            raise ValueError("scaling parameter is not int or float")
    else:
        scale_par = None
        scale = box_width if isinstance(box_width, (float, int)) else None

    if box_params is None:
        box_params = {}
    if 'fixed' not in box_params:
        box_params['fixed'] = {}
    if 'tied' not in box_params:
        box_params['tied'] = {}

    # Fix every parameter in the box model, we'll change it later
    for box_par in box_class.param_names:
        if box_par not in box_params['fixed']:
            box_params['fixed'][box_par] = True

    n = get_n_submodels(model)
    if scale_par is None and scale is not None:
        box_params[box_var] = scale
        box_params['fixed'][box_var] = True
    elif scale_par is not None:
        box_params['fixed'][box_var] = False
        box_params['tied'][box_var] = lambda m: scale * getattr(
            m[:n], scale_par)

    # tie box_x to model_x
    box_params['fixed'][box_x] = False
    box_params['tied'][box_x] = lambda m: getattr(m[:n], model_x)

    # create the model
    name = '(%s)*%s' % (get_model_name(model), get_model_name(box_class))
    m = model() if isclass(model) else model
    boxed = (m * box_class(**box_params)).rename(name)

    try:
        # in case we were passed an initialized model
        tmp = boxed.parameters
        tmp[:model.parameters.size] = model.parameters.copy()
        boxed.parameters = tmp
    except AttributeError:
        pass

    # If not fitting, ensure the box is centered over model
    setattr(boxed[-1], box_x, getattr(boxed[:n], model_x))

    # And ensure scaling rules are followed (if there are any)
    if scale_par is not None:
        setattr(boxed[-1], box_var, boxed[-1].tied[box_var](boxed))

    # finally override with kwargs
    update_model(boxed, kwargs)
    return boxed

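# Usage sketch (illustrative; not part of the original module): a Gaussian
# multiplied by a box whose center tracks the Gaussian mean and whose width
# is tied to 6 times the Gaussian stddev:
#
#     >>> from astropy.modeling import models
#     >>> boxed = box_convolve(models.Gaussian1D, models.Box1D,
#     ...                      box_width=('stddev', 6))
#     >>> boxed.name  # '(Gaussian1D)*Box1D'
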
def get_search_model(peak_model, box_class=None, box_width=None,
                     min_width=None, xrange=None, **kwargs):
    """
    Create the `initial_search` peak fitting model.

    This function is used to create the peak model that will be used
    during the initial peak search.  This is a fairly simple function
    which adds an optional filtering (box) model to the peak model and
    instantiates it with the necessary options.

    Parameters
    ----------
    peak_model : astropy.modeling.Fittable1DModel
        Class for the peak model.
    box_class : astropy.modeling.Fittable1DModel, optional
        Convolve the peak model with this model, generally to reduce the
        range over which the model is fit.
    box_width : (float or int) or (2-tuple of (str, float)), optional
        Used in conjunction with `box_class`.  See `box_convolve` for
        full details and usage.
    min_width : 2-tuple of (str, (int or float)), optional
        If supplied, will set the minimum bound of the width parameter
        min_width[0] to min_width[1].  Overrides kwargs.
    xrange : 2-tuple of float, optional
        (minimum, maximum) x range over which the model is applicable.
        Set to avoid extrapolation.
    kwargs : dict, optional
        Extra keywords to pass into either the `peak_model` or
        `box_convolve`.

    Returns
    -------
    result : instance of astropy.modeling.Fittable1DModel
        The model to be used for an initial peak search.
    """
    if box_width is not None and box_class is not None:
        search_model = box_convolve(
            peak_model, box_class, box_width=box_width, **kwargs)
        pmodel = search_model[:][0]
    else:
        search_model = peak_model(**kwargs)
        pmodel = search_model

    if min_width is not None:
        if isinstance(min_width, (int, float)):
            min_width = get_width_parname(peak_model), min_width
        if min_width[0] is None:
            raise ValueError(
                "could not determine width parameter for %s model"
                % get_model_name(peak_model))
        wbounds = pmodel.bounds[min_width[0]]
        wbounds = min_width[1], wbounds[1]
        pmodel.bounds[min_width[0]] = wbounds

    model_x = get_x_parname(peak_model)
    xbounds = pmodel.bounds.get(model_x)
    if xbounds == (None, None) and xrange is not None:
        pmodel.bounds[model_x] = xrange

    return search_model

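# Usage sketch (illustrative; not part of the original module): the default
# search model used by `fitpeaks1d` is a Gaussian limited by a box tied to
# 6 times its stddev:
#
#     >>> from astropy.modeling import models
#     >>> search = get_search_model(models.Gaussian1D,
#     ...                           box_class=models.Box1D,
#     ...                           box_width=('stddev', 6),
#     ...                           min_width=('stddev', 0.01),
#     ...                           xrange=(0, 10))
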
def get_background_fit(fitter, peak_model, background_class, x, y, pinit,
                       fitopts=None, bg_args=None, **bg_kwargs):
    """
    Return a background model with initialized parameters.

    Subtract peaks from `y`, then fit the background on the residual.

    Parameters
    ----------
    fitter : object
    peak_model : astropy.modeling.Fittable1DModel instance
    background_class : astropy.modeling.Fittable1DModel
    x : array_like of float
    y : array_like of float
    pinit : numpy.ndarray
        (npeaks, n_peak_parameters) array giving the peak parameters for
        `peak_model`.
    fitopts : dict, optional
        Optional keywords to pass into the solver.
    bg_args : tuple, optional
        If the background class requires positional arguments, they
        should be supplied here.
    bg_kwargs : dict, optional
        Optional keywords to pass into the background_class to initialize
        the background model.

    Returns
    -------
    numpy.ndarray
        (n_background_parameters,) array of background parameters based
        on the fit of (x, y - peaks).
    """
    y = y.copy()
    if not isclass(background_class):
        bmodel = background_class.copy()
        update_model(bmodel, bg_kwargs)
    else:
        if bg_args is None:
            bmodel = background_class(**bg_kwargs)
        else:
            if hasattr(bg_args, '__len__'):
                bmodel = background_class(*bg_args, **bg_kwargs)
            else:
                bmodel = background_class(bg_args, **bg_kwargs)

    init_params = peak_model.parameters.copy()
    if fitopts is None:
        fitopts = {}

    for params in pinit:
        peak_model.parameters = params
        y -= peak_model(x)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', AstropyWarning)
        bfit = dofit(fitter, bmodel, x, y, **fitopts)

    peak_model.parameters = init_params
    return bfit.parameters

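# Usage sketch (illustrative; not part of the original module): estimate
# constant-background parameters after removing a single fitted peak:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models, fitting
#     >>> x = np.linspace(0, 10, 101)
#     >>> y = models.Gaussian1D(3, 5, 0.5)(x) + 10
#     >>> fitter = get_fitter(fitting.LevMarLSQFitter)
#     >>> pinit = np.array([[3.0, 5.0, 0.5]])  # amplitude, mean, stddev
#     >>> get_background_fit(fitter, models.Gaussian1D(), models.Const1D,
#     ...                    x, y, pinit)  # approximately [10]
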
def get_final_model(peak_class, pinit, background_class=None, bg_args=None,
                    binit=None, min_width=None, xbounds=None, **kwargs):
    """
    Refine the initial fit and return a set of models.

    Parameters
    ----------
    peak_class : astropy.modeling.Fittable1DModel instance
    pinit : array_like of float
    background_class : astropy.modeling.Fittable1DModel, optional
    binit : array_like of float, optional
        (n_background_parameters,) array of initial background parameter
        estimates.
    min_width : 2-tuple of (str, float), optional
    xbounds : 2-tuple of float, optional
    kwargs : dict, optional

    Returns
    -------
    list of astropy.modeling.Fittable1DModel
    """
    # This supports old serial implementation.  May be useful one day
    # pinit = np.array(pinit)
    # npeaks = pinit.shape[0]
    # min_width = get_min_width(peak_class, min_width)
    #
    # model_class, build = None, kwargs.copy()
    # xpar_name = get_x_parname(peak_class)
    #
    # compound = npeaks > 1 or background_class is not None
    #
    # build['bounds'] = build.get('bounds', {})
    #
    # if compound:
    #     for i, params in enumerate(pinit):
    #         if model_class is None:
    #             model_class = peak_class
    #         else:
    #             model_class += peak_class
    #         for j, parname in enumerate(peak_class.param_names):
    #             build['%s_%i' % (parname, i)] = params[j]
    #         if min_width is not None:
    #             key = '%s_%i' % (min_width[0], i)
    #             wlim = build['bounds'].get(key, (None, None))
    #             build['bounds'][key] = min_width[1], wlim[1]
    #         if xbounds is not None:
    #             build['bounds']['%s_%i' % (xpar_name, i)] = xbounds
    # else:
    #     model_class = peak_class
    #     for j, parname in enumerate(peak_class.param_names):
    #         build['%s' % parname] = pinit[0, j]
    #     if min_width is not None:
    #         wlim = build['bounds'].get(min_width[0], (None, None))
    #         build['bounds'][min_width[0]] = min_width[1], wlim[1]
    #     if xbounds is not None:
    #         build['bounds'][xpar_name] = xbounds
    #
    # if background_class is not None:
    #     model_class += background_class
    #     if binit is not None:
    #         binit = np.array(binit)
    #         for parname, parval in zip(background_class.param_names, binit):
    #             build['%s_%i' % (parname, npeaks)] = parval
    #
    # if compound:
    #     name = '%i_peak%s' % (npeaks, '' if npeaks == 1 else 's')
    #     if background_class is not None:
    #         name += '_with_background'
    #     model_class = model_class.rename(name)
    # model = model_class(**build)
    # if not compound:
    #     model.name = '1_peak'
    #
    # return model

    pinit = np.array(pinit)
    npeaks = pinit.shape[0]
    min_width = get_min_width(peak_class, min_width)

    build = {}
    xpar_name = get_x_parname(peak_class)

    compound = npeaks > 1  # or background_class is not None

    build['bounds'] = build.get('bounds', {})
    model = None
    if compound:
        for i, params in enumerate(pinit):
            if model is None:
                model = peak_class()
            else:
                model += peak_class()
            for j, parname in enumerate(peak_class.param_names):
                build['%s_%i' % (parname, i)] = params[j]
            if min_width is not None:
                key = '%s_%i' % (min_width[0], i)
                wlim = build['bounds'].get(key, (None, None))
                build['bounds'][key] = min_width[1], wlim[1]
            if xbounds is not None:
                build['bounds']['%s_%i' % (xpar_name, i)] = xbounds
    else:
        model = peak_class()
        for j, parname in enumerate(peak_class.param_names):
            build['%s' % parname] = pinit[0, j]
        if min_width is not None:
            wlim = build['bounds'].get(min_width[0], (None, None))
            build['bounds'][min_width[0]] = min_width[1], wlim[1]
        if xbounds is not None:
            build['bounds'][xpar_name] = xbounds

    update_model(model, build)
    name = '%i_peak%s' % (npeaks, '' if npeaks == 1 else 's')

    if background_class is not None:
        if isclass(background_class):
            if bg_args is None:
                background = background_class()
            elif not hasattr(bg_args, '__len__'):
                background = background_class(bg_args)
            else:
                background = background_class(*bg_args)
        else:
            background = background_class.copy()
        if binit is not None:
            background.parameters = binit

        # if binit is None:
        #     background = background_class()
        # else:
        #     binit = np.array(binit)
        #     bg_kwargs = {}
        #     for parname, parval in zip(background_class.param_names, binit):
        #         bg_kwargs[parname] = parval
        #     background = background_class(**bg_kwargs)

        model += background
        name += '_with_background'

    model = model.rename(name)
    if len(kwargs) > 0:
        update_model(model, kwargs)
    return model

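# Usage sketch (illustrative; not part of the original module): build the
# refined compound model (two Gaussian peaks plus a constant background)
# from initial parameter estimates:
#
#     >>> import numpy as np
#     >>> from astropy.modeling import models
#     >>> pinit = np.array([[3.0, 5.0, 0.5], [4.0, 4.0, 0.2]])
#     >>> model = get_final_model(models.Gaussian1D, pinit,
#     ...                         background_class=models.Const1D,
#     ...                         binit=[10.0], min_width=('stddev', 0.01))
#     >>> model.name  # '2_peaks_with_background'
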
def fitpeaks1d(x, y, npeaks=1, xrange=None, min_width=(None, None),
               peak_class=models.Gaussian1D, box_class=models.Box1D,
               box_width=(None, 6), background_class=models.Const1D,
               binit=None, bg_args=None, bg_kwargs=None,
               fitting_class=fitting.LevMarLSQFitter, maxiter=None,
               outlier_func=robust_masking, robust=None, outlier_iter=3,
               baseline_func=medabs_baseline, guess=None,
               guess_func=guess_xy_mad, optional_func=None,
               fitter_kwargs=None, fitopts=None, search_kwargs=None,
               **kwargs):
    """
    Fit peaks (and optionally background) to a 1D set of data.

    The user may fit for one or multiple peaks in the input data using an
    array of different peak models, and may also fit for a model of the
    background.  There are a large number of standard models available
    for both, or the user may define their own models if necessary.  By
    default, we fit for Gaussian peaks and a constant background.

    The intention of `fitpeaks1d` is to provide a highly customizable
    peak fitting utility for one dimensional data.  The basic procedure
    is split into two phases: the search phase and the refinement phase.

    During the search phase an attempt is made to get a good first
    estimate of where the most prominent peaks lie in the data along with
    estimates of their properties.  Initial positions can be supplied by
    the user via `guess`, but may also be derived via the `guess_func`
    algorithm which is also definable by the user.  By default, candidate
    positions are determined as max(abs(y - median(y))).  The basic
    procedure is as follows:

        1. Remove a baseline from the y data via the `baseline_func`
           function.  At this early stage, we are only really interested
           in peak characteristics, or even just where they roughly are.
           The default baseline removal function is to subtract the
           median, although it is completely reasonable to use many other
           methods depending on your data.  This `baseline_func` may also
           transform the initial data set to something a little more
           suitable for finding peaks.  Users may wish to smooth or
           filter the data, or perform some other kind of transform.  By
           default the data is set to y = abs(y - median(y)), as we do
           not know if we're searching for positive or negative peaks.

        2. Guess where peaks may lie along the x-axis.  These estimates
           can be provided directly by the user, or derived via the
           `guess_func` algorithm.  This algorithm should take the (x, y)
           data set and return a single (x0, y0) coordinate giving the x
           position and amplitude of the most prominent peak candidate in
           the data.  In the default case this is simply y0 = max(y) and
           x0 = where(y0).

        3. Fit a peak using (x0, y0) as an initial condition.  Store the
           peak properties, then subtract a model of the peak from the
           data and go back to step 2.  Only break this loop once the
           cycle has completed `npeaks` times.  This will hopefully yield
           the `npeaks` most prominent peaks in the data.  After fitting
           we should have a more exact (x1, y1) position for each peak.

        4. Take the x1 position of each peak derived in step 3 and
           interpolate it back onto the original data set, before it was
           changed in step 1.  Subtract the derived baseline from this
           value and we should then have a good estimate of the (x, y)
           position for each peak in relation to the real dataset.

        5. If the user wishes to fit a background to the data, get an
           estimate of the background fit parameters by subtracting
           models of each peak from the original dataset and then fitting
           the desired background model to the residual.

    During the refinement phase, a final model is created from all
    initial peak fits and an optional background.  If a background is
    requested, an initial estimate of its parameters is derived by
    fitting the background model on y - fit(peak_estimates).

    Parameters
    ----------
    x : array_like of float
        (N,) array of independent values.
    y : array_like of float
        (N,) array of dependent values.
    npeaks : int, optional
        The number of peaks to find.  Set to None to find a single (most
        prominent) peak.  Peaks are returned in the order most prominent
        -> least prominent.
    xrange : 2-tuple of (int or float), optional
        The (minimum, maximum) range of values in x where a fitted peak
        will be valid.  Set to None to use the full range of valid
        (finite) data.
    peak_class : astropy.modeling.Fittable1DModel, optional
        Class of peak model.  The default is
        `astropy.modeling.models.Gaussian1D`.
    box_width : float or int or (2-tuple of (str, float)), optional
        If int or float, specifies the width of the box model
        (`box_class`).  If specified as a 2-tuple of (str, float), the
        first element specifies a parameter of the model that will be
        multiplied by the second element (float) to define the width of
        the box.  For example, ('stddev', 3) for the Gaussian1D model
        would limit the range of the model to 3 * stddev.  This is highly
        recommended when fitting continuous or wide functions.  Note that
        this will not be applied when the refined model is fitted in
        parallel, since that would negate the whole point of fitting in
        parallel in the first place.
    min_width : 2-tuple of (str or None, float or None), optional
        Set a lower limit on the width of each peak.  Set to None for no
        lower limit.  min_width[0] is the name of the parameter in the
        peak model governing width (str).  Set this to None to autodetect
        the parameter name if using a standard model.  min_width[1] is
        the value to set.  If set to None, half of the median separation
        of sorted unique `x` samples will be used.
    box_class : astropy.modeling.Fittable1DModel, optional
        Class for filtering the peak model.  Will be applied as
        peak_model * box_model.  This is only applied during the initial
        search stage.  The width of the box is defined by `box_width`.
        The default is astropy.modeling.models.Box1D.
    background_class : astropy.modeling.Fittable1DModel, optional
        Class for describing the background.  Will be applied as
        peak_model + background in the final refined fitting.  Note that
        it will only be fitted to the area of interest around each peak
        (`box_width`).  Disable background fitting by supplying `None`.
        The default is to fit a constant background using
        astropy.modeling.models.Const1D.  NOTE: you can supply a model
        instance rather than a class in order to fit polynomial
        backgrounds, as the number of polynomial parameters is dependent
        on the polynomial order.  For example, in order to fit a 3rd
        order polynomial, set background_class=Polynomial1D(3).
    binit : array_like of float, optional
        A user guess at background parameters.
    bg_args : tuple, optional
        If the background class requires arguments, they should be
        supplied here as a tuple.
    bg_kwargs : dict, optional
        Optional keyword arguments to pass into the background model
        during initialization.
    fitting_class : class of fitting object
        Typically a "solver" from scipy.optimize or astropy.modeling
        that, when instantiated, will create a solver such that:

            fitted_model = solver(model, x, y)

    maxiter : int, optional
        Maximum number of iterations for the solver to attempt before
        quitting.  Set to None to use the default for each solver class.
    outlier_func : function, optional
        A function of the form data_out = outlier_func(data_in).  You may
        do anything at all to the data such as clipping, but perhaps the
        simplest option is to convert the data to a np.ma.MaskedArray and
        set the mask (True=bad) while returning the original data
        untouched.  The default is to use `robust_masking`, which is a
        basic wrapper for `mc.find_outliers`.
    robust : dict or anything, optional
        Supplying anything other than `None` will turn on outlier
        rejection for the solver.  If `robust` is a dict, then it should
        contain optional keyword arguments that would be supplied to
        `outlier_func`.  For example, the default `robust_masking`
        outlier rejection function takes the `threshold` keyword.  To
        turn on outlier rejection you could just set robust=True, but if
        you wanted to change threshold to 10, then set
        robust={'threshold': 10}.  Setting robust=None turns off outlier
        rejection (default).
    outlier_iter : int, optional
        The maximum number of iterations for the solver when rejecting
        outliers.  Defaults to 3.
    guess : array_like of float, optional
        An array where each element gives a guess at an initial `x`
        position for a peak.  If there are fewer guesses than `npeaks`,
        `guess_func` will be used after the initial guesses have been
        taken.  If there are more guesses than `npeaks`, then only
        guess[:npeaks] will be used.
    baseline_func : function, optional
        A function used during the initial search phase, of the form:

            ysearch, baseline = baseline_func(x, y)

        This function should remove a baseline from the data so that we
        mitigate effects from the background as much as possible while
        keeping the structure of the peaks.  In the default case, the
        baseline is taken as the median in y.  This baseline should be
        subtracted from y, and then the user may transform y as they see
        fit to allow `guess_func` and the solver to get the most reliable
        guess possible at each peak's model parameters.  The default
        `baseline_func` returns ysearch as ysearch = abs(y - median(y)).
        Note that the most important parameter to guess at during this
        stage is the x center position of the peak.  Amplitude is not
        important, but we do want to get fairly accurate representations
        of the rest of the fit parameters such as width or FWHM.
    guess_func : function
        A function of the form:

            (x0, y0) = guess_func(x, y_modified)

        (x0, y0) is the peak (x_center, y_amplitude) of the most
        prominent peak in the data after `baseline_func` has been
        applied.  These results are used to initialize peak parameters
        during the search phase.  The default function returns the (x, y)
        coordinate of the datum that satisfies
        y0 = max(abs(y - median(y))).
    optional_func : function, optional
        An optional function applied to the data prior to the final fit.
        If fitting a background, then no attempt should be made to remove
        the background unless it is being accounted for in some way.  If
        you are not fitting a background then feel free to go to town on
        removing said background.  Preserve whatever properties of the
        peak you wish to have appear in the output results.  Good choices
        may involve filtering the data.  The default is to do nothing and
        fit the final model on the original data.
    fitter_kwargs : dict, optional
        Additional keywords to pass into the solver during
        initialization.
    fitopts : dict, optional
        Optional keywords to pass into the solver at runtime.
    search_kwargs : dict, optional
        Additional keyword arguments to supply during model instantiation
        during the initial search stage (peak * optional(box)).  Note
        that the model parameter names are suffixed with "_0" and box
        parameter names are suffixed with "_1", but only if a box is
        being applied.  So for example, to say that you want parameter
        "p" of the model to remain fixed:

            kwargs = {"fixed": {"p_0": True}}

        As another example, to set parameter "a" of the box equal to
        parameter "b" of the model:

            kwargs = {"tied": {"a_1": lambda x: getattr(x, "b_0")}}

        There are lots of other things you could do here, and I've tried
        to include as many hooks as possible.  You don't even need to
        treat this thing as a box function.  It could be any kind of
        filtering function etc., and you could use kwargs to supply it
        with the necessary parameters or behaviours as you wish.
    kwargs : dict, optional
        Additional keyword arguments to supply during model instantiation
        during the final model refinement stage.  Exactly the same type
        of thing as `search_kwargs`.  However, the following suffix rules
        apply for parallel fitting (default):

            1. If fitting only a single peak and no background, do not
               suffix any parameters.  "amplitude" remains "amplitude".
            2. If fitting multiple peaks, begin suffixing at _0 for the
               first peak up to _(npeaks-1) for the last.  i.e. the
               amplitude of the 5th peak should be referred to as
               "amplitude_4".
            3. If fitting a single peak with a background, then peak
               parameters should be suffixed with "_0" and background
               parameters with "_1".
            4. If fitting multiple peaks with a single background, then
               peak parameters should be suffixed according to step 2,
               while background parameters should be suffixed with
               _(npeaks).  i.e. if there were 5 peaks, the background
               "intercept" for a linear background fit should be referred
               to as "intercept_5".

        and for serial fitting:

            1. If fitting a single peak with no background, do not suffix
               any parameters.  "amplitude" remains "amplitude".
            2. If fitting one or more peaks with a background, then there
               is no distinction available between peaks.  Each peak
               parameter rule must be applied to all peaks.  For example,
               setting {'bounds': {'amplitude_0': (0, 5)}} will limit the
               amplitude of all peaks to within the range 0->5.  The
               background parameters are suffixed with "_1".

    Returns
    -------
    model : astropy.modeling.Fittable1DModel instance
        The final fitted model.  If more than one peak or a background
        was fitted for, then the returned model will be a compound model.
        All peaks are located in model[:npeaks].  If a background was
        included, it can be found at model[npeaks].  If a single peak and
        no background was fitted, then an instance of the peak_class will
        be returned containing the fit parameters.  No indexing will be
        possible.

    Examples
    --------
    Create compound peaks on a sloped background

    >>> import numpy as np
    >>> from astropy.modeling import models
    >>> from sofia_redux.toolkit.fitting.fitpeaks1d import fitpeaks1d
    >>> x = np.linspace(0, 10, 1001)
    >>> model1 = models.Gaussian1D(amplitude=3, mean=5, stddev=0.5)
    >>> model2 = models.Gaussian1D(amplitude=4, mean=4, stddev=0.2)
    >>> y = model1(x) + model2(x) + 0.05 * x + 100
    >>> rand = np.random.RandomState(42)
    >>> y += rand.normal(0, 0.01, y.size)
    >>> fit = fitpeaks1d(x, y, background_class=models.Linear1D,
    ...                  npeaks=2, min_width=0.1)
    >>> rmse = np.sqrt(np.mean(fit.fit_info['fvec'] ** 2))
    >>> assert np.allclose(fit[0].parameters, [3, 5, 0.5], atol=0.02)  # peak 1
    >>> assert np.allclose(fit[1].parameters, [4, 4, 0.2], atol=0.02)  # peak 2
    >>> assert np.allclose(fit[2].parameters, [0.1, 100], atol=0.2)  # bg
    >>> assert np.isclose(rmse, 0.01, rtol=1)  # residuals
    """
    if not isclass(peak_class) or not issubclass(
            peak_class, Fittable1DModel):
        raise TypeError("peak model is not %s" % Fittable1DModel)

    x, y = np.array(x), np.array(y)
    shape = x.shape
    if y.shape != shape:
        raise ValueError("x and y array shape mismatch")

    if fitter_kwargs is None:
        fitter_kwargs = {}
    if search_kwargs is None:
        search_kwargs = {}
    if fitopts is None:
        fitopts = {}
    if fitting_class in [fitting.LevMarLSQFitter, fitting.SLSQPLSQFitter,
                         fitting.SimplexLSQFitter]:
        fitopts['maxiter'] = 1000
    if bg_kwargs is None:
        bg_kwargs = {}
    if maxiter is not None:
        fitopts['maxiter'] = maxiter

    mask = np.isfinite(x) & np.isfinite(y)
    if not mask.any():
        raise ValueError("no finite data")
    x, y = x[mask], y[mask]
    if np.unique(x).size < 5:
        raise ValueError("not enough valid unique points")

    if xrange is None:
        xrange = x.min(), x.max()
    elif not hasattr(xrange, '__len__') or len(xrange) != 2:
        raise ValueError("xrange must be of the form (xmin, xmax)")

    box_width = parse_width_arg(peak_class, box_width)
    min_width = get_min_width(peak_class, min_width, x)

    fitter = get_fitter(fitting_class, robust=robust,
                        outlier_func=outlier_func,
                        outlier_iter=outlier_iter, **fitter_kwargs)
    search_model = get_search_model(peak_class, box_class=box_class,
                                    box_width=box_width,
                                    min_width=min_width, xrange=xrange,
                                    **search_kwargs)
    pinit = initial_search(fitter, search_model, x, y, npeaks=npeaks,
                           guess=guess, guess_func=guess_func,
                           baseline_func=baseline_func, fitopts=fitopts)

    y = y.copy() if optional_func is None else optional_func(x, y)

    if background_class is not None and binit is None:
        if get_n_submodels(search_model) > 1:
            search_peak = search_model[0]
        else:
            search_peak = search_model
        binit = get_background_fit(fitter, search_peak, background_class,
                                   x, y, pinit, bg_args=bg_args,
                                   **bg_kwargs)

    model = get_final_model(peak_class, pinit,
                            background_class=background_class,
                            bg_args=bg_args, binit=binit,
                            min_width=min_width, xbounds=xrange, **kwargs)
    fit = dofit(fitter, model, x, y, **fitopts)
    return fit

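# Usage sketch (illustrative; not part of the original module): reusing the
# x and y arrays from the Examples section above, the same fit can be run
# with outlier rejection enabled via the `robust` keyword:
#
#     >>> fit = fitpeaks1d(x, y, npeaks=2, min_width=0.1,
#     ...                  background_class=models.Linear1D,
#     ...                  robust={'threshold': 10})
#     >>> yfit = fit(x)  # evaluate the fitted compound model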