# Licensed under a 3-clause BSD style license - see LICENSE.rst
import numba
import numpy as np
from grig.resample_utils import (
scale_coordinates)
from grig.resample_kernel_utils import (
solve_kernel_fits)
from grig.resample_base import (
ResampleBase, _global_resampling_values)
from grig.tree.kernel_tree import KernelTree
from grig.toolkit.multiproc import unpickle_file
__all__ = ['ResampleKernel']
class ResampleKernel(ResampleBase):
def __init__(self, coordinates, data, kernel,
error=None, mask=None,
robust=None, negthresh=None,
kernel_spacing=None,
kernel_offsets=None,
kernel_weights=None,
limits=None,
degrees=3,
smoothing=0.0,
knots=None,
knot_estimate=None,
eps=1e-8,
fix_knots=None,
tolerance=1e-3,
max_iteration=20,
exact=False,
reduce_degrees=False,
imperfect=False,
leaf_size=40,
large_data=False,
**distance_kwargs):
"""
Class to resample data using kernel convolution.
The kernel resampler may take regular or irregular spaced data, an
irregular or regular kernel, and use kernel convolution to resample
onto arbitrary coordinates where each output value is the convolution
        of the original sample data and the given kernel. Here, "irregular"
        refers to values with explicitly specified coordinates, i.e., values
        that do not necessarily fall onto a regular grid.
Generally, this class was designed to process irregular data when
convolution is required. If both the kernel and samples exist on
regularly spaced grids, there are many alternatives that could be used
to perform such a convolution much more quickly such as
:func:`scipy.ndimage.convolve`.
Convolution is achieved by creating a spline representation of the
kernel which is then used to interpolate values of the kernel at the
required locations. Note that while the default settings assume that
the kernel is perfect, noisy kernels may also be supplied and smoothed
if required. Please see the :class:`Spline` class for further details.
Parameters
----------
coordinates : array_like of float
(n_features, n_samples) array of independent values. A local
internal copy will be created if it is not a numpy.float64
type.
data : array_like of float
            (n_sets, n_samples) or (n_samples,) array of dependent values.
            If multiple (n_sets) sets of data are supplied, then n_sets
            solutions will be calculated at each resampling point.
kernel : array_like of float
An n_features-dimensional array containing a regularly spaced
resampling kernel or an irregular 1-D kernel array. If the kernel
is irregular, `kernel_offsets` must be provided.
error : array_like of float, optional
(n_sets, n_samples) or (n_samples,) array of error (1-sigma) values
associated with the `data` array. `error` will be used to
weight fits, and be propagated to the output error values. If not
supplied, the error may still be calculated from residuals to the
fit during :func:`ResamplePolynomial.__call__`.
mask : array_like of bool, optional
(n_sets, n_data) or (n_data,) array of bool where `True`
            indicates a valid data point that can be included in the fitting
and `False` indicates data points that should be excluded from
the fit. Masked points will be reflected in the output counts
array. If not supplied, all samples are considered valid.
robust : float, optional
Specifies an outlier rejection threshold for `data`. A data point
is identified as an outlier if abs(x_i - x_med)/MAD > robust, where
x_med is the median, and MAD is the Median Absolute Deviation
defined as 1.482 * median(abs(x_i - x_med)).
negthresh : float, optional
Specifies a negative value rejection threshold such that
data < (-stddev(data) * negthresh) will be excluded from the fit.
kernel_spacing : float or numpy.ndarray (float), optional
The spacing between kernel elements for all or each feature. If
            an array is supplied, it should be of shape (n_features,). This
            option may only be used for regular kernel grids with n_features
dimensions. Either `kernel_spacing` or `kernel_offsets` must be
supplied.
kernel_offsets : tuple or array_like, optional
If the kernel is regular, should be an n-dimensional tuple
containing the grid indices in each dimension. Otherwise, should
be an array of shape (n_dimensions, kernel.size). Either
`kernel_spacing` or `kernel_offsets` must be supplied.
kernel_weights : numpy.ndarray, optional
Optional weights to supply to the spline fit for each data point.
Should be the same shape as the supplied kernel.
limits : numpy.ndarray (float), optional
An array of shape (n_dimensions, 2) that may be supplied to set the
minimum and maximum coordinate values used during the spline fit.
For example, limits[1, 0] sets the minimum knot value in the second
            dimension and limits[1, 1] sets the maximum knot value in the
second dimension. By default this is set to the minimum and
maximum values of the coordinates in each dimension.
degrees : int or numpy.ndarray (int), optional
The degree of spline to fit in each dimension. Either a scalar can
be supplied pertaining to all dimensions, or an array of shape
(n_dimensions,) can be used.
smoothing : float, optional
Used to specify the smoothing factor. If set to `None`, the
smoothing will be determined based on user settings or input data.
If `exact` is `True`, smoothing will be disabled (zero). If
            `exact` is `False`, smoothing will be set to n - sqrt(2 * n) where
n is the number of data values. If supplied, smoothing must be
greater than zero. See above for further details. Note that if
smoothing is zero, and the degrees are not equal over each
dimension, smoothing will be set to `eps` due to numerical
instabilities.
knots : list or tuple or numpy.ndarray, optional
A set of starting knot coordinates for each dimension. If a list
or tuple is supplied it should be of length n_dimensions where
element i is an array of shape (n_knots[i]) for dimension i. If
an array is supplied, it should be of shape
            (n_dimensions, max(n_knots)). Note that there must be at least
            2 * (degree + 1) knots for each dimension. Unused or invalid
            knots may be set to NaN at the end of each array. Knot
coordinates must also be monotonically increasing in each
dimension.
knot_estimate : numpy.ndarray (int), optional
The maximum number of knots required for the spline fit in each
dimension and of shape (n_dimensions,). If not supplied, the knot
estimate will be set to
            int((n / n_dimensions) ** (1 / n_dimensions)) or n_knots if
knots were supplied and fixed.
eps : float, optional
A value where 0 < eps < 1. This defines the magnitude used to
identify singular values in the spline observation matrix (A). If
            any row of A[:, 0] < (eps * max(A[:, 0])), it will be considered
singular.
fix_knots : bool, optional
If `True`, do not attempt to modify or add knots to the spline fit.
Only the initial supplied user knots will be used.
tolerance : float, optional
A value in the range 0 < tolerance < 1 used to determine the exit
criteria for the spline fit. See above for further details.
max_iteration : int, optional
The maximum number of iterations to perform when solving for the
spline fit.
exact : bool, optional
If `True`, the initial knots used will coincide with the actual
input coordinates and smoothing will be set to zero. No knots
should be supplied by the user in this instance.
        reduce_degrees : bool, optional
            Only relevant if `exact` is `True`. If set to `True`, the maximum
            allowable degree in each dimension will be limited to
            (len(unique(x)) // 2) - 1 where x are the coordinate values in any
            dimension.
        imperfect : bool, optional
            If a spline fit to the kernel is allowed to be imperfect (`True`),
            an error will only be raised during spline fitting if a major
            error is encountered. Otherwise, fits will be permitted so long
            as a solution was reached, even if that solution did not meet
            expectations.
leaf_size : int, optional
Number of points at which to switch to brute-force during the
            ball tree query algorithm. See `sklearn.neighbors.BallTree`
for further details.
large_data : bool, optional
If `True`, indicates that this resampling algorithm will run on
a large set of data, and the ball tree should be created on
subsets of the data.
distance_kwargs : dict, optional
Optional keyword arguments passed into
:func:`sklearn.neighbors.DistanceMetric`. The default is to use
the "minkowski" definition with `p=2`, i.e., the Euclidean
definition. This is important in determining which samples lie
inside the window region of a resampling point, and when deriving
distance weighting factors.
Raises
------
ValueError : Invalid inputs to __init__ or __call__
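
        Examples
        --------
        A minimal 1-D sketch (illustrative only; it assumes this class is
        importable as shown and that 1-D inputs of shape (n_samples,) are
        accepted)::

            >>> import numpy as np
            >>> from grig.resample_kernel import ResampleKernel
            >>> x = np.linspace(0, 10, 101)  # sample coordinates
            >>> y = np.sin(x)  # sample values
            >>> # A regularly spaced Gaussian kernel with spacing 0.1.
            >>> kx = np.linspace(-1, 1, 21)
            >>> kernel = np.exp(-(kx ** 2) / 0.125)
            >>> resampler = ResampleKernel(x, y, kernel, kernel_spacing=0.1)
            >>> fit = resampler(np.linspace(0, 10, 201))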
"""
self.kernel_args = {
'kernel': kernel,
'kernel_spacing': kernel_spacing,
'kernel_offsets': kernel_offsets,
'weights': kernel_weights,
'limits': limits,
'degrees': degrees,
'smoothing': smoothing,
'knots': knots,
'knot_estimate': knot_estimate,
'eps': eps,
'fix_knots': fix_knots,
'tolerance': tolerance,
'max_iteration': max_iteration,
'exact': exact,
'reduce_degrees': reduce_degrees,
'imperfect': imperfect,
}
self._distance_kwargs = distance_kwargs
super().__init__(coordinates, data, error=error, mask=mask,
robust=robust, negthresh=negthresh,
leaf_size=leaf_size, large_data=large_data,
**distance_kwargs)
@property
def kernel(self):
"""
Return the resampling kernel.
Returns
-------
numpy.ndarray (float)
"""
return self.sample_tree.kernel
@property
def kernel_spacing(self):
"""
Return the spacing between kernel grid points.
Returns
-------
numpy.ndarray (float)
"""
spacing = self.sample_tree.kernel_spacing
if spacing is None:
return None
return scale_coordinates(spacing, self.window, self.window * 0,
reverse=True)
@property
def kernel_offsets(self):
"""
Return the coordinate offsets for the kernel.
Returns
-------
numpy.ndarray (float)
"""
offsets = self.sample_tree.kernel_coordinates
return scale_coordinates(offsets, self.window, self.window * 0,
reverse=True)
@property
def degrees(self):
"""
Return the degrees of the spline fit to the kernel.
Returns
-------
numpy.ndarray (int)
"""
return self.sample_tree.degrees
@property
def exit_code(self):
"""
Return the exit code of the spline fit.
Please see the :class:`Spline` class for further details on
the meanings of each code.
Returns
-------
int
"""
return self.sample_tree.exit_code
@property
def exit_message(self):
"""
Return the spline exit message.
Returns
-------
str
"""
return self.sample_tree.exit_message
@property
def spline(self):
"""
Return the spline object for the kernel fit.
Returns
-------
Spline
"""
return self.sample_tree.spline
def set_sample_tree(self, coordinates,
leaf_size=40,
large_data=False,
**distance_kwargs):
"""
Build the sample tree from input coordinates.
Parameters
----------
coordinates : numpy.ndarray (float)
The input coordinates of shape (n_features, n_samples).
leaf_size : int, optional
Number of points at which to switch to brute-force during the
            ball tree query algorithm. See `sklearn.neighbors.BallTree`
for further details.
large_data : bool, optional
If `True`, indicates that this resampling algorithm will run on
a large set of data, and the ball tree should be created on
subsets of the data.
distance_kwargs : dict, optional
Optional keyword arguments passed into
:func:`sklearn.neighbors.DistanceMetric`. The default is to use
the "minkowski" definition with `p=2`, i.e., the Euclidean
definition. This is important in determining which samples lie
inside the window region of a resampling point, and when deriving
distance weighting factors.
Returns
-------
None
"""
super().set_sample_tree(
coordinates,
radius=self.estimate_feature_windows(),
leaf_size=leaf_size, large_data=large_data,
**self._distance_kwargs)
self.set_kernel(**self.kernel_args)
def estimate_feature_windows(self, *args, **kwargs):
r"""
Estimates the radius of the fitting window for each feature.
        The window for the resampling algorithm is set to encompass the
        kernel extent over all dimensions. Unlike the standard resampling
        algorithm, the coordinates of the data samples are irrelevant: the
        window radius is based solely on the kernel extent.
Since the resampling algorithm uses an ellipsoid window to determine
possible candidates for fitting, the window is determined as an
ellipsoid which circumscribes the cuboid kernel array. Although there
        are infinitely many possible ellipsoids, this particular method
        uses the constraint that the principal axes of the ellipsoid and
        the cuboid widths are at a constant ratio. For example, in two
        dimensions where a and b are the principal axes of the ellipsoid,
        and w and h are the widths of the kernel:

            a/w = b/h;  (w / (2a))^2 + (h / (2b))^2 = 1.
        This leads us to set the principal axes as:

            a_i = sqrt(n / 4) * w_i + delta

where w_i is the width of the kernel in dimension i in n-dimensions,
and delta = spacing/1e6 so that any edge cases can be included safely.
This is also valid in one dimension (a_i = w_i / 2). Note that kernels
are assumed to be exactly centered, so the width of a kernel in any
        dimension will be:

            w_i = spacing_i * (kernel.shape[i] - 1)

Note that this implementation takes the width as 2 * the maximum
absolute offset of the kernel for each dimension, so that irregular
kernels may be safely used.
Returns
-------
window : numpy.ndarray (n_features,)
            The principal axes of an ellipsoid used to create a fitting
region around each resampling point.
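
        Examples
        --------
        The principal axes for a 2-D kernel of widths w = (4, 6) may be
        checked directly (a numpy sketch of the formula above, ignoring
        the small delta term):

            >>> import numpy as np
            >>> w = np.array([4.0, 6.0])
            >>> np.sqrt(w.size / 4) * w
            array([2.82842712, 4.24264069])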
"""
        # Parse the kernel geometry with a temporary tree; only the kernel
        # extent and coordinates are required here, so the tree shape is
        # irrelevant.
        temp_shape = tuple([1] * self.features)
        temp_tree = KernelTree(temp_shape)
        temp_tree.parse_kernel(
            self.kernel_args['kernel'],
            kernel_spacing=self.kernel_args['kernel_spacing'],
            kernel_offsets=self.kernel_args['kernel_offsets'])

        # The kernel half-width in each dimension is taken as the maximum
        # absolute offset from the kernel center.
        max_width = np.max(np.abs(temp_tree.extent), axis=1)

        # Estimate a typical spacing between kernel elements, used to pad
        # the window so that edge cases are safely included.
        typical_spacing = np.empty(self.features, dtype=float)
        for feature in range(self.features):
            typical_spacing[feature] = np.nanmedian(np.diff(np.unique(
                temp_tree.kernel_coordinates[feature])))

        # Principal ellipsoid axes: a_i = sqrt(n) * (w_i / 2) plus a small
        # delta derived from the typical spacing.
        window = max_width * np.sqrt(self.features)
        window += typical_spacing * 1e-6
        return window
def set_kernel(self, kernel, kernel_spacing=None, kernel_offsets=None,
weights=None, limits=None, degrees=3, smoothing=0.0,
knots=None, knot_estimate=None, eps=1e-8, fix_knots=None,
tolerance=1e-3, max_iteration=20, exact=False,
reduce_degrees=False, imperfect=False):
"""
Set the kernel for subsequent fitting.
During this process, a spline will be fit to describe the kernel at
all intermediate points.
Parameters
----------
kernel : numpy.ndarray (float)
The kernel to set. Must have n_features dimensions.
kernel_spacing : float or numpy.ndarray (float), optional
The spacing between each kernel element in units of the
coordinates. Either supplied as a single value for all features,
or as an array of shape (n_features,) giving the kernel spacing
for each feature.
kernel_offsets : tuple or array_like, optional
If the kernel is regular, should be an n-dimensional tuple
containing the grid indices in each dimension. Otherwise, should
be an array of shape (n_dimensions, kernel.size).
weights : numpy.ndarray, optional
Optional weights to supply to the spline fit for each data point.
Should be the same shape as the supplied kernel.
limits : numpy.ndarray (float), optional
An array of shape (n_dimensions, 2) that may be supplied to set the
minimum and maximum coordinate values used during the spline fit.
For example, limits[1, 0] sets the minimum knot value in the second
            dimension and limits[1, 1] sets the maximum knot value in the
second dimension. By default this is set to the minimum and
maximum values of the coordinates in each dimension.
degrees : int or numpy.ndarray (int), optional
The degree of spline to fit in each dimension. Either a scalar can
be supplied pertaining to all dimensions, or an array of shape
(n_dimensions,) can be used.
smoothing : float, optional
Used to specify the smoothing factor. If set to `None`, the
smoothing will be determined based on user settings or input data.
If `exact` is `True`, smoothing will be disabled (zero). If
`exact` is `False`, smoothing will be set to n - sqrt(2 * n)
where n is the number of data values. If supplied, smoothing
must be greater than zero. See above for further details. Note
that if smoothing is zero, and the degrees are not equal over
each dimension, smoothing will be set to `eps` due to numerical
instabilities.
knots : list or tuple or numpy.ndarray, optional
A set of starting knot coordinates for each dimension. If a list
or tuple is supplied it should be of length n_dimensions where
element i is an array of shape (n_knots[i]) for dimension i. If
an array is supplied, it should be of shape
            (n_dimensions, max(n_knots)). Note that there must be at least
            2 * (degree + 1) knots for each dimension. Unused or invalid
            knots may be set to NaN at the end of each array. Knot
coordinates must also be monotonically increasing in each
dimension.
knot_estimate : numpy.ndarray (int), optional
The maximum number of knots required for the spline fit in each
dimension and of shape (n_dimensions,). If not supplied, the knot
estimate will be set to
            int((n / n_dimensions) ** (1 / n_dimensions)) or n_knots if
knots were supplied and fixed.
eps : float, optional
A value where 0 < eps < 1. This defines the magnitude used to
identify singular values in the spline observation matrix (A). If
            any row of A[:, 0] < (eps * max(A[:, 0])), it will be considered
singular.
fix_knots : bool, optional
If `True`, do not attempt to modify or add knots to the spline fit.
Only the initial supplied user knots will be used.
tolerance : float, optional
A value in the range 0 < tolerance < 1 used to determine the exit
criteria for the spline fit. See above for further details.
max_iteration : int, optional
The maximum number of iterations to perform when solving for the
spline fit.
exact : bool, optional
If `True`, the initial knots used will coincide with the actual
input coordinates and smoothing will be set to zero. No knots
should be supplied by the user in this instance.
reduce_degrees : bool, optional
Only relevant if `exact` is `True`. If set to `True`, the maximum
allowable degree in each dimension will be limited to
(len(unique(x)) // 2) - 1 where x are the coordinate values in any
dimension.
imperfect : bool, optional
            If a spline fit to the kernel is allowed to be imperfect (`True`),
            an error will only be raised during spline fitting if a major
            error is encountered. Otherwise, fits will be permitted so long
            as a solution was reached, even if that solution did not meet
expectations.
Returns
-------
None
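
        Examples
        --------
        A hedged sketch of the two ways to specify the kernel geometry,
        assuming ``resampler`` is an existing `ResampleKernel` instance and
        ``kernel`` a 1-D array::

            import numpy as np

            # Regular kernel: one spacing value for the single feature.
            resampler.set_kernel(kernel, kernel_spacing=0.1)

            # Irregular kernel: explicit offsets relative to the kernel
            # center, of shape (n_dimensions, kernel.size).
            offsets = np.linspace(-1, 1, kernel.size)[None]
            resampler.set_kernel(kernel, kernel_offsets=offsets)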
"""
if kernel_spacing is None and kernel_offsets is None:
raise ValueError("Kernel spacing or offsets must be supplied.")
# Scale offsets or spacings
# Kernel coordinates are always relative to the center.
# offsets supersede spacings
if kernel_offsets is not None:
scaled_offsets = []
kernel_spacing = None
for feature in range(self.features):
scaled_offsets.append(np.asarray(kernel_offsets[feature])
/ self.window[feature])
kernel_offsets = scaled_offsets
else:
kernel_offsets = None
kernel_spacing = np.atleast_1d(kernel_spacing).astype(float)
if kernel_spacing.size == 1 and self.features > 1:
kernel_spacing = np.full(self.features, kernel_spacing[0])
kernel_spacing /= self.window
self.sample_tree.set_kernel(kernel,
kernel_spacing=kernel_spacing,
kernel_offsets=kernel_offsets,
weights=weights,
limits=limits,
degrees=degrees,
smoothing=smoothing,
knots=knots,
knot_estimate=knot_estimate,
eps=eps,
fix_knots=fix_knots,
tolerance=tolerance,
max_iteration=max_iteration,
exact=exact,
reduce_degrees=reduce_degrees,
imperfect=imperfect)
def reduction_settings(self, error_weighting=True,
absolute_weight=None,
fit_threshold=0.0, cval=np.nan,
edge_threshold=0.0, edge_algorithm='distribution',
is_covar=False, jobs=None, use_threading=None,
use_processes=None, **kwargs):
r"""
Define a set of reduction instructions based on user input.
This method is responsible for determining, formatting, and checking
        a number of variables required for the resampling algorithm based on
user input. For detailed descriptions of user options, please see
:func:`ResamplePolynomial.__call__`.
Parameters
----------
error_weighting : bool, optional
If `True` (default), weight polynomial fitting by the `error`
values of each sample.
absolute_weight : bool, optional
            If the kernel weights are negative, this can lead to near-zero
            divisions in many of the algorithms. If set to `True`, the sum
            of the absolute weights is used for normalization.
fit_threshold : float, optional
Not implemented for kernel fitting.
cval : float, optional
During fitting, any fit that does not meet the `order_algorithm`
requirement will be set to this value. This will be NaN by default.
        edge_threshold : float or array_like of float, optional
If set to a value > 0, edges of the fit will be masked out
according to `edge_algorithm`. Values close to zero will result in
a low degree of edge clipping, while values close to 1 clip edges
to a greater extent. The exact definition of `edge_threshold`
depends on the algorithm. For further details, please see
:func:`resampling.resample_utils.check_edges`.
edge_algorithm : str, optional
Describes how to clip edges if edge_threshold is non-zero. The
available algorithms are:
- 'distribution' (default): Statistics on sample distributions
are calculated, and if the resampling point is > 1/threshold
standard deviations away from the sample mean, it will be
clipped.
- 'ellipsoid': If the samples used to fit a
resampling point deviate from the resampling point
location by more than this amount, it will be clipped.
- 'box': If the flattened 1-dimensional distribution
of samples center-of-mass deviates from the resampling
point location in any dimension, it will be clipped.
            - 'range': Over each dimension, checks that the distribution
              of points extends beyond edge_threshold to the "left" and
              "right" of the resampling point.
is_covar : bool, optional
If True, the input data is treated as a covariance instead of
a flux, and is propagated as if through a weighted mean.
jobs : int, optional
Specifies the maximum number of concurrently running jobs. An
attempt will be made to parallel process using a thread-pool if
available, but will otherwise revert to the "loky" backend.
Values of 0 or 1 will result in serial processing. A negative
value sets jobs to `n_cpus + 1 + jobs` such that -1 would use
all cpus, and -2 would use all but one cpu.
use_threading : bool, optional
If `True`, force use of threads during multiprocessing.
use_processes : bool, optional
If `True`, force use of sub-processes during multiprocessing.
kwargs : dict
Optional keyword arguments to the reduction settings.
Returns
-------
settings : dict
            The reduction settings, also stored as
            :func:`ResamplePolynomial.fit_settings`.
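
        Examples
        --------
        A hedged sketch, assuming ``resampler`` is an existing
        `ResampleKernel` instance::

            settings = resampler.reduction_settings(jobs=-1)
            # settings is a dict; settings['absolute_weight'] defaults to
            # True only if the fitted spline kernel contains negative
            # values.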
"""
settings = super().reduction_settings(
error_weighting=error_weighting,
fit_threshold=fit_threshold,
cval=cval,
edge_threshold=edge_threshold,
edge_algorithm=edge_algorithm,
jobs=jobs,
use_threading=use_threading,
use_processes=use_processes)
settings['is_covar'] = is_covar
if absolute_weight is None:
settings['absolute_weight'] = np.any(self.spline.values < 0)
else:
settings['absolute_weight'] = absolute_weight
self._fit_settings = settings
return settings
def __call__(self, *args,
cval=np.nan, edge_threshold=0.0,
edge_algorithm='distribution',
error_weighting=True, absolute_weight=None, normalize=True,
is_covar=False, jobs=None, use_threading=None,
use_processes=None,
get_error=False, get_counts=False, get_weights=False,
get_distance_weights=False, get_rchi2=False,
get_offset_variance=False, **kwargs):
"""
Resample data defined during initialization onto new coordinates.
Parameters
----------
args : array_like or n-tuple of array_like
args can take one of the following formats:
- grid : n-tuple of array_like
Here `n` is the number of dimensions and each array should
be of shape (n_data,). This indicates that resampling
should occur on a grid where the first argument
corresponds to the coordinates along the first dimension.
- single point : n-tuple of float
`n` is the number of dimensions. The coordinate to
resample onto.
- irregular : array_like
                An array of shape (n_dimensions, n_data) defining a
set of coordinates onto which to resample.
cval : float, optional
During fitting, any fit that does not meet the `order_algorithm`
requirement will be set to this value. This will be NaN by default.
        edge_threshold : float or array_like of float, optional
If set to a value > 0, edges of the fit will be masked out
according to `edge_algorithm`. Values close to zero will result in
a low degree of edge clipping, while values close to 1 clip edges
to a greater extent. The exact definition of `edge_threshold`
depends on the algorithm. For further details, please see
:func:`resampling.resample_utils.check_edges`.
edge_algorithm : str, optional
Describes how to clip edges if edge_threshold is non-zero. The
available algorithms are:
- 'distribution' (default): Statistics on sample distributions
are calculated, and if the resampling point is > 1/threshold
standard deviations away from the sample mean, it will be
clipped.
- 'ellipsoid': If the samples used to fit a
resampling point deviate from the resampling point
location by more than this amount, it will be clipped.
- 'box': If the flattened 1-dimensional distribution
of samples center-of-mass deviates from the resampling
point location in any dimension, it will be clipped.
            - 'range': Over each dimension, checks that the distribution
              of points extends beyond edge_threshold to the "left" and
              "right" of the resampling point.
error_weighting : bool, optional
If `True` (default), weight polynomial fitting by the `error`
values of each sample.
absolute_weight : bool, optional
            If the kernel weights are negative, this can lead to near-zero
            divisions in many of the algorithms. If set to `True`, the sum
            of the absolute weights is used for normalization.
normalize : bool, optional
If `True`, the data will be convolved with a normalized kernel
such that sum(kernel) = 1. Note that this may be sum(abs(kernel))
if `absolute_weight` is `True`.
is_covar : bool, optional
If True, the input data is treated as a covariance instead of
a flux, and is propagated as if through a weighted mean.
jobs : int, optional
Specifies the maximum number of concurrently running jobs. An
attempt will be made to parallel process using a thread-pool if
available, but will otherwise revert to the "loky" backend.
Values of 0 or 1 will result in serial processing. A negative
value sets jobs to `n_cpus + 1 + jobs` such that -1 would use
all cpus, and -2 would use all but one cpu.
use_threading : bool, optional
If `True`, force use of threads during multiprocessing.
use_processes : bool, optional
If `True`, force use of sub-processes during multiprocessing.
get_error : bool, optional
            If `True`, returns the error, given as the weighted RMS of the
            samples used for each resampling point.
get_counts : bool, optional
If `True` returns the number of samples used to fit each resampling
point.
get_weights : bool, optional
If `True`, returns the sum of all sample weights (error and
distance) used in the fit at each resampling point.
get_distance_weights : bool, optional
If `True`, returns the sum of all sample distance weights (not
including error) used in the fit at each resampling point.
get_rchi2 : bool, optional
If `True`, returns the reduced chi-squared statistic of the fit
at each resampling point. Note that this is only valid if errors
were supplied during :func:`ResamplePolynomial.__init__`.
get_offset_variance : bool, optional
If `True`, returns the offset of the resampling point from the
center of the sample distribution used to generate the fit as
a variance. i.e., a return value of 9 indicates a 3-sigma
deviation of the resampling point from the sample distribution.
Returns
-------
fit, [optional return values]
The data fit at `args`. Optional return values may also be
returned if any of the get_* options are `True`. If all are set
            to `True`, the return order is: fit, error, counts, weights,
            distance_weights, rchi2, offset_variance.
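
        Examples
        --------
        A hedged sketch of the three call formats for a 2-D resampler
        ``r`` (names are illustrative)::

            import numpy as np

            x = np.linspace(0, 1, 20)
            y = np.linspace(0, 1, 10)
            grid_fit = r(x, y)  # grid: one axis array per dimension
            point_fit = r(0.5, 0.5)  # a single point
            coords = np.random.random((2, 100))
            irregular_fit = r(coords)  # shape (n_dimensions, n_data)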
"""
self._check_call_arguments(*args)
settings = self.reduction_settings(
error_weighting=error_weighting,
absolute_weight=absolute_weight,
cval=cval,
edge_threshold=edge_threshold,
edge_algorithm=edge_algorithm,
is_covar=is_covar,
jobs=jobs,
use_threading=use_threading,
use_processes=use_processes,
**kwargs)
self.pre_fit(settings, *args)
        if not normalize:
            # Distance weights are needed below to undo the normalization,
            # so force their calculation while remembering whether the
            # user actually requested them.
            return_distance_weights = get_distance_weights
            get_distance_weights = True
        else:
            return_distance_weights = get_distance_weights
(fit, error, counts, weights, distance_weights, sigma,
distribution_offset_variance) = self.block_loop(
self.data, self.error, self.mask, self.fit_tree, self.sample_tree,
settings, self.iteration, get_error=get_error,
get_counts=get_counts, get_weights=get_weights,
get_distance_weights=get_distance_weights, get_rchi2=get_rchi2,
get_offset_variance=get_offset_variance, jobs=jobs)
self.iteration += 1
        if not normalize:
            # Multiply the normalization (the distance weight sum) back
            # out of the fits to recover unnormalized convolution values.
            nzi = distance_weights != 0
            fit[nzi] *= distance_weights[nzi]
            fit[~nzi] = cval
            get_distance_weights = return_distance_weights
if not self.multi_set:
fit = fit[0]
if get_error:
error = error[0]
if get_counts:
counts = counts[0]
if get_weights:
weights = weights[0]
if get_distance_weights:
distance_weights = distance_weights[0]
if get_rchi2:
sigma = sigma[0]
if get_offset_variance:
distribution_offset_variance = distribution_offset_variance[0]
fit = self.fit_grid.reshape_data(fit)
if (get_error or get_counts or get_weights
or get_rchi2 or get_distance_weights):
result = (fit,)
if get_error:
result += (self.fit_grid.reshape_data(error),)
if get_counts:
result += (self.fit_grid.reshape_data(counts),)
if get_weights:
result += (self.fit_grid.reshape_data(weights),)
if get_distance_weights:
result += (self.fit_grid.reshape_data(distance_weights),)
if get_rchi2:
result += (self.fit_grid.reshape_data(sigma),)
if get_offset_variance:
result += (self.fit_grid.reshape_data(
distribution_offset_variance),)
return result
else:
return fit
@classmethod
def block_loop(cls, sample_values, error, mask, fit_tree, sample_tree,
settings, iteration, get_error=True, get_counts=True,
get_weights=True, get_distance_weights=True, get_rchi2=True,
get_offset_variance=True, jobs=None, **kwargs):
r"""
Perform resampling reduction in parallel or series.
Utility function to allow the resampling algorithm to process blocks
of data in series or parallel, recombining the data once complete.
Please see :func:`ResamplePolynomial.__call__` for descriptions of the
arguments.
Parameters
----------
sample_values : numpy.ndarray
error : numpy.ndarray
mask : numpy.ndarray
fit_tree : resampling.tree.PolynomialTree object
        sample_tree : grig.tree.kernel_tree.KernelTree object
settings : dict
iteration : int
get_error : bool, optional
get_counts : bool, optional
get_weights : bool, optional
get_distance_weights : bool, optional
get_rchi2 : bool, optional
get_offset_variance : bool, optional
jobs : int, optional
kwargs : dict, optional
For consistency with the resampler base only.
Returns
-------
        combined_results : 7-tuple of numpy.ndarray
            In order: fit, error, counts, weights, distance weights,
            reduced chi-squared, distribution offset variance.
"""
args = (sample_values, error, mask, fit_tree, sample_tree,
get_error, get_counts, get_weights, get_distance_weights,
get_rchi2, get_offset_variance, settings)
kwargs = None
blocks = cls.process_blocks(args, kwargs, settings, sample_tree,
fit_tree, jobs, iteration)
n_sets = sample_values.shape[0]
n_fits = fit_tree.n_members
n_features = sample_tree.features
return cls.combine_blocks(
blocks, n_sets, n_fits, n_features, settings['cval'],
get_error=get_error,
get_counts=get_counts,
get_weights=get_weights,
get_distance_weights=get_distance_weights,
get_rchi2=get_rchi2,
get_offset_variance=get_offset_variance)
@staticmethod
def combine_blocks(blocks, n_sets, n_fits, n_dimensions, cval,
get_error=True, get_counts=True, get_weights=True,
get_distance_weights=True, get_rchi2=True,
get_offset_variance=True, **kwargs):
r"""
Combines the results from multiple reductions into one set.
The resampling reduction may be performed in serial or parallel over
multiple "blocks", where each block contains a set of spatially
close fit coordinates and all samples necessary to perform a fit
over all points.
Parameters
----------
blocks : n-tuple of processed reductions for n blocks.
n_sets : int
            The number of data sets in the reduction. Each set contains
the same sample coordinates as all other sets, but the sample
values may vary.
n_fits : int
The number of fitting points over all blocks.
n_dimensions : int
The number of coordinate dimensions.
cval : float
The fill value for missing data in the output fit value arrays.
get_error : bool, optional
If `True`, indicates that errors on the fit were calculated.
get_counts : bool, optional
If `True`, indicates that the number of samples used for each
fit should be returned.
get_weights : bool, optional
If `True`, indicates that the total weight sum of all samples used
in each fit should be returned.
get_distance_weights : bool, optional
If `True`, indicates that the distance weight sum of all samples
used in each fit should be returned.
get_rchi2 : bool, optional
If `True`, indicates that the reduced chi-squared statistic for
each fit should be returned.
get_offset_variance : bool, optional
If `True`, indicates that the offset variance of the fit from the
sample distribution should be returned.
kwargs : dict, optional
For consistency with ResampleBase only (not used).
Returns
-------
results : 7-tuple of numpy.ndarray
results[0] = fitted values
results[1] = error on the fit
results[2] = counts
results[3] = total weight sums
results[4] = total distance weight sums
results[5] = reduced chi-squared statistic
results[6] = offset variance
Notes
-----
        The return value is always a 7-tuple, and the get_* keywords indicate
whether the calculated values in the block reductions are valid. If
`False`, the corresponding output array will have the correct number of
axes, but be of zero size.
"""
fit = np.full((n_sets, n_fits), float(cval))
if get_error:
error = np.full((n_sets, n_fits), np.nan)
else:
error = np.empty((0, 0), dtype=float)
if get_counts:
counts = np.zeros((n_sets, n_fits), dtype=int)
else:
counts = np.empty((0, 0), dtype=int)
if get_weights:
weights = np.zeros((n_sets, n_fits), dtype=float)
else:
weights = np.empty((0, 0), dtype=float)
if get_distance_weights:
distance_weights = np.zeros((n_sets, n_fits), dtype=float)
else:
distance_weights = np.empty((0, 0), dtype=float)
if get_rchi2:
rchi2 = np.full((n_sets, n_fits), np.nan)
else:
rchi2 = np.empty((0, 0), dtype=float)
if get_offset_variance:
offsets = np.full((n_sets, n_fits), np.nan, dtype=float)
else:
offsets = np.empty((0, 0), dtype=float)
for block in blocks:
fit_indices = block[0]
fit[:, fit_indices] = block[1]
if get_error:
error[:, fit_indices] = block[2]
if get_counts:
counts[:, fit_indices] = block[3]
if get_weights:
weights[:, fit_indices] = block[4]
if get_distance_weights:
distance_weights[:, fit_indices] = block[5]
if get_rchi2:
rchi2[:, fit_indices] = block[6]
if get_offset_variance:
offsets[:, fit_indices] = block[7]
return fit, error, counts, weights, distance_weights, rchi2, offsets
@classmethod
def process_block(cls, args, block):
r"""
        Run :func:`solve_kernel_fits` on each block.
Utility function that parses the settings and tree objects to something
usable by the numba JIT compiled resampling functions. This is not
meant to be called directly.
Parameters
----------
args : 2-tuple
A tuple of form (filename, iteration) where the filename is a
string pointing towards a previously saved pickle file containing
the relevant information for the reduction if required. If set to
`None`, the arguments are retrieved from the
`_global_resampling_values` global parameter.
block : int
The block index to process.
Returns
-------
        results : 8-tuple of numpy.ndarray
            The first element contains the indices of the points to fit.
            For the remaining elements, please see the
            :func:`solve_kernel_fits` return values.
"""
filename, iteration = args
# Loading cannot be covered in tests as it occurs on other CPUs.
load_args = False
if filename is not None: # pragma: no cover
if 'args' not in _global_resampling_values:
load_args = True
elif 'iteration' not in _global_resampling_values:
load_args = True
elif iteration != _global_resampling_values.get('iteration'):
load_args = True
elif filename != _global_resampling_values.get('filename'):
load_args = True
if load_args: # pragma: no cover
_global_resampling_values['args'], _ = unpickle_file(filename)
_global_resampling_values['iteration'] = iteration
_global_resampling_values['filename'] = filename
(sample_values, sample_error, sample_mask, fit_tree, sample_tree,
get_error, get_counts, get_weights, get_distance_weights, get_rchi2,
get_offset_variance, settings) = _global_resampling_values['args']
fit_indices, fit_coordinates = \
fit_tree.block_members(block, get_locations=True)
        sample_indices = numba.typed.List(sample_tree.query_radius(
            fit_coordinates, 1.0, return_distance=False))
(knots, coefficients, degrees, panel_mapping, panel_steps, knot_steps,
nk1, spline_mapping, n_knots) = sample_tree.resampling_arguments
return (fit_indices,
*solve_kernel_fits(
sample_indices, sample_tree.coordinates,
sample_values, sample_error, sample_mask,
fit_coordinates, knots, coefficients,
degrees, panel_mapping, panel_steps,
knot_steps, nk1, spline_mapping, n_knots,
is_covar=settings['is_covar'],
cval=settings['cval'],
error_weighting=settings['error_weighting'],
absolute_weight=settings['absolute_weight'],
edge_algorithm_idx=settings['edge_algorithm_idx'],
edge_threshold=settings['edge_threshold'],
get_error=get_error,
get_counts=get_counts,
get_weights=get_weights,
get_distance_weights=get_distance_weights,
get_rchi2=get_rchi2,
get_offset_variance=get_offset_variance)
)