Source code for grig.resample_polynomial

# Licensed under a 3-clause BSD style license - see LICENSE.rst

import numba as nb
import numpy as np
import psutil

from grig.resample_utils import (
    scale_coordinates,
    shaped_adaptive_weight_matrices,
    scaled_adaptive_weight_matrices,
    relative_density, solve_fits)
from grig.resample_base import (
    ResampleBase, _global_resampling_values)
from grig.toolkit.multiproc import unpickle_file


__all__ = ['ResamplePolynomial', 'resamp']


class ResamplePolynomial(ResampleBase):

    def __init__(self, coordinates, data, error=None, mask=None,
                 window=None, robust=None, negthresh=None,
                 window_estimate_bins=10,
                 window_estimate_percentile=50,
                 window_estimate_oversample=2.0,
                 leaf_size=40, order=1, fix_order=True,
                 large_data=None, check_memory=True,
                 memory_buffer=0.25, **distance_kwargs):
        """
        Class to resample data using local polynomial fits.

        Parameters
        ----------
        coordinates : array_like of float
            (n_features, n_samples) array of independent values.  A local
            internal copy will be created if it is not a numpy.float64
            type.
        data : array_like of float
            (n_sets, n_samples) or (n_samples,) array of dependent
            values.  If multiple (n_sets) sets of data are supplied, then
            n_sets solutions will be calculated at each resampling point.
        error : array_like of float, optional
            (n_sets, n_samples) or (n_samples,) array of error (1-sigma)
            values associated with the `data` array.  `error` will be
            used to weight fits, and be propagated to the output error
            values.  If not supplied, the error may still be calculated
            from residuals to the fit during
            :func:`ResamplePolynomial.__call__`.
        mask : array_like of bool, optional
            (n_sets, n_data) or (n_data,) array of bool where `True`
            indicates a valid data point that can be included in the
            fitting and `False` indicates data points that should be
            excluded from the fit.  Masked points will be reflected in
            the output counts array.  If not supplied, all samples are
            considered valid.
        window : array_like or float or int, optional
            (n_features,) array or single value specifying the maximum
            distance (distance definition is handled by
            `distance_kwargs`) of a data sample from a resampling point
            such that it can be included in a local fit.  `window` may be
            declared for each feature.  For example, when fitting
            2-dimensional (x, y) data, a window of 1.0 would create a
            circular fitting window around each resampling point, whereas
            a window of (1.0, 0.5) would create an elliptical fitting
            window with a semi-major axis of 1.0 in x and semi-minor axis
            of 0.5 in y.  If not supplied, `window` is estimated via
            :func:`ResamplePolynomial.estimate_feature_windows`.
        order : array_like or int, optional
            (n_features,) array or single integer value specifying the
            polynomial fit order for each feature.
        fix_order : bool, optional
            In order for local polynomial fitting to occur, the basic
            requirement is that n_samples >= (order + 1) ** n_features,
            where n_samples is the number of data samples within
            `window`.  If `fix_order` is True and this condition is not
            met, then local fitting will be aborted for that point, and a
            value of `cval` will be returned instead.  If `fix_order` is
            False, then `order` will be reduced to the maximum value
            where this condition can be met.  NOTE: this is only
            available if `order` is symmetrical, i.e., it was passed in
            as a single integer to be applied across all features.
            Otherwise, it is unclear as to which feature order should be
            reduced to meet the condition.
        robust : float, optional
            Specifies an outlier rejection threshold for `data`.  A data
            point is identified as an outlier if
            abs(x_i - x_med) / MAD > robust, where x_med is the median,
            and MAD is the Median Absolute Deviation defined as
            1.482 * median(abs(x_i - x_med)).
        negthresh : float, optional
            Specifies a negative value rejection threshold such that
            data < (-stddev(data) * negthresh) will be excluded from the
            fit.
        window_estimate_bins : int, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_percentile : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_oversample : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        leaf_size : int, optional
            Number of points at which to switch to brute-force during the
            ball tree query algorithm.  See `sklearn.neighbors.BallTree`
            for further details.
        large_data : bool, optional
            If `True`, indicates that this resampling algorithm will run
            on a large set of data, and the ball tree should be created
            on subsets of the data.
        check_memory : bool, optional
            If `True`, check the memory requirements for resampling the
            supplied data.
        memory_buffer : float, optional
            A fraction (positive or negative) with which to modify the
            memory estimates for the process memory requirements.  Memory
            estimates are scaled by the factor 1 + memory_buffer.
        distance_kwargs : dict, optional
            Optional keyword arguments passed into
            :func:`sklearn.neighbors.DistanceMetric`.  The default is to
            use the "minkowski" definition with `p=2`, i.e., the
            Euclidean definition.  This is important in determining which
            samples lie inside the window region of a resampling point,
            and when deriving distance weighting factors.

        Raises
        ------
        ValueError : Invalid inputs to __init__ or __call__
        """
        self._order = order
        self._fix_order = fix_order
        super().__init__(
            coordinates, data, error=error, mask=mask, window=window,
            robust=robust, negthresh=negthresh,
            window_estimate_bins=window_estimate_bins,
            window_estimate_percentile=window_estimate_percentile,
            window_estimate_oversample=window_estimate_oversample,
            leaf_size=leaf_size, large_data=large_data,
            check_memory=check_memory, memory_buffer=memory_buffer,
            **distance_kwargs)

    @property
    def order(self):
        """
        Return the order of polynomial fit.

        Returns
        -------
        order : int or numpy.ndarray (int)
            A symmetrical order, or the order for each feature.
        """
        if self.sample_tree is None:
            return self._order
        return self.sample_tree.order

    @property
    def fit_tree(self):
        """
        Return the fitting tree representative of points to fit.

        Returns
        -------
        PolynomialTree
        """
        return super().fit_tree
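    # Illustrative usage sketch (not part of the original source): build
    # a resampler over hypothetical 1-D samples and evaluate local
    # quadratic fits on a regular output grid.  Coordinates are passed
    # with the documented (n_features, n_samples) shape.
    #
    #     >>> import numpy as np
    #     >>> from grig.resample_polynomial import ResamplePolynomial
    #     >>> rand = np.random.RandomState(0)
    #     >>> x = rand.uniform(0, 10, 256)     # sample coordinates
    #     >>> y = np.sin(x)                    # sample values
    #     >>> resampler = ResamplePolynomial(x[None], y, window=2.0,
    #     ...                                order=2)
    #     >>> x_out = np.linspace(0, 10, 101)  # output grid coordinates
    #     >>> fit = resampler(x_out)           # fitted values on the grid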
    def set_sample_tree(self, coordinates, radius=None,
                        window_estimate_bins=10,
                        window_estimate_percentile=50,
                        window_estimate_oversample=2.0,
                        leaf_size=40, large_data=None,
                        check_memory=True, memory_buffer=0.25,
                        **distance_kwargs):
        """
        Build the sample tree from input coordinates.

        Parameters
        ----------
        coordinates : numpy.ndarray (float)
            The input coordinates of shape (n_features, n_samples).
        radius : float or sequence (float), optional
            The radius of the window around each fitting point used to
            determine sample selection for fit.  If not supplied, will be
            estimated using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_bins : int, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_percentile : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_oversample : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        leaf_size : int, optional
            Number of points at which to switch to brute-force during the
            ball tree query algorithm.  See `sklearn.neighbors.BallTree`
            for further details.
        large_data : bool, optional
            If `True`, indicates that this resampling algorithm will run
            on a large set of data, and the ball tree should be created
            on subsets of the data.
        check_memory : bool, optional
            If `True`, check the memory requirements for resampling the
            supplied data.
        memory_buffer : float, optional
            A fraction (positive or negative) with which to modify the
            memory estimates for the process memory requirements.  Memory
            estimates are scaled by the factor 1 + memory_buffer.
        distance_kwargs : dict, optional
            Optional keyword arguments passed into
            :func:`sklearn.neighbors.DistanceMetric`.  The default is to
            use the "minkowski" definition with `p=2`, i.e., the
            Euclidean definition.  This is important in determining which
            samples lie inside the window region of a resampling point,
            and when deriving distance weighting factors.

        Returns
        -------
        None
        """
        self._order = self.check_order(self._order, self._n_features,
                                       self._n_samples)
        memory_kwargs = {'order': self._order}
        super().set_sample_tree(
            coordinates,
            radius=radius,
            window_estimate_bins=window_estimate_bins,
            window_estimate_percentile=window_estimate_percentile,
            window_estimate_oversample=window_estimate_oversample,
            leaf_size=leaf_size,
            large_data=large_data,
            check_memory=check_memory,
            memory_buffer=memory_buffer,
            memory_kwargs=memory_kwargs,
            **distance_kwargs)
        self.sample_tree.set_order(self._order, fix_order=self._fix_order)
        self.sample_tree.precalculate_phi_terms()
    @staticmethod
    def check_order(order, n_features, n_samples):
        r"""
        Check the order is of correct format and enough samples exist.

        Parameters
        ----------
        order : int or array_like of int (n_features,)
            The polynomial order to fit, either supplied as an integer to
            be applied over all dimensions, or as an array to give the
            order in each dimension.
        n_features : int
            The number of features (dimensions) of the sample
            coordinates.
        n_samples : int
            The number of samples.

        Returns
        -------
        order : int or numpy.ndarray of int (n_features,)
            The formatted polynomial order.

        Raises
        ------
        ValueError
            If there are too few samples for the desired order, or
            `order` is not formatted correctly.
        """
        order = np.asarray(order, dtype=np.int64)
        if order.ndim > 1:
            raise ValueError("Order should be a scalar or 1-D array")
        elif order.ndim == 1 and order.size != n_features:
            raise ValueError(
                "Order vector does not match number of features")

        if order.ndim == 0:
            min_points = (order + 1) ** n_features
        else:
            min_points = np.prod(order + 1)

        if n_samples < min_points:
            raise ValueError("Too few data samples for order")

        return order
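    # For example (a minimal sketch): a symmetric order of 2 over two
    # features requires at least (2 + 1) ** 2 = 9 samples, so the call
    # below succeeds, while n_samples=8 would raise a ValueError.
    #
    #     >>> ResamplePolynomial.check_order(2, n_features=2,
    #     ...                                n_samples=9)  # returns array(2)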
    def calculate_minimum_points(self):
        """
        Return the minimum number of points for a polynomial fit.

        Returns
        -------
        minimum_points : int
            The minimum number of points for a polynomial fit.
        """
        o = np.asarray(self.order)
        if o.shape == ():
            o = np.full(self.features, int(o))
        return np.prod(o + 1)
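    # For example (hypothetical values): an asymmetric order of (2, 3)
    # over two features gives np.prod(np.asarray((2, 3)) + 1) = 3 * 4
    # = 12, so at least 12 samples must fall within the window of each
    # resampling point.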
    def reduction_settings(self, smoothing=0.0, relative_smooth=False,
                           adaptive_threshold=None,
                           adaptive_algorithm='scaled',
                           error_weighting=True, fit_threshold=0.0,
                           cval=np.nan, edge_threshold=0.0,
                           edge_algorithm='distribution',
                           order_algorithm='bounded', is_covar=False,
                           estimate_covariance=False, jobs=None,
                           adaptive_region_coordinates=None,
                           use_threading=None, use_processes=None):
        r"""
        Define a set of reduction instructions based on user input.

        This method is responsible for determining, formatting, and
        checking a number of variables required for the resampling
        algorithm based on user input.  For detailed descriptions of user
        options, please see :func:`ResamplePolynomial.__call__`.

        Parameters
        ----------
        smoothing : float or array_like (n_features,), optional
        relative_smooth : bool, optional
        adaptive_threshold : float or array_like (n_features,), optional
        adaptive_algorithm : str, optional
        error_weighting : bool, optional
        fit_threshold : float, optional
        cval : float, optional
        edge_threshold : float or array_like (n_features,), optional
        edge_algorithm : str, optional
        order_algorithm : str, optional
        is_covar : bool, optional
        estimate_covariance : bool, optional
        jobs : int, optional
        adaptive_region_coordinates : array_like, optional
        use_threading : bool, optional
            If `True`, force use of threads during multiprocessing.
        use_processes : bool, optional
            If `True`, force use of sub-processes during multiprocessing.

        Returns
        -------
        settings : dict
            The reduction settings.  Also stored as
            :func:`ResamplePolynomial.fit_settings`.
        """
        settings = super().reduction_settings(
            error_weighting=error_weighting,
            fit_threshold=fit_threshold,
            cval=cval,
            edge_threshold=edge_threshold,
            edge_algorithm=edge_algorithm,
            jobs=jobs,
            use_threading=use_threading,
            use_processes=use_processes)
        settings['estimate_covariance'] = estimate_covariance

        adaptive_algorithm = str(adaptive_algorithm).lower().strip()
        if (adaptive_algorithm != 'none'
                and adaptive_threshold is not None
                and not np.allclose(adaptive_threshold, 0)):
            if not error_weighting:
                raise ValueError("Error weighting must be enabled for "
                                 "adaptive smoothing")
            if self.sample_tree.order is None or np.allclose(
                    self.sample_tree.order, 0):
                raise ValueError("Adaptive smoothing cannot be applied "
                                 "for polynomial fit of zero order.")
            else:
                adaptive = True
                adaptive_algorithm = str(adaptive_algorithm).strip().lower()
                if adaptive_algorithm == 'shaped':
                    # only relevant in 2+ dimensions
                    shaped = self.features > 1
                elif adaptive_algorithm == 'scaled':
                    shaped = False
                else:
                    raise ValueError("Adaptive algorithm must be one of "
                                     "{None, 'shaped', 'scaled'}.")
        else:
            adaptive = False
            shaped = False
            adaptive_threshold = 0.0

        distance_weighting = smoothing is not None or adaptive
        error_weighting = adaptive or error_weighting
        if adaptive and not self._error_valid:
            raise ValueError(
                "Errors must be provided for adaptive smoothing")
        else:
            error_weighting &= self._error_valid

        if distance_weighting:
            # In the case that adaptive smoothing is set, but no distance
            # weighting is selected, smoothing of 1/3 is reasonable.
            if smoothing is None:
                smoothing = np.full(
                    self.features,
                    1 / 3 if relative_smooth else self.window / 3)

            alpha = np.atleast_1d(np.asarray(smoothing, dtype=np.float64))
            if alpha.size not in [1, self.features]:
                raise ValueError(
                    "Smoothing size does not match number of features")

            if adaptive:
                adaptive_threshold = np.atleast_1d(
                    np.asarray(adaptive_threshold, dtype=np.float64))
                if alpha.size != self.features:
                    alpha = np.full(self.features, alpha[0])
                if adaptive_threshold.size not in [1, self.features]:
                    raise ValueError("Adaptive smoothing size does not "
                                     "match number of features")
                elif adaptive_threshold.size != self.features:
                    adaptive_threshold = np.full(
                        self.features, adaptive_threshold[0])

            if not relative_smooth:
                if alpha.size != self.features:  # alpha size = 1
                    alpha = np.full(self.features, alpha[0])
                if not adaptive:
                    alpha = 2 * (alpha ** 2) / (self.window ** 2)
                else:
                    alpha /= self.window  # sigma in terms of window

            if ((not adaptive and alpha.size == 1)
                    or np.unique(alpha).size == 1):
                # Symmetrical across dimensions - use single value
                alpha = np.atleast_1d(np.float64(alpha[0]))
        else:
            alpha = np.asarray([0.0])

        order = self.sample_tree.order
        order_varies = self.sample_tree.order_varies
        order_symmetry = self.sample_tree.order_symmetry
        order_algorithm = str(order_algorithm).lower().strip()
        order_func_lookup = {
            'none': 0,
            'bounded': 1,
            'extrapolate': 2,
            'counts': 3
        }
        if order_algorithm not in order_func_lookup:
            raise ValueError(f"Unknown order algorithm: {order_algorithm}")
        order_algorithm_idx = order_func_lookup[order_algorithm]

        if order_symmetry:
            order_minimum_points = (order + 1) ** self.features
        else:
            order_minimum_points = np.prod(order + 1)

        if is_covar:
            mean_fit = True
        else:
            mean_fit = order_symmetry and order == 0

        if adaptive:
            region_coordinates = adaptive_region_coordinates
        else:
            region_coordinates = None

        settings['distance_weighting'] = distance_weighting
        settings['alpha'] = alpha
        settings['adaptive_threshold'] = adaptive_threshold
        settings['shaped'] = shaped
        settings['adaptive_alpha'] = np.empty((0, 0, 0, 0),
                                              dtype=np.float64)
        settings['order'] = np.atleast_1d(order)
        settings['order_varies'] = order_varies
        settings['order_algorithm'] = order_algorithm
        settings['order_algorithm_idx'] = order_algorithm_idx
        settings['order_symmetry'] = order_symmetry
        settings['order_minimum_points'] = order_minimum_points
        settings['is_covar'] = is_covar
        settings['mean_fit'] = mean_fit
        settings['relative_smooth'] = relative_smooth
        settings['adaptive_region_coordinates'] = region_coordinates
        self._fit_settings = settings
        return settings
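    # Illustrative sketch (hypothetical instance and values): the
    # settings dictionary can be inspected before a reduction.  Supplying
    # any smoothing enables distance weighting, and the default 'bounded'
    # order algorithm maps to lookup index 1 in the table above.
    #
    #     >>> settings = resampler.reduction_settings(smoothing=0.5)
    #     >>> settings['distance_weighting']
    #     True
    #     >>> settings['order_algorithm_idx']
    #     1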
    def calculate_adaptive_smoothing(self, settings):
        r"""
        Calculate the adaptive distance weighting kernel.

        Calculates a weighting kernel for each sample in the reduction,
        i.e., each sample will have a different weighting factor based on
        its distance (and optionally, direction) from each point at which
        a fit is required.  This is done by performing a fit centered on
        each sample, and measuring how that fit deviates from the actual
        observed sample value.

        The first step is to decide upon the distance weighting factor
        used to perform the initial fit.  It is assumed that the
        coordinate error is known (or approximately known), and supplied
        as a :math:`1 \sigma` error as the 'alpha' keyword in `settings`.
        For example, the error in astronomical observations taken using a
        Gaussian beam with known FWHM is:

        .. math::

            \sigma = \frac{\text{FWHM}}{2 \sqrt{2 \ln{2}}}

        However, adaptive weighting may be calculated over select
        dimensions, leaving some fixed.  In this case, the `alpha`
        keyword will always apply a weighting factor in a "fixed"
        dimension :math:`k` as:

        .. math::

            w_k = \exp
                \left(
                    \frac{-\Delta x_k^2}{\alpha_{fixed, k} \Omega_k^2}
                \right)

        where :math:`\Delta x_k` is the distance from the resampling
        point to the sample in question, and :math:`\Omega_k` is the
        window principal axis in dimension :math:`k`.  Note that in the
        weighting function, :math:`\sigma` is related to :math:`\alpha`
        by:

        .. math::

            \alpha_k = 2 \sigma_k^2

        and weighting is applied using the definition of :math:`w_k`
        above.  To signify that :math:`\alpha_{fixed, k}` should be used
        instead of :math:`\sigma` for dimension :math:`k`, the
        "adaptive_threshold" array in `settings` should be set to zero
        for the corresponding dimension.  For example, if we had::

            settings['alpha'] = [0.3, 0.3]
            settings['adaptive_threshold'] = [1, 0]

        the first dimension would have :math:`\alpha_0 = 0.18`
        (:math:`2 \times 0.3^2`), and the second would have
        :math:`\alpha_1 = 0.3`.  In this example, :math:`\alpha_0` would
        be allowed to vary per sample, while :math:`\alpha_1` would be
        fixed for each sample at 0.3.  An initial fit is then performed
        at each sample coordinate using a test distance weighting
        parameter:

        .. math::

            \sigma_{test, k} = \frac{\pi \sigma_k}{\sqrt{2 \ln{2}}}

            \alpha_{test, k} = 2 \sigma_{test, k}^2

        Using this initial weighting parameter, the adaptive kernels are
        derived using either
        :func:`resample_utils.shaped_adaptive_weight_matrix` or
        :func:`resample_utils.scaled_adaptive_weight_matrix` depending on
        whether the "shaped" keyword in `settings` is set to `True` or
        `False` respectively.

        The weighting kernels are stored in the "adaptive_alpha" keyword
        value in `settings`.

        Parameters
        ----------
        settings : dict
            Reduction settings, as returned by
            :func:`ResamplePolynomial.reduction_settings`.

        Returns
        -------
        None
        """
        adaptive = settings['adaptive_threshold']
        do_adaptive = adaptive is not None
        if do_adaptive:
            adaptive = np.atleast_1d(adaptive).astype(float)
            if adaptive.size == 0 or np.allclose(adaptive, 0):
                do_adaptive = False

        if not do_adaptive:
            settings['adaptive_threshold'] = None
            settings['adaptive_alpha'] = np.empty((0, 0, 0, 0))
            return

        sigma = np.atleast_1d(settings['alpha'])
        if sigma.size != self.features:
            sigma = np.full(self.features, sigma[0])

        fixed = adaptive == 0
        nyquist = np.pi / np.sqrt(2 * np.log(2))
        test_sigma = sigma * nyquist
        test_sigma[fixed] = sigma[fixed].copy()
        scaled_test_sigma = test_sigma.copy()
        scaled_test_sigma[~fixed] *= adaptive[~fixed]
        scaled_test_alpha = 2 * (scaled_test_sigma ** 2)
        if settings['relative_smooth']:
            scaled_test_alpha[fixed] = sigma[fixed].copy()
        else:
            scaled_test_alpha[fixed] = 2 * (sigma[fixed] ** 2)

        shaped = settings['shaped']

        test_reduction = self.__call__(
            scale_coordinates(self.sample_tree.coordinates.copy(),
                              self._radius, self._scale_offsets,
                              reverse=True),
            smoothing=scaled_test_alpha,
            relative_smooth=True,
            adaptive_threshold=None,
            fit_threshold=0.0,  # No fit checking
            cval=np.nan,  # NaN on failure
            edge_threshold=0.0,  # No edge checking
            error_weighting=settings['error_weighting'],
            order_algorithm=settings['order_algorithm'],
            estimate_covariance=settings['estimate_covariance'],
            get_error=False,
            get_counts=shaped,
            get_weights=False,
            get_distance_weights=shaped,
            get_rchi2=True,
            get_cross_derivatives=shaped,
            get_offset_variance=shaped,
            is_covar=False,
            jobs=settings['jobs'],
            adaptive_region_coordinates=settings[
                'adaptive_region_coordinates'])

        if shaped:
            counts = np.atleast_2d(test_reduction[1])
            distance_weights = np.atleast_2d(test_reduction[2])
            rchi2 = np.atleast_2d(test_reduction[3])
            gradient_mscp = test_reduction[4]
            distribution_offset_variance = test_reduction[5]
            if gradient_mscp.ndim == 3:
                gradient_mscp = gradient_mscp[None]
            rho = relative_density(scaled_test_sigma, counts,
                                   distance_weights)
        else:
            rchi2 = np.atleast_2d(test_reduction[1])
            gradient_mscp = None
            rho = None
            distribution_offset_variance = None

        # Here the Nyquist level sigma is used, thereby implementing the
        # requested scaling "adaptive" factor.
        if shaped:
            gmat = shaped_adaptive_weight_matrices(
                test_sigma, rchi2, gradient_mscp,
                density=rho,
                variance_offsets=distribution_offset_variance,
                fixed=fixed)
        else:
            gmat = scaled_adaptive_weight_matrices(
                test_sigma, rchi2, fixed=fixed)

        settings['adaptive_alpha'] = np.swapaxes(gmat, 0, 1).copy()
        self._fit_settings = settings
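    # Illustrative sketch (hypothetical values): adaptive weighting is
    # normally enabled through __call__, which invokes this method
    # internally.  Errors must have been supplied at initialization.  For
    # a Gaussian beam of known FWHM, `smoothing` gives the 1-sigma
    # coordinate error:
    #
    #     >>> fwhm = 1.0
    #     >>> sigma = fwhm / (2 * np.sqrt(2 * np.log(2)))
    #     >>> fit = resampler(x_out, smoothing=sigma,
    #     ...                 adaptive_threshold=1.0,
    #     ...                 adaptive_algorithm='scaled')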
    def pre_fit(self, settings, *args, adaptive_region_coordinates=None):
        """
        Perform pre-fitting steps and build the fitting tree.

        Parameters
        ----------
        settings : dict
            Settings calculated via `reduction_settings` to be applied
            if necessary.
        args : n-tuple
            The call input arguments.
        adaptive_region_coordinates : numpy.ndarray (float), optional
            The coordinates determined from a previous adaptive smoothing
            algorithm.

        Returns
        -------
        None
        """
        settings['adaptive_region_coordinates'] = args
        self.calculate_adaptive_smoothing(settings)
        super().pre_fit(settings, *args)

        if adaptive_region_coordinates is not None:
            region_grid = self.grid_class(
                *adaptive_region_coordinates,
                tree_shape=self.sample_tree.shape,
                build_tree=True,
                scale_factor=self._radius,
                scale_offset=self._scale_offsets,
                dtype=np.float64)
            skip_blocks = region_grid.tree.hood_population == 0
            self.fit_tree.block_population[skip_blocks] = 0
            if settings['order_symmetry']:
                o = settings['order'][0]
            else:
                o = settings['order']
            self.fit_tree.set_order(o,
                                    fix_order=not settings['order_varies'])
            self.fit_tree.precalculate_phi_terms()
    @classmethod
    def process_block(cls, args, block):
        r"""
        Run :func:`solve_fits` on each block.

        Utility function that parses the settings and tree objects to
        something usable by the numba JIT compiled resampling functions.
        This is not meant to be called directly.

        Parameters
        ----------
        args : 2-tuple
            A tuple of form (filename, iteration) where the filename is a
            string pointing towards a previously saved pickle file
            containing the relevant information for the reduction if
            required.  If set to `None`, the arguments are retrieved from
            the `_global_resampling_values` global parameter.
        block : int
            The block index to process.

        Returns
        -------
        results : 9-tuple of numpy.ndarray
            The first element contains the fit point indices to be fit.
            For the remaining elements, please see :func:`solve_fits`
            return values.
        """
        filename, iteration = args
        # Loading cannot be covered in tests as it occurs on other CPUs.
        load_args = False
        block_memory = _global_resampling_values.get('block_memory_usage')
        if not cls.sufficient_memory_for(block_memory):  # pragma: no cover
            raise MemoryError(
                f"Insufficient memory to process block {block}. "
                f"Estimated: {block_memory} bytes. "
                f"Available: {psutil.virtual_memory().available} bytes.")

        if filename is not None:  # pragma: no cover
            if 'args' not in _global_resampling_values:
                load_args = True
            elif 'iteration' not in _global_resampling_values:
                load_args = True
            elif iteration != _global_resampling_values.get('iteration'):
                load_args = True
            elif filename != _global_resampling_values.get('filename'):
                load_args = True

        if load_args:  # pragma: no cover
            _global_resampling_values['args'], _ = unpickle_file(filename)
            _global_resampling_values['iteration'] = iteration
            _global_resampling_values['filename'] = filename

        (sample_values, sample_error, sample_mask, fit_tree, sample_tree,
         get_error, get_counts, get_weights, get_distance_weights,
         get_rchi2, get_cross_derivatives, get_offset_variance,
         settings) = _global_resampling_values['args']

        if load_args:  # pragma: no cover
            block_memory = settings.get('block_memory_usage')
            if not cls.sufficient_memory_for(block_memory):
                raise MemoryError(
                    f"Insufficient memory to process {filename}. "
                    f"Estimated: {block_memory} bytes. "
                    f"Available: "
                    f"{psutil.virtual_memory().available} bytes.")

        fit_indices, fit_coordinates, fit_phi_terms = \
            fit_tree.block_members(block, get_locations=True,
                                   get_terms=True)

        sample_indices = nb.typed.List(sample_tree.query_radius(
            fit_coordinates, 1.0, block=block, return_distance=False))

        if not sample_tree.large_data:
            sample_phi_terms = sample_tree.phi_terms
        else:
            phi_indices = sample_tree.create_phi_terms_for(block)
            n_coeffs = sample_tree.exponents.shape[0]
            n = sample_tree.n_members
            sample_phi_terms = np.empty((n_coeffs, n), dtype=float)
            sample_phi_terms[:, phi_indices] = sample_tree.phi_terms

        return (fit_indices, *solve_fits(
            sample_indices, sample_tree.coordinates, sample_phi_terms,
            sample_values, sample_error, sample_mask, fit_coordinates,
            fit_phi_terms, settings['order'], settings['alpha'],
            settings['adaptive_alpha'],
            is_covar=settings['is_covar'],
            mean_fit=settings['mean_fit'],
            cval=settings['cval'],
            fit_threshold=settings['fit_threshold'],
            error_weighting=settings['error_weighting'],
            estimate_covariance=settings['estimate_covariance'],
            order_algorithm_idx=settings['order_algorithm_idx'],
            order_term_indices=sample_tree.term_indices,
            derivative_term_map=sample_tree.derivative_term_map,
            edge_algorithm_idx=settings['edge_algorithm_idx'],
            edge_threshold=settings['edge_threshold'],
            minimum_points=settings['order_minimum_points'],
            get_error=get_error,
            get_counts=get_counts,
            get_weights=get_weights,
            get_distance_weights=get_distance_weights,
            get_rchi2=get_rchi2,
            get_cross_derivatives=get_cross_derivatives,
            get_offset_variance=get_offset_variance))
    def __call__(self, *args, smoothing=0.0, relative_smooth=False,
                 adaptive_threshold=None, adaptive_algorithm='scaled',
                 fit_threshold=0.0, cval=np.nan, edge_threshold=0.0,
                 edge_algorithm='distribution', order_algorithm='bounded',
                 error_weighting=True, estimate_covariance=False,
                 is_covar=False, jobs=None, use_threading=None,
                 use_processes=None, adaptive_region_coordinates=None,
                 get_error=False, get_counts=False, get_weights=False,
                 get_distance_weights=False, get_rchi2=False,
                 get_cross_derivatives=False, get_offset_variance=False,
                 **kwargs):
        """
        Resample data defined during initialization to new coordinates.

        Parameters
        ----------
        args : array_like or n-tuple of array_like
            args can take one of the following formats:

            - grid : n-tuple of array_like
              Here `n` is the number of dimensions and each array should
              be of shape (n_data,).  This indicates that resampling
              should occur on a grid where the first argument corresponds
              to the coordinates along the first dimension.
            - single point : n-tuple of float
              `n` is the number of dimensions.  The coordinate to
              resample onto.
            - irregular : array_like
              An array of shape (n_dimensions, n_data) defining a set of
              coordinates onto which to resample.

        smoothing : float or array_like of float, optional
            If set, weights the polynomial fitting by distance from the
            resampling coordinate to the sample coordinate.  The
            weighting factor applied is exp(-dx^2 / `smoothing`).  A
            value may be defined for each dimension by providing
            `smoothing` as an array of shape (n_dimensions,).  However,
            if `adaptive_threshold` > 0 for a certain feature, the
            corresponding smoothing element has a different meaning.  In
            this case, it gives the 1-sigma uncertainty in the coordinate
            position for that feature.  As a reference, for astronomical
            observations using a Gaussian beam with known FWHM,
            `smoothing` should be set to FWHM / (2 * sqrt(2 * log(2)))
            so long as adaptive weighting is enabled.
        relative_smooth : bool, optional
            If `True`, the supplied `smoothing` value is defined in units
            of `window`.  Otherwise, `smoothing` is in the same units as
            the supplied coordinates.
        adaptive_threshold : float or array_like (n_features,), optional
            If a single value is supplied, each feature will use this
            value.  Otherwise, each feature should specify an adaptive
            threshold as an element of `adaptive_threshold`.  If a
            non-zero value is supplied, adaptive weighting will be
            enabled for that feature.  These values define the size of
            the initial weighting kernel used during fitting.  The
            nominal value is 1.
        adaptive_algorithm : str, optional
            May be one of {'scaled', 'shaped', None}.  `None` disables
            adaptive weighting.  The "scaled" algorithm allows the
            weighting kernel to change in size only.  The "shaped"
            algorithm allows the kernel to change in size, rotate, and
            stretch each primary axis.  Please see
            :func:`resample_utils.scaled_adaptive_weight_matrix` and
            :func:`resample_utils.shaped_adaptive_weight_matrix` for
            further details.
        fit_threshold : float, optional
            If nonzero, rejects a polynomial fit if it deviates by
            abs(`fit_threshold`) times the RMS of the samples.  If a fit
            is rejected, the value will be replaced by the mean (error
            weighted if set) when `fit_threshold` > 0, or NaN when
            `fit_threshold` < 0.
        cval : float, optional
            During fitting, any fit that does not meet the
            `order_algorithm` requirement will be set to this value.
            This will be NaN by default.
        edge_threshold : float or array_like of float, optional
            If set to a value > 0, edges of the fit will be masked out
            according to `edge_algorithm`.  Values close to zero will
            result in a low degree of edge clipping, while values close
            to 1 clip edges to a greater extent.  The exact definition of
            `edge_threshold` depends on the algorithm.  For further
            details, please see
            :func:`resampling.resample_utils.check_edges`.
        edge_algorithm : str, optional
            Describes how to clip edges if `edge_threshold` is non-zero.
            The available algorithms are:

            - 'distribution' (default): Statistics on sample
              distributions are calculated, and if the resampling point
              is > 1/threshold standard deviations away from the sample
              mean, it will be clipped.
            - 'ellipsoid': If the samples used to fit a resampling point
              deviate from the resampling point location by more than
              this amount, it will be clipped.
            - 'box': If the center-of-mass of the flattened
              1-dimensional distribution of samples deviates from the
              resampling point location in any dimension, it will be
              clipped.
            - 'range': Over each dimension, check the distribution of
              points is greater than `edge_threshold` to the "left" and
              "right" of the resampling point.

        order_algorithm : str, optional
            The type of check to perform on whether the sample
            distribution for each resampling point is adequate to derive
            a polynomial fit.  Depending on `order` and `fix_order`, if
            the distribution does not meet the criteria for
            `order_algorithm`, either the fit will be aborted, returning
            a value of `cval`, or the fit order will be reduced.
            Available algorithms are:

            - 'bounded': Require that there are `order` samples in both
              the negative and positive directions of each feature from
              the resampling point.
            - 'counts': Require that there are
              (order + 1) ** n_features samples within the `window` of
              each resampling point.
            - 'extrapolate': Attempt to fit regardless of the sample
              distribution.

            Note that 'bounded' is the most robust mode as it ensures
            that no singular values will be encountered during the
            least-squares fitting of polynomial coefficients.
        error_weighting : bool, optional
            If `True` (default), weight polynomial fitting by the `error`
            values of each sample.
        estimate_covariance : bool, optional
            If `True`, calculate errors on the fit using
            :func:`resample_utils.estimated_covariance_matrix_inverse`.
            Otherwise, use
            :func:`resample_utils.covariance_matrix_inverse`.
        is_covar : bool, optional
            If `True`, the input data is treated as a covariance instead
            of a flux, and is propagated as if through a weighted mean.
        jobs : int, optional
            Specifies the maximum number of concurrently running jobs.
            An attempt will be made to parallel process using a
            thread-pool if available, but will otherwise revert to the
            "loky" backend.  Values of 0 or 1 will result in serial
            processing.  A negative value sets jobs to n_cpus + 1 + jobs
            such that -1 would use all cpus, and -2 would use all but one
            cpu.
        use_threading : bool, optional
            If `True`, force use of threads during multiprocessing.
        use_processes : bool, optional
            If `True`, force use of sub-processes during multiprocessing.
        get_error : bool, optional
            If `True`, returns the error which is given as the weighted
            RMS of the samples used for each resampling point.
        get_counts : bool, optional
            If `True`, returns the number of samples used to fit each
            resampling point.
        get_weights : bool, optional
            If `True`, returns the sum of all sample weights (error and
            distance) used in the fit at each resampling point.
        get_distance_weights : bool, optional
            If `True`, returns the sum of all sample distance weights
            (not including error) used in the fit at each resampling
            point.
        get_rchi2 : bool, optional
            If `True`, returns the reduced chi-squared statistic of the
            fit at each resampling point.  Note that this is only valid
            if errors were supplied during
            :func:`ResamplePolynomial.__init__`.
        get_cross_derivatives : bool, optional
            If `True`, returns the derivative mean-squared-cross-product
            of the fit derivatives at each resampling point.
        get_offset_variance : bool, optional
            If `True`, returns the offset of the resampling point from
            the center of the sample distribution used to generate the
            fit as a variance.  I.e., a return value of 9 indicates a
            3-sigma deviation of the resampling point from the sample
            distribution.

        Returns
        -------
        fit_data, [fit_error], [fit_counts]
            The data fit at `args` and optionally, the error associated
            with the fit, and the number of samples used to generate each
            point.  Will be of a shape determined by `args`.  `fit_data`
            and `fit_error` will be of type np.float64, and `fit_counts`
            will be of type np.int64.
        """
        return super().__call__(
            *args,
            smoothing=smoothing,
            relative_smooth=relative_smooth,
            adaptive_threshold=adaptive_threshold,
            adaptive_algorithm=adaptive_algorithm,
            fit_threshold=fit_threshold,
            cval=cval,
            edge_threshold=edge_threshold,
            edge_algorithm=edge_algorithm,
            order_algorithm=order_algorithm,
            error_weighting=error_weighting,
            estimate_covariance=estimate_covariance,
            is_covar=is_covar,
            jobs=jobs,
            adaptive_region_coordinates=adaptive_region_coordinates,
            use_threading=use_threading,
            use_processes=use_processes,
            get_error=get_error,
            get_counts=get_counts,
            get_weights=get_weights,
            get_distance_weights=get_distance_weights,
            get_rchi2=get_rchi2,
            get_cross_derivatives=get_cross_derivatives,
            get_offset_variance=get_offset_variance,
            **kwargs)
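    # Illustrative sketch (hypothetical instance): optional outputs are
    # appended to the returned tuple in the documented order.
    #
    #     >>> fit, error, counts = resampler(x_out, get_error=True,
    #     ...                                get_counts=True)
    #     >>> fit.shape == error.shape == counts.shape
    #     True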
def resamp(coordinates, data, *locations,
           error=None, mask=None, window=None,
           order=1, fix_order=True,
           robust=None, negthresh=None,
           window_estimate_bins=10,
           window_estimate_percentile=50,
           window_estimate_oversample=2.0,
           leaf_size=40, large_data=False,
           smoothing=0.0, relative_smooth=False,
           adaptive_threshold=None, adaptive_algorithm='scaled',
           fit_threshold=0.0, cval=np.nan,
           edge_threshold=0.0, edge_algorithm='distribution',
           order_algorithm='bounded', error_weighting=True,
           estimate_covariance=False, is_covar=False, jobs=None,
           get_error=False, get_counts=False, get_weights=False,
           get_distance_weights=False, get_rchi2=False,
           get_cross_derivatives=False, get_offset_variance=False,
           **distance_kwargs):
    r"""
    Resample data using local polynomial fitting.

    Initializes and then calls the :class:`ResamplePolynomial` class.
    For further details on all available parameters, please see
    :func:`ResamplePolynomial.__init__` and
    :func:`ResamplePolynomial.__call__`.

    Parameters
    ----------
    coordinates
    data
    locations
    error
    mask
    window
    order
    fix_order
    robust
    negthresh
    window_estimate_bins
    window_estimate_percentile
    window_estimate_oversample
    leaf_size
    large_data
    smoothing
    relative_smooth
    adaptive_threshold
    adaptive_algorithm
    fit_threshold
    cval
    edge_threshold
    edge_algorithm
    order_algorithm
    error_weighting
    estimate_covariance
    is_covar
    jobs
    get_error
    get_counts
    get_weights
    get_distance_weights
    get_rchi2
    get_cross_derivatives
    get_offset_variance
    distance_kwargs

    Returns
    -------
    results : float or numpy.ndarray or n-tuple of (float or numpy.ndarray)
        If a fit is performed at a single location, the output will
        consist of int or float scalar values.  Multiple fits result in
        numpy arrays.  The exact output shape depends on the number of
        data sets, the number of fitted points, and the dimensions of
        the fit locations.  Assuming that all get_* keywords are set to
        `True`, the output order is:

            results[0] = fitted values
            results[1] = error on the fit
            results[2] = sample counts for each fit
            results[3] = total weight of all samples in fit
            results[4] = total distance weight sum of all samples in fit
            results[5] = reduced chi-squared statistic of the fit
            results[6] = derivative mean squared cross products
            results[7] = offset variance of fit from sample distribution
    """
    resampler = ResamplePolynomial(
        coordinates, data,
        error=error, mask=mask, window=window,
        order=order, fix_order=fix_order,
        robust=robust, negthresh=negthresh,
        window_estimate_bins=window_estimate_bins,
        window_estimate_percentile=window_estimate_percentile,
        window_estimate_oversample=window_estimate_oversample,
        leaf_size=leaf_size, large_data=large_data,
        **distance_kwargs)

    return resampler(*locations,
                     smoothing=smoothing,
                     relative_smooth=relative_smooth,
                     adaptive_threshold=adaptive_threshold,
                     adaptive_algorithm=adaptive_algorithm,
                     fit_threshold=fit_threshold,
                     cval=cval,
                     edge_threshold=edge_threshold,
                     edge_algorithm=edge_algorithm,
                     order_algorithm=order_algorithm,
                     error_weighting=error_weighting,
                     estimate_covariance=estimate_covariance,
                     is_covar=is_covar,
                     jobs=jobs,
                     get_error=get_error,
                     get_counts=get_counts,
                     get_weights=get_weights,
                     get_distance_weights=get_distance_weights,
                     get_rchi2=get_rchi2,
                     get_cross_derivatives=get_cross_derivatives,
                     get_offset_variance=get_offset_variance)
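# Illustrative sketch (hypothetical values): the one-shot functional
# interface builds the resampler and evaluates it in a single call.
# Coordinates use the documented (n_features, n_samples) shape.
#
#     >>> import numpy as np
#     >>> from grig.resample_polynomial import resamp
#     >>> rand = np.random.RandomState(1)
#     >>> x = rand.uniform(0, 10, 256)
#     >>> y = np.sin(x) + rand.normal(0, 0.05, 256)
#     >>> x_out = np.linspace(0, 10, 101)
#     >>> fit, counts = resamp(x[None], y, x_out, window=2.0, order=2,
#     ...                      get_counts=True)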