Source code for grig.resample_polynomial

# Licensed under a 3-clause BSD style license - see LICENSE.rst

import numba as nb
import numpy as np
import psutil

from grig.resample_utils import (
    scale_coordinates,
    shaped_adaptive_weight_matrices,
    scaled_adaptive_weight_matrices,
    relative_density, solve_fits)
from grig.resample_base import (
    ResampleBase, _global_resampling_values)
from grig.toolkit.multiproc import unpickle_file


__all__ = ['ResamplePolynomial', 'resamp']


class ResamplePolynomial(ResampleBase):

    def __init__(self, coordinates, data, error=None, mask=None,
                 window=None, robust=None, negthresh=None,
                 window_estimate_bins=10,
                 window_estimate_percentile=50,
                 window_estimate_oversample=2.0,
                 leaf_size=40, order=1, fix_order=True,
                 large_data=None, check_memory=True,
                 memory_buffer=0.25, **distance_kwargs):
        """
        Class to resample data using local polynomial fits.

        Parameters
        ----------
        coordinates : array_like of float
            (n_features, n_samples) array of independent values.  A local
            internal copy will be created if it is not a numpy.float64
            type.
        data : array_like of float
            (n_sets, n_samples) or (n_samples,) array of dependent
            values.  If multiple (n_sets) sets of data are supplied, then
            n_sets solutions will be calculated at each resampling point.
        error : array_like of float, optional
            (n_sets, n_samples) or (n_samples,) array of error (1-sigma)
            values associated with the `data` array.  `error` will be
            used to weight fits, and be propagated to the output error
            values.  If not supplied, the error may still be calculated
            from residuals to the fit during
            :func:`ResamplePolynomial.__call__`.
        mask : array_like of bool, optional
            (n_sets, n_data) or (n_data,) array of bool where `True`
            indicates a valid data point that can be included in the
            fitting and `False` indicates data points that should be
            excluded from the fit.  Masked points will be reflected in
            the output counts array.  If not supplied, all samples are
            considered valid.
        window : array_like or float or int, optional
            (n_features,) array or single value specifying the maximum
            distance (distance definition is handled by
            `distance_kwargs`) of a data sample from a resampling point
            such that it can be included in a local fit.  `window` may be
            declared for each feature.  For example, when fitting
            2-dimensional (x, y) data, a window of 1.0 would create a
            circular fitting window around each resampling point, whereas
            a window of (1.0, 0.5) would create an elliptical fitting
            window with a semi-major axis of 1.0 in x and semi-minor axis
            of 0.5 in y.  If not supplied, `window` is estimated via
            :func:`ResamplePolynomial.estimate_feature_windows`.
        order : array_like or int, optional
            (n_features,) array or single integer value specifying the
            polynomial fit order for each feature.
        fix_order : bool, optional
            In order for local polynomial fitting to occur, the basic
            requirement is that n_samples >= (order + 1) ** n_features,
            where n_samples is the number of data samples within
            `window`.  If `fix_order` is True and this condition is not
            met, then local fitting will be aborted for that point, and a
            value of `cval` will be returned instead.  If `fix_order` is
            False, then `order` will be reduced to the maximum value
            where this condition can be met.  NOTE: this is only
            available if `order` is symmetrical, i.e., it was passed in
            as a single integer to be applied across all features.
            Otherwise, it is unclear as to which feature order should be
            reduced to meet the condition.
        robust : float, optional
            Specifies an outlier rejection threshold for `data`.  A data
            point is identified as an outlier if
            abs(x_i - x_med) / MAD > robust, where x_med is the median,
            and MAD is the Median Absolute Deviation defined as
            1.482 * median(abs(x_i - x_med)).
        negthresh : float, optional
            Specifies a negative value rejection threshold such that
            data < (-stddev(data) * negthresh) will be excluded from the
            fit.
        window_estimate_bins : int, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_percentile : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_oversample : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        leaf_size : int, optional
            Number of points at which to switch to brute-force during the
            ball tree query algorithm.  See `sklearn.neighbors.BallTree`
            for further details.
        large_data : bool, optional
            If `True`, indicates that this resampling algorithm will run
            on a large set of data, and the ball tree should be created
            on subsets of the data.
        check_memory : bool, optional
            If `True`, check the memory requirements for resampling the
            supplied data.
        memory_buffer : float, optional
            A fraction (positive or negative) with which to modify the
            memory estimates for the process memory requirements.  Memory
            estimates are scaled by the factor 1 + memory_buffer.
        distance_kwargs : dict, optional
            Optional keyword arguments passed into
            :func:`sklearn.neighbors.DistanceMetric`.  The default is to
            use the "minkowski" definition with `p=2`, i.e., the
            Euclidean definition.  This is important in determining which
            samples lie inside the window region of a resampling point,
            and when deriving distance weighting factors.

        Raises
        ------
        ValueError : Invalid inputs to __init__ or __call__
        """
        self._order = order
        self._fix_order = fix_order
        super().__init__(
            coordinates, data, error=error, mask=mask, window=window,
            robust=robust, negthresh=negthresh,
            window_estimate_bins=window_estimate_bins,
            window_estimate_percentile=window_estimate_percentile,
            window_estimate_oversample=window_estimate_oversample,
            leaf_size=leaf_size, large_data=large_data,
            check_memory=check_memory, memory_buffer=memory_buffer,
            **distance_kwargs)

    @property
    def order(self):
        """
        Return the order of polynomial fit.

        Returns
        -------
        order : int or numpy.ndarray (int)
            A symmetrical order, or the order for each feature.
        """
        if self.sample_tree is None:
            return self._order
        return self.sample_tree.order

    @property
    def fit_tree(self):
        """
        Return the fitting tree representative of points to fit.

        Returns
        -------
        PolynomialTree
        """
        return super().fit_tree
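    # Illustrative usage sketch (not part of the original source): build
    # a resampler over hypothetical 1-D samples and evaluate local
    # quadratic fits on a regular output grid.  Coordinates are passed
    # with the documented (n_features, n_samples) shape.
    #
    #     >>> import numpy as np
    #     >>> from grig.resample_polynomial import ResamplePolynomial
    #     >>> rand = np.random.RandomState(0)
    #     >>> x = rand.uniform(0, 10, 256)     # sample coordinates
    #     >>> y = np.sin(x)                    # sample values
    #     >>> resampler = ResamplePolynomial(x[None], y, window=2.0,
    #     ...                                order=2)
    #     >>> x_out = np.linspace(0, 10, 101)  # output grid coordinates
    #     >>> fit = resampler(x_out)           # fitted values on the grid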
    def set_sample_tree(self, coordinates, radius=None,
                        window_estimate_bins=10,
                        window_estimate_percentile=50,
                        window_estimate_oversample=2.0,
                        leaf_size=40, large_data=None,
                        check_memory=True, memory_buffer=0.25,
                        **distance_kwargs):
        """
        Build the sample tree from input coordinates.

        Parameters
        ----------
        coordinates : numpy.ndarray (float)
            The input coordinates of shape (n_features, n_samples).
        radius : float or sequence (float), optional
            The radius of the window around each fitting point used to
            determine sample selection for fit.  If not supplied, will be
            estimated using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_bins : int, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_percentile : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        window_estimate_oversample : int or float, optional
            Used to estimate the `window` if not supplied using
            :func:`ResamplePolynomial.estimate_feature_windows`.
        leaf_size : int, optional
            Number of points at which to switch to brute-force during the
            ball tree query algorithm.  See `sklearn.neighbors.BallTree`
            for further details.
        large_data : bool, optional
            If `True`, indicates that this resampling algorithm will run
            on a large set of data, and the ball tree should be created
            on subsets of the data.
        check_memory : bool, optional
            If `True`, check the memory requirements for resampling the
            supplied data.
        memory_buffer : float, optional
            A fraction (positive or negative) with which to modify the
            memory estimates for the process memory requirements.  Memory
            estimates are scaled by the factor 1 + memory_buffer.
        distance_kwargs : dict, optional
            Optional keyword arguments passed into
            :func:`sklearn.neighbors.DistanceMetric`.  The default is to
            use the "minkowski" definition with `p=2`, i.e., the
            Euclidean definition.  This is important in determining which
            samples lie inside the window region of a resampling point,
            and when deriving distance weighting factors.

        Returns
        -------
        None
        """
        self._order = self.check_order(self._order, self._n_features,
                                       self._n_samples)
        memory_kwargs = {'order': self._order}
        super().set_sample_tree(
            coordinates,
            radius=radius,
            window_estimate_bins=window_estimate_bins,
            window_estimate_percentile=window_estimate_percentile,
            window_estimate_oversample=window_estimate_oversample,
            leaf_size=leaf_size,
            large_data=large_data,
            check_memory=check_memory,
            memory_buffer=memory_buffer,
            memory_kwargs=memory_kwargs,
            **distance_kwargs)
        self.sample_tree.set_order(self._order, fix_order=self._fix_order)
        self.sample_tree.precalculate_phi_terms()
    @staticmethod
    def check_order(order, n_features, n_samples):
        r"""
        Check the order is of correct format and enough samples exist.

        Parameters
        ----------
        order : int or array_like of int (n_features,)
            The polynomial order to fit, either supplied as an integer to
            be applied over all dimensions, or as an array to give the
            order in each dimension.
        n_features : int
            The number of features (dimensions) of the sample
            coordinates.
        n_samples : int
            The number of samples.

        Returns
        -------
        order : int or numpy.ndarray of int (n_features,)
            The formatted polynomial order.

        Raises
        ------
        ValueError
            If there are too few samples for the desired order, or
            `order` is not formatted correctly.
        """
        order = np.asarray(order, dtype=np.int64)
        if order.ndim > 1:
            raise ValueError("Order should be a scalar or 1-D array")
        elif order.ndim == 1 and order.size != n_features:
            raise ValueError(
                "Order vector does not match number of features")

        if order.ndim == 0:
            min_points = (order + 1) ** n_features
        else:
            min_points = np.prod(order + 1)

        if n_samples < min_points:
            raise ValueError("Too few data samples for order")

        return order
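    # For example (a minimal sketch): a symmetric order of 2 over two
    # features requires at least (2 + 1) ** 2 = 9 samples, so the call
    # below succeeds, while n_samples=8 would raise a ValueError.
    #
    #     >>> ResamplePolynomial.check_order(2, n_features=2,
    #     ...                                n_samples=9)  # returns array(2)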
    def calculate_minimum_points(self):
        """
        Return the minimum number of points for a polynomial fit.

        Returns
        -------
        minimum_points : int
            The minimum number of points for a polynomial fit.
        """
        o = np.asarray(self.order)
        if o.shape == ():
            o = np.full(self.features, int(o))
        return np.prod(o + 1)
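    # For example (hypothetical values): an asymmetric order of (2, 3)
    # over two features gives np.prod(np.asarray((2, 3)) + 1) = 3 * 4
    # = 12, so at least 12 samples must fall within the window of each
    # resampling point.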
    def reduction_settings(self, smoothing=0.0, relative_smooth=False,
                           adaptive_threshold=None,
                           adaptive_algorithm='scaled',
                           error_weighting=True, fit_threshold=0.0,
                           cval=np.nan, edge_threshold=0.0,
                           edge_algorithm='distribution',
                           order_algorithm='bounded', is_covar=False,
                           estimate_covariance=False, jobs=None,
                           adaptive_region_coordinates=None,
                           use_threading=None, use_processes=None):
        r"""
        Define a set of reduction instructions based on user input.

        This method is responsible for determining, formatting, and
        checking a number of variables required for the resampling
        algorithm based on user input.  For detailed descriptions of user
        options, please see :func:`ResamplePolynomial.__call__`.

        Parameters
        ----------
        smoothing : float or array_like (n_features,), optional
        relative_smooth : bool, optional
        adaptive_threshold : float or array_like (n_features,), optional
        adaptive_algorithm : str, optional
        error_weighting : bool, optional
        fit_threshold : float, optional
        cval : float, optional
        edge_threshold : float or array_like (n_features,), optional
        edge_algorithm : str, optional
        order_algorithm : str, optional
        is_covar : bool, optional
        estimate_covariance : bool, optional
        jobs : int, optional
        adaptive_region_coordinates : array_like, optional
        use_threading : bool, optional
            If `True`, force use of threads during multiprocessing.
        use_processes : bool, optional
            If `True`, force use of sub-processes during multiprocessing.

        Returns
        -------
        settings : dict
            The reduction settings.  Also stored as
            :func:`ResamplePolynomial.fit_settings`.
        """
        settings = super().reduction_settings(
            error_weighting=error_weighting,
            fit_threshold=fit_threshold,
            cval=cval,
            edge_threshold=edge_threshold,
            edge_algorithm=edge_algorithm,
            jobs=jobs,
            use_threading=use_threading,
            use_processes=use_processes)
        settings['estimate_covariance'] = estimate_covariance

        adaptive_algorithm = str(adaptive_algorithm).lower().strip()
        if (adaptive_algorithm != 'none'
                and adaptive_threshold is not None
                and not np.allclose(adaptive_threshold, 0)):
            if not error_weighting:
                raise ValueError("Error weighting must be enabled for "
                                 "adaptive smoothing")
            if self.sample_tree.order is None or np.allclose(
                    self.sample_tree.order, 0):
                raise ValueError("Adaptive smoothing cannot be applied "
                                 "for polynomial fit of zero order.")
            else:
                adaptive = True
                adaptive_algorithm = str(adaptive_algorithm).strip().lower()
                if adaptive_algorithm == 'shaped':
                    # only relevant in 2+ dimensions
                    shaped = self.features > 1
                elif adaptive_algorithm == 'scaled':
                    shaped = False
                else:
                    raise ValueError("Adaptive algorithm must be one of "
                                     "{None, 'shaped', 'scaled'}.")
        else:
            adaptive = False
            shaped = False
            adaptive_threshold = 0.0

        distance_weighting = smoothing is not None or adaptive
        error_weighting = adaptive or error_weighting
        if adaptive and not self._error_valid:
            raise ValueError(
                "Errors must be provided for adaptive smoothing")
        else:
            error_weighting &= self._error_valid

        if distance_weighting:
            # In the case that adaptive smoothing is set, but no distance
            # weighting is selected, smoothing of 1/3 is reasonable.
            if smoothing is None:
                smoothing = np.full(
                    self.features,
                    1 / 3 if relative_smooth else self.window / 3)

            alpha = np.atleast_1d(np.asarray(smoothing, dtype=np.float64))
            if alpha.size not in [1, self.features]:
                raise ValueError(
                    "Smoothing size does not match number of features")

            if adaptive:
                adaptive_threshold = np.atleast_1d(
                    np.asarray(adaptive_threshold, dtype=np.float64))
                if alpha.size != self.features:
                    alpha = np.full(self.features, alpha[0])
                if adaptive_threshold.size not in [1, self.features]:
                    raise ValueError("Adaptive smoothing size does not "
                                     "match number of features")
                elif adaptive_threshold.size != self.features:
                    adaptive_threshold = np.full(
                        self.features, adaptive_threshold[0])

            if not relative_smooth:
                if alpha.size != self.features:  # alpha size = 1
                    alpha = np.full(self.features, alpha[0])
                if not adaptive:
                    alpha = 2 * (alpha ** 2) / (self.window ** 2)
                else:
                    alpha /= self.window  # sigma in terms of window

            if ((not adaptive and alpha.size == 1)
                    or np.unique(alpha).size == 1):
                # Symmetrical across dimensions - use single value
                alpha = np.atleast_1d(np.float64(alpha[0]))
        else:
            alpha = np.asarray([0.0])

        order = self.sample_tree.order
        order_varies = self.sample_tree.order_varies
        order_symmetry = self.sample_tree.order_symmetry
        order_algorithm = str(order_algorithm).lower().strip()
        order_func_lookup = {
            'none': 0,
            'bounded': 1,
            'extrapolate': 2,
            'counts': 3
        }
        if order_algorithm not in order_func_lookup:
            raise ValueError(f"Unknown order algorithm: {order_algorithm}")
        order_algorithm_idx = order_func_lookup[order_algorithm]

        if order_symmetry:
            order_minimum_points = (order + 1) ** self.features
        else:
            order_minimum_points = np.prod(order + 1)

        if is_covar:
            mean_fit = True
        else:
            mean_fit = order_symmetry and order == 0

        if adaptive:
            region_coordinates = adaptive_region_coordinates
        else:
            region_coordinates = None

        settings['distance_weighting'] = distance_weighting
        settings['alpha'] = alpha
        settings['adaptive_threshold'] = adaptive_threshold
        settings['shaped'] = shaped
        settings['adaptive_alpha'] = np.empty((0, 0, 0, 0),
                                              dtype=np.float64)
        settings['order'] = np.atleast_1d(order)
        settings['order_varies'] = order_varies
        settings['order_algorithm'] = order_algorithm
        settings['order_algorithm_idx'] = order_algorithm_idx
        settings['order_symmetry'] = order_symmetry
        settings['order_minimum_points'] = order_minimum_points
        settings['is_covar'] = is_covar
        settings['mean_fit'] = mean_fit
        settings['relative_smooth'] = relative_smooth
        settings['adaptive_region_coordinates'] = region_coordinates
        self._fit_settings = settings
        return settings
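    # Illustrative sketch (hypothetical instance and values): the
    # settings dictionary can be inspected before a reduction.  Supplying
    # any smoothing enables distance weighting, and the default 'bounded'
    # order algorithm maps to lookup index 1 in the table above.
    #
    #     >>> settings = resampler.reduction_settings(smoothing=0.5)
    #     >>> settings['distance_weighting']
    #     True
    #     >>> settings['order_algorithm_idx']
    #     1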
    def calculate_adaptive_smoothing(self, settings):
        r"""
        Calculate the adaptive distance weighting kernel.

        Calculates a weighting kernel for each sample in the reduction,
        i.e., each sample will have a different weighting factor based on
        its distance (and optionally, direction) from each point at which
        a fit is required.  This is done by performing a fit centered on
        each sample, and measuring how that fit deviates from the actual
        observed sample value.

        The first step is to decide upon the distance weighting factor
        used to perform the initial fit.  It is assumed that the
        coordinate error is known (or approximately known), and supplied
        as a :math:`1 \sigma` error as the 'alpha' keyword in `settings`.
        For example, the error in astronomical observations taken using a
        Gaussian beam with known FWHM is:

        .. math::

            \sigma = \frac{\text{FWHM}}{2 \sqrt{2 \ln{2}}}

        However, adaptive weighting may be calculated over select
        dimensions, leaving some fixed.  In this case, the `alpha`
        keyword will always apply a weighting factor in a "fixed"
        dimension :math:`k` as:

        .. math::

            w_k = \exp
                \left(
                    \frac{-\Delta x_k^2}{\alpha_{fixed, k} \Omega_k^2}
                \right)

        where :math:`\Delta x_k` is the distance from the resampling
        point to the sample in question, and :math:`\Omega_k` is the
        window principal axis in dimension :math:`k`.  Note that in the
        weighting function, :math:`\sigma` is related to :math:`\alpha`
        by:

        .. math::

            \alpha_k = 2 \sigma_k^2

        and weighting is applied using the definition of :math:`w_k`
        above.  To signify that :math:`\alpha_{fixed, k}` should be used
        instead of :math:`\sigma` for dimension :math:`k`, the
        "adaptive_threshold" array in `settings` should be set to zero
        for the corresponding dimension.  For example, if we had::

            settings['alpha'] = [0.3, 0.3]
            settings['adaptive_threshold'] = [1, 0]

        the first dimension would have :math:`\alpha_0 = 0.18`
        (:math:`2 \times 0.3^2`), and the second would have
        :math:`\alpha_1 = 0.3`.  In this example, :math:`\alpha_0` would
        be allowed to vary per sample, while :math:`\alpha_1` would be
        fixed for each sample at 0.3.  An initial fit is then performed
        at each sample coordinate using a test distance weighting
        parameter:

        .. math::

            \sigma_{test, k} = \frac{\pi \sigma_k}{\sqrt{2 \ln{2}}}

            \alpha_{test, k} = 2 \sigma_{test, k}^2

        Using this initial weighting parameter, the adaptive kernels are
        derived using either
        :func:`resample_utils.shaped_adaptive_weight_matrix` or
        :func:`resample_utils.scaled_adaptive_weight_matrix` depending on
        whether the "shaped" keyword in `settings` is set to `True` or
        `False` respectively.

        The weighting kernels are stored in the "adaptive_alpha" keyword
        value in `settings`.

        Parameters
        ----------
        settings : dict
            Reduction settings, as returned by
            :func:`ResamplePolynomial.reduction_settings`.

        Returns
        -------
        None
        """
        adaptive = settings['adaptive_threshold']
        do_adaptive = adaptive is not None
        if do_adaptive:
            adaptive = np.atleast_1d(adaptive).astype(float)
            if adaptive.size == 0 or np.allclose(adaptive, 0):
                do_adaptive = False

        if not do_adaptive:
            settings['adaptive_threshold'] = None
            settings['adaptive_alpha'] = np.empty((0, 0, 0, 0))
            return

        sigma = np.atleast_1d(settings['alpha'])
        if sigma.size != self.features:
            sigma = np.full(self.features, sigma[0])

        fixed = adaptive == 0
        nyquist = np.pi / np.sqrt(2 * np.log(2))
        test_sigma = sigma * nyquist
        test_sigma[fixed] = sigma[fixed].copy()
        scaled_test_sigma = test_sigma.copy()
        scaled_test_sigma[~fixed] *= adaptive[~fixed]
        scaled_test_alpha = 2 * (scaled_test_sigma ** 2)
        if settings['relative_smooth']:
            scaled_test_alpha[fixed] = sigma[fixed].copy()
        else:
            scaled_test_alpha[fixed] = 2 * (sigma[fixed] ** 2)

        shaped = settings['shaped']

        test_reduction = self.__call__(
            scale_coordinates(self.sample_tree.coordinates.copy(),
                              self._radius, self._scale_offsets,
                              reverse=True),
            smoothing=scaled_test_alpha,
            relative_smooth=True,
            adaptive_threshold=None,
            fit_threshold=0.0,  # No fit checking
            cval=np.nan,  # NaN on failure
            edge_threshold=0.0,  # No edge checking
            error_weighting=settings['error_weighting'],
            order_algorithm=settings['order_algorithm'],
            estimate_covariance=settings['estimate_covariance'],
            get_error=False,
            get_counts=shaped,
            get_weights=False,
            get_distance_weights=shaped,
            get_rchi2=True,
            get_cross_derivatives=shaped,
            get_offset_variance=shaped,
            is_covar=False,
            jobs=settings['jobs'],
            adaptive_region_coordinates=settings[
                'adaptive_region_coordinates'])

        if shaped:
            counts = np.atleast_2d(test_reduction[1])
            distance_weights = np.atleast_2d(test_reduction[2])
            rchi2 = np.atleast_2d(test_reduction[3])
            gradient_mscp = test_reduction[4]
            distribution_offset_variance = test_reduction[5]
            if gradient_mscp.ndim == 3:
                gradient_mscp = gradient_mscp[None]
            rho = relative_density(scaled_test_sigma, counts,
                                   distance_weights)
        else:
            rchi2 = np.atleast_2d(test_reduction[1])
            gradient_mscp = None
            rho = None
            distribution_offset_variance = None

        # Here the Nyquist level sigma is used, thereby implementing the
        # requested scaling "adaptive" factor.
        if shaped:
            gmat = shaped_adaptive_weight_matrices(
                test_sigma, rchi2, gradient_mscp,
                density=rho,
                variance_offsets=distribution_offset_variance,
                fixed=fixed)
        else:
            gmat = scaled_adaptive_weight_matrices(
                test_sigma, rchi2, fixed=fixed)

        settings['adaptive_alpha'] = np.swapaxes(gmat, 0, 1).copy()
        self._fit_settings = settings
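    # Illustrative sketch (hypothetical values): adaptive weighting is
    # normally enabled through __call__, which invokes this method
    # internally.  Errors must have been supplied at initialization.  For
    # a Gaussian beam of known FWHM, `smoothing` gives the 1-sigma
    # coordinate error:
    #
    #     >>> fwhm = 1.0
    #     >>> sigma = fwhm / (2 * np.sqrt(2 * np.log(2)))
    #     >>> fit = resampler(x_out, smoothing=sigma,
    #     ...                 adaptive_threshold=1.0,
    #     ...                 adaptive_algorithm='scaled')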
    def pre_fit(self, settings, *args, adaptive_region_coordinates=None):
        """
        Perform pre-fitting steps and build the fitting tree.

        Parameters
        ----------
        settings : dict
            Settings calculated via `reduction_settings` to be applied
            if necessary.
        args : n-tuple
            The call input arguments.
        adaptive_region_coordinates : numpy.ndarray (float), optional
            The coordinates determined from a previous adaptive smoothing
            algorithm.

        Returns
        -------
        None
        """
        settings['adaptive_region_coordinates'] = args
        self.calculate_adaptive_smoothing(settings)
        super().pre_fit(settings, *args)

        if adaptive_region_coordinates is not None:
            region_grid = self.grid_class(
                *adaptive_region_coordinates,
                tree_shape=self.sample_tree.shape,
                build_tree=True,
                scale_factor=self._radius,
                scale_offset=self._scale_offsets,
                dtype=np.float64)
            skip_blocks = region_grid.tree.hood_population == 0
            self.fit_tree.block_population[skip_blocks] = 0
            if settings['order_symmetry']:
                o = settings['order'][0]
            else:
                o = settings['order']
            self.fit_tree.set_order(o,
                                    fix_order=not settings['order_varies'])
            self.fit_tree.precalculate_phi_terms()
    @classmethod
    def process_block(cls, args, block):
        r"""
        Run :func:`solve_fits` on each block.

        Utility function that parses the settings and tree objects to
        something usable by the numba JIT compiled resampling functions.
        This is not meant to be called directly.

        Parameters
        ----------
        args : 2-tuple
            A tuple of form (filename, iteration) where the filename is a
            string pointing towards a previously saved pickle file
            containing the relevant information for the reduction if
            required.  If set to `None`, the arguments are retrieved from
            the `_global_resampling_values` global parameter.
        block : int
            The block index to process.

        Returns
        -------
        results : 9-tuple of numpy.ndarray
            The first element contains the fit point indices to be fit.
            For the remaining elements, please see :func:`solve_fits`
            return values.
        """
        filename, iteration = args
        # Loading cannot be covered in tests as it occurs on other CPUs.
        load_args = False
        block_memory = _global_resampling_values.get('block_memory_usage')
        if not cls.sufficient_memory_for(block_memory):  # pragma: no cover
            raise MemoryError(
                f"Insufficient memory to process block {block}. "
                f"Estimated: {block_memory} bytes. "
                f"Available: {psutil.virtual_memory().available} bytes.")

        if filename is not None:  # pragma: no cover
            if 'args' not in _global_resampling_values:
                load_args = True
            elif 'iteration' not in _global_resampling_values:
                load_args = True
            elif iteration != _global_resampling_values.get('iteration'):
                load_args = True
            elif filename != _global_resampling_values.get('filename'):
                load_args = True

        if load_args:  # pragma: no cover
            _global_resampling_values['args'], _ = unpickle_file(filename)
            _global_resampling_values['iteration'] = iteration
            _global_resampling_values['filename'] = filename

        (sample_values, sample_error, sample_mask, fit_tree, sample_tree,
         get_error, get_counts, get_weights, get_distance_weights,
         get_rchi2, get_cross_derivatives, get_offset_variance,
         settings) = _global_resampling_values['args']

        if load_args:  # pragma: no cover
            block_memory = settings.get('block_memory_usage')
            if not cls.sufficient_memory_for(block_memory):
                raise MemoryError(
                    f"Insufficient memory to process {filename}. "
                    f"Estimated: {block_memory} bytes. "
                    f"Available: "
                    f"{psutil.virtual_memory().available} bytes.")

        fit_indices, fit_coordinates, fit_phi_terms = \
            fit_tree.block_members(block, get_locations=True,
                                   get_terms=True)

        sample_indices = nb.typed.List(sample_tree.query_radius(
            fit_coordinates, 1.0, block=block, return_distance=False))

        if not sample_tree.large_data:
            sample_phi_terms = sample_tree.phi_terms
        else:
            phi_indices = sample_tree.create_phi_terms_for(block)
            n_coeffs = sample_tree.exponents.shape[0]
            n = sample_tree.n_members
            sample_phi_terms = np.empty((n_coeffs, n), dtype=float)
            sample_phi_terms[:, phi_indices] = sample_tree.phi_terms

        return (fit_indices, *solve_fits(
            sample_indices, sample_tree.coordinates, sample_phi_terms,
            sample_values, sample_error, sample_mask, fit_coordinates,
            fit_phi_terms, settings['order'], settings['alpha'],
            settings['adaptive_alpha'],
            is_covar=settings['is_covar'],
            mean_fit=settings['mean_fit'],
            cval=settings['cval'],
            fit_threshold=settings['fit_threshold'],
            error_weighting=settings['error_weighting'],
            estimate_covariance=settings['estimate_covariance'],
            order_algorithm_idx=settings['order_algorithm_idx'],
            order_term_indices=sample_tree.term_indices,
            derivative_term_map=sample_tree.derivative_term_map,
            edge_algorithm_idx=settings['edge_algorithm_idx'],
            edge_threshold=settings['edge_threshold'],
            minimum_points=settings['order_minimum_points'],
            get_error=get_error,
            get_counts=get_counts,
            get_weights=get_weights,
            get_distance_weights=get_distance_weights,
            get_rchi2=get_rchi2,
            get_cross_derivatives=get_cross_derivatives,
            get_offset_variance=get_offset_variance))
    def __call__(self, *args, smoothing=0.0, relative_smooth=False,
                 adaptive_threshold=None, adaptive_algorithm='scaled',
                 fit_threshold=0.0, cval=np.nan, edge_threshold=0.0,
                 edge_algorithm='distribution', order_algorithm='bounded',
                 error_weighting=True, estimate_covariance=False,
                 is_covar=False, jobs=None, use_threading=None,
                 use_processes=None, adaptive_region_coordinates=None,
                 get_error=False, get_counts=False, get_weights=False,
                 get_distance_weights=False, get_rchi2=False,
                 get_cross_derivatives=False, get_offset_variance=False,
                 **kwargs):
        """
        Resample data defined during initialization to new coordinates.

        Parameters
        ----------
        args : array_like or n-tuple of array_like
            args can take one of the following formats:

            - grid : n-tuple of array_like
              Here `n` is the number of dimensions and each array should
              be of shape (n_data,).  This indicates that resampling
              should occur on a grid where the first argument corresponds
              to the coordinates along the first dimension.
            - single point : n-tuple of float
              `n` is the number of dimensions.  The coordinate to
              resample onto.
            - irregular : array_like
              An array of shape (n_dimensions, n_data) defining a set of
              coordinates onto which to resample.

        smoothing : float or array_like of float, optional
            If set, weights the polynomial fitting by distance from the
            resampling coordinate to the sample coordinate.  The
            weighting factor applied is exp(-dx^2 / `smoothing`).  A
            value may be defined for each dimension by providing
            `smoothing` as an array of shape (n_dimensions,).  However,
            if `adaptive_threshold` > 0 for a certain feature, the
            corresponding smoothing element has a different meaning.  In
            this case, it gives the 1-sigma uncertainty in the coordinate
            position for that feature.  As a reference, for astronomical
            observations using a Gaussian beam with known FWHM,
            `smoothing` should be set to FWHM / (2 * sqrt(2 * log(2)))
            so long as adaptive weighting is enabled.
        relative_smooth : bool, optional
            If `True`, the supplied `smoothing` value is defined in units
            of `window`.  Otherwise, `smoothing` is in the same units as
            the supplied coordinates.
        adaptive_threshold : float or array_like (n_features,), optional
            If a single value is supplied, each feature will use this
            value.  Otherwise, each feature should specify an adaptive
            threshold as an element of `adaptive_threshold`.  If a
            non-zero value is supplied, adaptive weighting will be
            enabled for that feature.  These values define the size of
            the initial weighting kernel used during fitting.  The
            nominal value is 1.
        adaptive_algorithm : str, optional
            May be one of {'scaled', 'shaped', None}.  `None` disables
            adaptive weighting.  The "scaled" algorithm allows the
            weighting kernel to change in size only.  The "shaped"
            algorithm allows the kernel to change in size, rotate, and
            stretch each primary axis.  Please see
            :func:`resample_utils.scaled_adaptive_weight_matrix` and
            :func:`resample_utils.shaped_adaptive_weight_matrix` for
            further details.
        fit_threshold : float, optional
            If nonzero, rejects a polynomial fit if it deviates by
            abs(`fit_threshold`) times the RMS of the samples.  If a fit
            is rejected, the value will be replaced by the mean (error
            weighted if set) when `fit_threshold` > 0, or NaN when
            `fit_threshold` < 0.
        cval : float, optional
            During fitting, any fit that does not meet the
            `order_algorithm` requirement will be set to this value.
            This will be NaN by default.
        edge_threshold : float or array_like of float, optional
            If set to a value > 0, edges of the fit will be masked out
            according to `edge_algorithm`.  Values close to zero will
            result in a low degree of edge clipping, while values close
            to 1 clip edges to a greater extent.  The exact definition of
            `edge_threshold` depends on the algorithm.  For further
            details, please see
            :func:`resampling.resample_utils.check_edges`.
        edge_algorithm : str, optional
            Describes how to clip edges if `edge_threshold` is non-zero.
            The available algorithms are:

            - 'distribution' (default): Statistics on sample
              distributions are calculated, and if the resampling point
              is > 1/threshold standard deviations away from the sample
              mean, it will be clipped.
            - 'ellipsoid': If the samples used to fit a resampling point
              deviate from the resampling point location by more than
              this amount, it will be clipped.
            - 'box': If the center-of-mass of the flattened
              1-dimensional distribution of samples deviates from the
              resampling point location in any dimension, it will be
              clipped.
            - 'range': Over each dimension, check the distribution of
              points is greater than `edge_threshold` to the "left" and
              "right" of the resampling point.

        order_algorithm : str, optional
            The type of check to perform on whether the sample
            distribution for each resampling point is adequate to derive
            a polynomial fit.  Depending on `order` and `fix_order`, if
            the distribution does not meet the criteria for
            `order_algorithm`, either the fit will be aborted, returning
            a value of `cval`, or the fit order will be reduced.
            Available algorithms are:

            - 'bounded': Require that there are `order` samples in both
              the negative and positive directions of each feature from
              the resampling point.
            - 'counts': Require that there are
              (order + 1) ** n_features samples within the `window` of
              each resampling point.
            - 'extrapolate': Attempt to fit regardless of the sample
              distribution.

            Note that 'bounded' is the most robust mode as it ensures
            that no singular values will be encountered during the
            least-squares fitting of polynomial coefficients.
        error_weighting : bool, optional
            If `True` (default), weight polynomial fitting by the `error`
            values of each sample.
        estimate_covariance : bool, optional
            If `True`, calculate errors on the fit using
            :func:`resample_utils.estimated_covariance_matrix_inverse`.
            Otherwise, use
            :func:`resample_utils.covariance_matrix_inverse`.
        is_covar : bool, optional
            If `True`, the input data is treated as a covariance instead
            of a flux, and is propagated as if through a weighted mean.
        jobs : int, optional
            Specifies the maximum number of concurrently running jobs.
            An attempt will be made to parallel process using a
            thread-pool if available, but will otherwise revert to the
            "loky" backend.  Values of 0 or 1 will result in serial
            processing.  A negative value sets jobs to n_cpus + 1 + jobs
            such that -1 would use all cpus, and -2 would use all but one
            cpu.
        use_threading : bool, optional
            If `True`, force use of threads during multiprocessing.
        use_processes : bool, optional
            If `True`, force use of sub-processes during multiprocessing.
        get_error : bool, optional
            If `True`, returns the error which is given as the weighted
            RMS of the samples used for each resampling point.
        get_counts : bool, optional
            If `True`, returns the number of samples used to fit each
            resampling point.
        get_weights : bool, optional
            If `True`, returns the sum of all sample weights (error and
            distance) used in the fit at each resampling point.
        get_distance_weights : bool, optional
            If `True`, returns the sum of all sample distance weights
            (not including error) used in the fit at each resampling
            point.
        get_rchi2 : bool, optional
            If `True`, returns the reduced chi-squared statistic of the
            fit at each resampling point.  Note that this is only valid
            if errors were supplied during
            :func:`ResamplePolynomial.__init__`.
        get_cross_derivatives : bool, optional
            If `True`, returns the derivative mean-squared-cross-product
            of the fit derivatives at each resampling point.
        get_offset_variance : bool, optional
            If `True`, returns the offset of the resampling point from
            the center of the sample distribution used to generate the
            fit as a variance.  I.e., a return value of 9 indicates a
            3-sigma deviation of the resampling point from the sample
            distribution.

        Returns
        -------
        fit_data, [fit_error], [fit_counts]
            The data fit at `args` and optionally, the error associated
            with the fit, and the number of samples used to generate each
            point.  Will be of a shape determined by `args`.  `fit_data`
            and `fit_error` will be of type np.float64, and `fit_counts`
            will be of type np.int64.
        """
        return super().__call__(
            *args,
            smoothing=smoothing,
            relative_smooth=relative_smooth,
            adaptive_threshold=adaptive_threshold,
            adaptive_algorithm=adaptive_algorithm,
            fit_threshold=fit_threshold,
            cval=cval,
            edge_threshold=edge_threshold,
            edge_algorithm=edge_algorithm,
            order_algorithm=order_algorithm,
            error_weighting=error_weighting,
            estimate_covariance=estimate_covariance,
            is_covar=is_covar,
            jobs=jobs,
            adaptive_region_coordinates=adaptive_region_coordinates,
            use_threading=use_threading,
            use_processes=use_processes,
            get_error=get_error,
            get_counts=get_counts,
            get_weights=get_weights,
            get_distance_weights=get_distance_weights,
            get_rchi2=get_rchi2,
            get_cross_derivatives=get_cross_derivatives,
            get_offset_variance=get_offset_variance,
            **kwargs)
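    # Illustrative sketch (hypothetical instance): optional outputs are
    # appended to the returned tuple in the documented order.
    #
    #     >>> fit, error, counts = resampler(x_out, get_error=True,
    #     ...                                get_counts=True)
    #     >>> fit.shape == error.shape == counts.shape
    #     True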
def resamp(coordinates, data, *locations,
           error=None, mask=None, window=None,
           order=1, fix_order=True,
           robust=None, negthresh=None,
           window_estimate_bins=10,
           window_estimate_percentile=50,
           window_estimate_oversample=2.0,
           leaf_size=40, large_data=False,
           smoothing=0.0, relative_smooth=False,
           adaptive_threshold=None, adaptive_algorithm='scaled',
           fit_threshold=0.0, cval=np.nan,
           edge_threshold=0.0, edge_algorithm='distribution',
           order_algorithm='bounded', error_weighting=True,
           estimate_covariance=False, is_covar=False, jobs=None,
           get_error=False, get_counts=False, get_weights=False,
           get_distance_weights=False, get_rchi2=False,
           get_cross_derivatives=False, get_offset_variance=False,
           **distance_kwargs):
    r"""
    Resample data using local polynomial fitting.

    Initializes and then calls the :class:`ResamplePolynomial` class.
    For further details on all available parameters, please see
    :func:`ResamplePolynomial.__init__` and
    :func:`ResamplePolynomial.__call__`.

    Parameters
    ----------
    coordinates
    data
    locations
    error
    mask
    window
    order
    fix_order
    robust
    negthresh
    window_estimate_bins
    window_estimate_percentile
    window_estimate_oversample
    leaf_size
    large_data
    smoothing
    relative_smooth
    adaptive_threshold
    adaptive_algorithm
    fit_threshold
    cval
    edge_threshold
    edge_algorithm
    order_algorithm
    error_weighting
    estimate_covariance
    is_covar
    jobs
    get_error
    get_counts
    get_weights
    get_distance_weights
    get_rchi2
    get_cross_derivatives
    get_offset_variance
    distance_kwargs

    Returns
    -------
    results : float or numpy.ndarray or n-tuple of (float or numpy.ndarray)
        If a fit is performed at a single location, the output will
        consist of int or float scalar values.  Multiple fits result in
        numpy arrays.  The exact output shape depends on the number of
        data sets, the number of fitted points, and the dimensions of
        the fit locations.  Assuming that all get_* keywords are set to
        `True`, the output order is:

            results[0] = fitted values
            results[1] = error on the fit
            results[2] = sample counts for each fit
            results[3] = total weight of all samples in fit
            results[4] = total distance weight sum of all samples in fit
            results[5] = reduced chi-squared statistic of the fit
            results[6] = derivative mean squared cross products
            results[7] = offset variance of fit from sample distribution
    """
    resampler = ResamplePolynomial(
        coordinates, data,
        error=error, mask=mask, window=window,
        order=order, fix_order=fix_order,
        robust=robust, negthresh=negthresh,
        window_estimate_bins=window_estimate_bins,
        window_estimate_percentile=window_estimate_percentile,
        window_estimate_oversample=window_estimate_oversample,
        leaf_size=leaf_size, large_data=large_data,
        **distance_kwargs)

    return resampler(*locations,
                     smoothing=smoothing,
                     relative_smooth=relative_smooth,
                     adaptive_threshold=adaptive_threshold,
                     adaptive_algorithm=adaptive_algorithm,
                     fit_threshold=fit_threshold,
                     cval=cval,
                     edge_threshold=edge_threshold,
                     edge_algorithm=edge_algorithm,
                     order_algorithm=order_algorithm,
                     error_weighting=error_weighting,
                     estimate_covariance=estimate_covariance,
                     is_covar=is_covar,
                     jobs=jobs,
                     get_error=get_error,
                     get_counts=get_counts,
                     get_weights=get_weights,
                     get_distance_weights=get_distance_weights,
                     get_rchi2=get_rchi2,
                     get_cross_derivatives=get_cross_derivatives,
                     get_offset_variance=get_offset_variance)
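# Illustrative sketch (hypothetical values): the one-shot functional
# interface builds the resampler and evaluates it in a single call.
# Coordinates use the documented (n_features, n_samples) shape.
#
#     >>> import numpy as np
#     >>> from grig.resample_polynomial import resamp
#     >>> rand = np.random.RandomState(1)
#     >>> x = rand.uniform(0, 10, 256)
#     >>> y = np.sin(x) + rand.normal(0, 0.05, 256)
#     >>> x_out = np.linspace(0, 10, 101)
#     >>> fit, counts = resamp(x[None], y, x_out, window=2.0, order=2,
#     ...                      get_counts=True)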