Source code for bayesmark.expected_max

# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute expected maximum or minimum from iid samples.
"""
import numpy as np
from scipy.special import gammaln, logsumexp


[docs]def get_expected_max_weights(n, m): """Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set. Parameters ---------- n : int Number of data points in data set ``len(x)``. Must be ``>= 1``. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. Returns ------- pdf : :class:`numpy:numpy.ndarray`, shape (n,) The weights for L-estimator. Will be positive and sum to one. """ assert np.ndim(n) == 0 assert n >= 1 # otherwise makes no sense m = np.asarray(m) # Must be np type for broadcasting # We could also check dtype is int, but not bothering here assert np.all(m >= 1) # otherwise makes no sense m = m[..., None] kk = 1 + np.arange(n) lpdf = gammaln(kk) - gammaln(kk - (m - 1)) pdf = np.exp(lpdf - logsumexp(lpdf, axis=-1, keepdims=True)) # expect nan for m > n assert np.all((m > n) | np.isclose(np.sum(pdf, axis=-1, keepdims=True), 1.0)) return pdf
[docs]def expected_max(x, m): """Compute unbiased estimator of expected ``max(x[1:m])`` on a data set. Parameters ---------- x : :class:`numpy:numpy.ndarray` of shape (n,) Data set we would like expected ``max(x[1:m])`` on. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. Returns ------- E_max_x : float Unbiased estimate of mean max of `m` draws from distribution on `x`. """ assert np.ndim(x) == 1 # m is validated by get_expected_max_weights # Get order stats for L-estimator x = np.array(x, copy=True) # we will modify in place x.sort() # in place!! # Now get estimator weights n, = x.shape if n == 0: return np.full(np.shape(m), np.nan) pdf = get_expected_max_weights(n, m) # Compute L-estimator E_max_x = np.sum(x * pdf, axis=-1) return E_max_x
[docs]def expected_min(x, m): """Compute unbiased estimator of expected ``min(x[1:m])`` on a data set. Parameters ---------- x : :class:`numpy:numpy.ndarray` of shape (n,) Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``. m : `int` or :class:`numpy:numpy.ndarray` with dtype `int` This function is for estimating the expected minimum over `m` iid draws. Require ``m >= 1``. This can be broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that case. Returns ------- E_min_x : float Unbiased estimate of mean min of `m` draws from distribution on `x`. """ x = np.asarray(x) E_min_x = -expected_max(-x, m) return E_min_x