Source code for bayesmark.expected_max

# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute expected maximum or minimum from iid samples.
"""
import numpy as np
from scipy.special import gammaln, logsumexp


[docs]def get_expected_max_weights(n, m):
    """Get the L-estimator weights for computing unbiased estimator of expected ``max(x[1:m])`` on a data set.

    Parameters
    ----------
    n : int
        Number of data points in data set ``len(x)``. Must be ``>= 1``.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be
        broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
        case.

    Returns
    -------
    pdf : :class:`numpy:numpy.ndarray`, shape (n,)
        The weights for L-estimator. Will be positive and sum to one.
    """
    assert np.ndim(n) == 0
    assert n >= 1  # otherwise makes no sense

    m = np.asarray(m)  # Must be np type for broadcasting
    # We could also check dtype is int, but not bothering here
    assert np.all(m >= 1)  # otherwise makes no sense
    m = m[..., None]

    kk = 1 + np.arange(n)
    lpdf = gammaln(kk) - gammaln(kk - (m - 1))
    pdf = np.exp(lpdf - logsumexp(lpdf, axis=-1, keepdims=True))
    # expect nan for m > n
    assert np.all((m > n) | np.isclose(np.sum(pdf, axis=-1, keepdims=True), 1.0))
    return pdf


[docs]def expected_max(x, m):
    """Compute unbiased estimator of expected ``max(x[1:m])`` on a data set.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n,)
        Data set we would like expected ``max(x[1:m])`` on.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        This function is for estimating the expected maximum over `m` iid draws. Require ``m >= 1``. This can be
        broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
        case.

    Returns
    -------
    E_max_x : float
        Unbiased estimate of mean max of `m` draws from distribution on `x`.
    """
    assert np.ndim(x) == 1
    # m is validated by get_expected_max_weights

    # Get order stats for L-estimator
    x = np.array(x, copy=True)  # we will modify in place
    x.sort()  # in place!!

    # Now get estimator weights
    n, = x.shape
    if n == 0:
        return np.full(np.shape(m), np.nan)
    pdf = get_expected_max_weights(n, m)

    # Compute L-estimator
    E_max_x = np.sum(x * pdf, axis=-1)
    return E_max_x


[docs]def expected_min(x, m):
    """Compute unbiased estimator of expected ``min(x[1:m])`` on a data set.

    Parameters
    ----------
    x : :class:`numpy:numpy.ndarray` of shape (n,)
        Data set we would like expected ``min(x[1:m])`` on. Require ``len(x) >= 1``.
    m : `int` or :class:`numpy:numpy.ndarray` with dtype `int`
        This function is for estimating the expected minimum over `m` iid draws. Require ``m >= 1``. This can be
        broadcasted. If ``m > n``, the weights will be nan, because there is no way to get unbiased estimate in that
        case.

    Returns
    -------
    E_min_x : float
        Unbiased estimate of mean min of `m` draws from distribution on `x`.
    """
    x = np.asarray(x)
    E_min_x = -expected_max(-x, m)
    return E_min_x