Source code for medpy.features.histogram

# Copyright (C) 2013 Oskar Maier
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# author Oskar Maier
# version r0.1.3
# since 2012-03-01
# status Release

# build-in modules
import math

# third-party modules
import scipy.stats

# own modules

# constants
# the available membership functions for fuzzy histogram calculation
__MBS = ['triangular', 'trapezoid', 'gaussian', 'sigmoid']

# code
[docs]def fuzzy_histogram(a, bins=10, range=None, normed=False, membership='triangular', smoothness=None, guarantee=False):
    r"""Compute a fuzzy histogram.
    The percentage of a value's membership in a bin is computed using the selected
    membership function. This functions stays as near as possible to the `numpy.histogram`
    behaviour.
    
    Parameters
    ----------
    a : array_like
        Input data; The histogram is computed over the flattened array (with ravel()).
    bins : int
        The number of equal-width bins in the given range (10, by default).
    range : (float, float)
        The lower and upper range of the bins; If not provided, range is simply
        (a.min(), a.max()); Values outside the range are ignored.
    normed : bool
        If False, the result will contain the number of samples in each bin; If
        True, the result is the value of the probability density function at
        the bin, normalized such that the integral over the range is 1.
    membership : string
        Select the type of the fuzzy membership function; See package
        description for available options.
    smoothness : float
        The smoothness of the fuzzy function; See package
        description and the membership functions for more details.
    guarantee : bool
        Guarantee that all values contribute equally to the histogram; when this value is
        set, the range term is ignored; see package descriptions for details.
    
    Returns
    -------
    hist : array
        The values of the histogram. See normed and weights for a description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges (length(hist)+1).
    
    Notes
    -----
    See package description for more details on the usage.
    
    Examples
    --------
    >>> import numpy as np
    >>> from medpy.features import fuzzy_histogram
    >>> a = np.asarray([1,2,3,3.2,3.4,3.5,7.5,7.6,7.8,8,9,10])
    >>> np.histogram(a, bins=4)
    (array([4, 2, 2, 4]), array([  1.  ,   3.25,   5.5 ,   7.75,  10.  ]))
    >>> fuzzy_histogram(a, bins=4)
    (array([ 3.4       ,  2.04444444,  2.04444444,  3.4       ]), array([  1.  ,   3.25,   5.5 ,   7.75,  10.  ]))
    >>> fuzzy_histogram(a, bins=4, membership='sigmoid')
    (array([ 3.34304743,  2.15613626,  2.15613626,  3.34304743]), array([  1.  ,   3.25,   5.5 ,   7.75,  10.  ]))
    
    """
    # check and prepare parameters
    a = scipy.asarray(a).ravel()
    if None == range: range = (a.min(), a.max())
    if range[1] <= range[0]: raise AttributeError('max must be larger than min in range parameter.')
    if not int == type(bins): raise AttributeError('bins must an integer.')
    if bins <= 0: raise AttributeError('bins must greater than zero.')
    if membership not in __MBS: raise AttributeError('Unknown type: {}. Must be one of {}.'.format(membership, __MBS))
    if not None == smoothness and smoothness <= 0.0: raise AttributeError('smoothness must be greater than zero.')
    
    # set default smoothness values
    if None == smoothness:
        smoothness = 0.25 if 'trapezoid' == membership else 0.5
    
    if not guarantee: # compute bin distribution in no guarantee case
        binw = (range[1] - range[0]) / float(bins)
        bins = scipy.asarray([i * binw + range[0] for i in scipy.arange(bins + 1)])
    else: # compute bin distribution for guarantee case
        bins_core = bins - 2 * int(math.ceil(smoothness))
        if bins_core <= 0: raise AttributeError('bins to few to guarantee removing boundary effect.')
        binw = (range[1] - range[0]) / float(bins_core)
        range = (range[0] - int(math.ceil(smoothness)) * binw, range[1] + int(math.ceil(smoothness)) * binw)
        bins = scipy.asarray([i * binw + range[0] for i in scipy.arange(bins + 1)])
    
    # create membership function (centered at 0)
    if 'triangular' == membership:
        membership = triangular_membership(0, binw, smoothness)
    elif 'trapezoid' == membership:
        membership = trapezoid_membership(0, binw, smoothness)
    elif 'gaussian' == membership:
        membership = gaussian_membership(0, binw, smoothness)
    elif 'sigmoid' == membership:
        membership = sigmoidal_difference_membership(0, binw, smoothness)

    # compute histogram i.e. memberships of values across neighbourhood (determined by smoothness)
    neighbourhood = int(math.ceil(smoothness))
    l = len(bins) - 2
    histogram = scipy.zeros(l + 1)
    m = range[0]
    for v in a: # for each value
        idx = min(l, int((v - m) / binw))
        for i in scipy.arange(max(0, idx - neighbourhood), min(l + 1, idx + neighbourhood + 1)): # for crips bin neighbourhood
            start = bins[i]
            histogram[i] += membership(v - start - 0.5 * binw) # adjust v for evaluation on zero-centered membership function

    # normalize
    if normed: histogram /= float(sum(histogram))
    
    return histogram, bins
    
# //////////////////// #
# Membership functions #
# //////////////////// #
# see http://www.atp.ruhr-uni-bochum.de/rt1/syscontrol/node117.html for graphs    

[docs]def triangular_membership(bin_center, bin_width, smoothness = 0.5):
    r"""
    Create a triangular membership function for a fuzzy histogram bin.
    
    Parameters
    ----------
    bin_center : number
        The center of the bin of which to compute the membership function.
    bin_width : number
        The width of a single bin (all expected to be of equal width).
    smoothness : number, optional
        The smoothness of the function; determines the neighbourhood affected.
        See below and `fuzzy_histogram` for a more detailed explanation
    
    Returns
    -------
    triangular_membership : function
        A triangular membership function centered on the bin.
    
    Notes
    -----
    For the triangular function the smoothness factor has to be 0.5. Lower values
    are accepted, but then the function assumes the shape of the trapezium membership
    function. Higher values lead to an exception.
    
    The triangular membership function is defined as

    .. math::
        
        \mu_{\triangle}(x) =
          \left\{
            \begin{array}{ll}
              0, & x<a, x>c\\
              \frac{x-a}{b-a}, & a\leq x\leq b \\
              \frac{c-x}{c-b}, & b<x\leq c\\
            \end{array}
          \right.

    where :math:`a` is the left border, :math:`c` the right border and :math:`b` the center of the triangular
    function. The height of the triangle is chosen such, that all values contribute with
    exactly one.
    
    The standard triangular function (:math:`smoothness = 0.5`) is displayed in the following
    figure
    
    .. .. image:: images/triangular_01.png
    
    "Triangular functions (1)"
    
    where the bin width is :math:`2` with centers at :math:`-2`, :math:`0` and :math:`2`.
    """
    if smoothness > 0.5: raise AttributeError('the triangular/trapezium membership functions supports only smoothnesses between 1/10 and 1/2.')
    if smoothness < 0.5: return trapezoid_membership(bin_center, bin_width, smoothness)
    
    a = bin_center - bin_width
    b = float(bin_center)
    c = bin_center + bin_width
    
    def fun(x):
        if x < a or x > c: return 0
        elif x <= b: return (x-a)/(b-a)
        else: return (c-x)/(c-b)
    return fun
    
[docs]def trapezoid_membership(bin_center, bin_width, smoothness):
    r"""Create a trapezium membership function for a fuzzy histogram bin.
    
    Parameters
    ----------
    bin_center : number
        The center of the bin of which to compute the membership function.
    bin_width : number
        The width of a single bin (all expected to be of equal width).
    smoothness : number, optional
        The smoothness of the function; determines the neighbourhood affected.
        See below and `fuzzy_histogram` for a more detailed explanation
        
    Returns
    -------
    trapezoid_membership : function
        A trapezoidal membership function centered on the bin.
    
    Notes
    -----
    For the trapezium function the smoothness factor can be between >0.0 and <0.5.
    Higher values are excepted, but then the function assumes the shape of the triangular
    membership function. A value of 0.0 would make the histogram behave like a crisp one.
    
    The trapezium membership function is defined as
    
    .. math::
    
        \mu_{trapez}(x) =
          \left\{
            \begin{array}{ll}
              0, & x<a, x>d\\
              \frac{x-a}{b-a}, & a\leq x\leq b \\
              1, & b<x<c\\
              \frac{d-x}{d-c}, & c\leq x\leq d\\
            \end{array}
          \right.
    
    where :math:`a` is the left lower border, :math:`b` the left upper border, :math:`c` the right upper border
    and :math:`d` the right lower border of the trapezium.
    
    A smoothness term of 0.1 makes the trapezium function reach by :math:`0.1 * bin\_width` into
    the areas of the adjunct bins, as can be observed in the following figure
    
    .. .. image:: images/trapezium_02.png
    
    "Trapezium functions (1)"
    
    where the bin width is 2 with centers at -2, 0 and 2.
    
    Increasing the smoothness term toward 0.5, the function starts to resemble the
    triangular membership function, which in fact it becomes for any :math:`smoothness >= 0.5`.
    The behavior can be observed in the following graph with :math:`smoothness=0.4` 
    
    .. .. image:: images/trapezium_01.png
    
    "Trapezium functions (2)"
    
    Lowering the smoothness toward 0.0, on the other hand, leads the trapezium function
    to behave more and more like a crisp histogram membership, which in fact it becomes
    at a smoothness of 0.0. The following figure, where the smoothness term is near zero,
    illustrates this behaviour
    
    .. .. image:: images/trapezium_03.png
    
    "Trapezium functions (3)"
    
    """
    # special case of high smoothness
    if smoothness < 1./10: raise AttributeError('the triangular/trapezium membership functions supports only smoothnesses between 1/10 and 1/2.')
    if smoothness >= 0.5: return triangular_membership(bin_center, bin_width, smoothness)

    a = bin_center - (smoothness + 0.5) * bin_width 
    b = bin_center - (0.5 - smoothness) * bin_width
    c = bin_center + (0.5 - smoothness) * bin_width
    d = bin_center + (smoothness + 0.5) * bin_width
     
    def fun(x):
        if x < a or x > d: return 0
        elif x <= b: return (x-a)/float(b-a)
        elif x <= c: return 1
        else: return (d-x)/float(d-c)
    return fun

[docs]def gaussian_membership(bin_center, bin_width, smoothness):
    r"""Create a gaussian membership function for a fuzzy histogram bin.
    
    Parameters
    ----------
    bin_center : number
        The center of the bin of which to compute the membership function.
    bin_width : number
        The width of a single bin (all expected to be of equal width).
    smoothness : number, optional
        The smoothness of the function; determines the neighbourhood affected.
        See below and `fuzzy_histogram` for a more detailed explanation
        
    Returns
    -------
    gaussian_membership : function
        The cumulative density function of the desired gaussian.

    Notes
    -----
    Since the gaussian membership function is infinite, it is
    not actually true that it does not contribute to bins outside of the neighbourhood
    range. But the contribution is so marginal (:math:`eps <= 0.001` per value) that it can be
    safely ignored.
    
    The gaussian membership function is defined as
    
    .. math::
    
        \mu_{gauss}(x) = \frac{1}{\sigma\sqrt{2\pi}} e^{-\frac{(x-\zeta)^2}{2\sigma^2}}

    Since the gaussian distributions can not be formed to sum up to one at each point of
    the x-axis, their cumulative density functions (CDF) are used instead. For more
    details on CDF see http://en.wikipedia.org/wiki/Normal_distribution .
    
    The gaussian and therefore the CDF are centered above the requested value instead of
    the bin center. Then the CDF value for the left side of the bin is subtracted from
    the CDF value returned for the right side. The result is the integral under the
    gaussian with :math:`\mu/\zeta = value` with the bin-sides as the integral borders.
    
    This approach might seem a little bit unintuitive, but is the best possible for
    gaussian membership functions. The following graph gives a graphical example of the
    computation of each values bin membership
    
    .. .. image:: images/gaussian_01.png 
    
    "Trapezium functions (1)"
    
    where the bin_width is 1, one bin between each of the x tics (e.g. [-1, 0], [0, 1],
    etc.). The value which membership should be computed is marked by a yellow bar at
    :math:`x = 0.3`. Its membership in each bin is defined by the integral under the gaussian
    centered at the value (i.e. 0.3). The purple area therefore defines its membership in
    the [-2,-1] bin, the red area its membership in the [-1,0] bin, etc.
    Since the gaussian is guaranteed to have an infinite integral of 1, the some of the
    contributions of a value to all bins is one.

    For computation the function normalizes all values to a bin_width of 1, which can
    introduce marginal rounding errors.
    """
    if smoothness > 10 or smoothness < 1./10: raise AttributeError('the gaussian membership function supports only smoothnesses between 1/10 and 5.')
    
    bin_width = float(bin_width)
    bin_center = bin_center / bin_width
    start = bin_center - 0.5
    end = bin_center + 0.5
    sigma = _gaussian_membership_sigma(smoothness)

    def fun(x):
        return scipy.stats.norm.cdf(end, x / bin_width, sigma) - scipy.stats.norm.cdf(start, x / bin_width, sigma) # x, mu, sigma
    
    return fun

def _gaussian_membership_sigma(smoothness, eps = 0.0005): # 275us @ smothness=10
    r"""Compute the sigma required for a gaussian, such that in a neighbourhood of
    smoothness the maximum error is 'eps'.
    The error is here the difference between the clipped integral and one.
    """
    error = 0
    deltas = [0.1, 0.01, 0.001, 0.0001]
    sigma = smoothness * 0.3
    point = -1. * (smoothness + 0.5)
    for delta in deltas:
        while error < eps:
            sigma += delta
            error = scipy.stats.norm.cdf(0.5, point, sigma) - scipy.stats.norm.cdf(-0.5, point, sigma) # x, mu, sigma
        sigma -= delta
    return sigma

[docs]def sigmoidal_difference_membership(bin_center, bin_width, smoothness):
    r"""Create the difference of two sigmoids as membership function for a fuzzy histogram bin.
    
    Parameters
    ----------
    bin_center : number
        The center of the bin of which to compute the membership function.
    bin_width : number
        The width of a single bin (all expected to be of equal width).
    smoothness : number, optional
        The smoothness of the function; determines the neighbourhood affected.
        See below and `fuzzy_histogram` for a more detailed explanation
        
    Returns
    -------
    sigmoidal_difference_membership : function
        A sigmoidal difference membership function centered on the bin.
    
    Notes
    -----
    Since the sigmoidal membership function is infinite, it is
    not actually true that it does not contribute to bins outside of the neighbourhood
    range. But the contribution is so marginal (eps <= 0.001 per value) that it can be
    safely ignored.
    
    The sigmoidal membership function is defined as
    
    .. math::
        
        \mu_{sigmoid}(x) = \left[1+e^{-\alpha_1 (x-\zeta_1)}\right]^{-1} - \left[1+e^{-\alpha_2 (x-\zeta_2)}\right]^{-1}

    where :math:`\alpha_1 = \alpha_2 = \alpha` is computed throught the smoothness term
    and :math:`\zeta_1` and :math:`\zeta_2` constitute the left resp. right borders of the bin.
    
    The following figure shows three sigmoidal membership functions for bins at the
    centers -2, -0 and 2 with a bin width of 2 and a smoothness of 2:
    
    .. .. image:: images/sigmoid_01.png
    
    "Sigmoidal functions (1)"
    
    The central (green) membership functions extends to its up till the second bin
    (centered around -4) and the same to the right (until the bin centered around +4).
    Therefore all values from -5 to +5 are considered for membership in this bin. Values
    out of this range would only contribute marginally to this bin.
    Furthermore it is inteligable that the sum of all membership functions at each point
    is equal to 1, therefore all values are equally represented (i.e. contribute with 1
    to the overall histogram).
    
    The influence of the smoothness term can be observed in the following figure:
    
    .. .. image:: images/sigmoid_02.png
    
    "Sigmoidal functions (2)"
    
    Here smoothness has been chosen to be 1. The green function therefore extends just
    into the directly adjunct bins to its left and right.
    
    """
    if smoothness > 10 or smoothness < 1./10: raise AttributeError('the sigmoidal membership function supports only smoothnesses between 1/10 and 10.')
    
    # compute the alpha that will give a contribution to the next bins right and left
    alpha_nbh1 = 8. / bin_width # experimental value
    # compute the alpha that results in the desired smoothness level 
    alpha = alpha_nbh1 / smoothness
    
    def fun(x):
        sigmoid1 = 1 + math.exp(-1. * alpha * (x - (bin_center - 0.5 * bin_width)))
        sigmoid2 = 1 + math.exp(-1. * alpha * (x - (bin_center + 0.5 * bin_width)))
        return math.pow(sigmoid1, -1) - math.pow(sigmoid2, -1)
    return fun
    
#def generalized_bell_membership(alpha, beta, zeta):
#    """
#    Create a generalized bell function as membership function for a fuzzy histogram bin.
#    
#    @param alpha controls the width of the plateau
#    @param beta controls the width of the base
#    @param zeta the center of the function
#    
#    Recommended values are:
#        - alpha: bin-width/2
#        - beta: bin-width/2
#        - zeta: bin center
#    
#    The bell membership function is defined as    
#    \f[
#     \mu_{bell}(x) = \left[1+\left|\frac{x-\zeta}{\alpha}\right|^{2\beta}\right]^{-1}
#    \f]
#    """
#    def fun(x):
#        try:
#            return math.pow(1 + math.pow(abs((x - zeta)/float(alpha)), 2. * beta), -1)
#        except Exception as e:
#            print x, zeta, alpha, beta
#            raise e
#    return fun
Quick search

Source code for medpy.features.histogram