Source code for medpy.neighbours.knn

# Copyright (C) 2013 Oskar Maier
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# author Oskar Maier
# version r0.1.0
# since 2014-10-15
# status Release

# build-in modules
from itertools import combinations
import warnings

# third-party modules
import numpy
from scipy.sparse.csr import csr_matrix

# own modules

# constants

# code
[docs]def mkneighbors_graph(observations, n_neighbours, metric, mode='connectivity', metric_params = None): """ Computes the (weighted) graph of mutual k-Neighbors for observations. Notes ----- The distance between an observation and itself is never computed and instead set to ``numpy.inf``. I.e. only in the case of k>=n_observations or when the ``metric`` returns ``numpy.inf``, the returned graph can contain loops. Parameters ---------- observations : sequence Sequence of observations. n_neighbours : int Maximum number of neighbours for each sample. metric : function The distance metric taking two observations and returning a numeric value > 0. mode : {'connectivity', 'distance', 'both'}, optional Type of returned matrix: 'connectivity' will return the connectivity matrix with ones and zeros, in 'distance' the edges are distances between points, while 'both' returns a (connectivity, distance) tuple. metric_params : dict, optional (default = None) Additional keyword arguments for the metric function. Returns ------- mkneighbors_graph : ndarray Sparse matrix in CSR format, shape = [n_observations, n_observations]. mkneighbors_graph[i, j] is assigned the weight of edge that connects i to j. Might contain ``numpy.inf`` values. """ # compute their pairwise-distances pdists = pdist(observations, metric) # get the k nearest neighbours for each patch k_nearest_nbhs = numpy.argsort(pdists)[:,:n_neighbours] # create a mask denoting the k nearest neighbours in image_pdist k_nearest_mutual_nbhs_mask = numpy.zeros(pdists.shape, numpy.bool) for _mask_row, _nbhs_row in zip(k_nearest_mutual_nbhs_mask, k_nearest_nbhs): _mask_row[_nbhs_row] = True # and with transposed to remove non-mutual nearest neighbours k_nearest_mutual_nbhs_mask &= k_nearest_mutual_nbhs_mask.T # set distance not in the mutual k nearest neighbour set to zero pdists[~k_nearest_mutual_nbhs_mask] = 0 # check for edges with zero-weight if numpy.any(pdists[k_nearest_mutual_nbhs_mask] == 0): warnings.warn('The graph contains at least one edge with a weight of "0".') if 'connectivity' == mode: return csr_matrix(k_nearest_mutual_nbhs_mask) elif 'distance' == mode: return csr_matrix(pdists) else: return csr_matrix(k_nearest_mutual_nbhs_mask), csr_matrix(pdists)
[docs]def pdist(objects, dmeasure, diagval = numpy.inf): """ Compute the pair-wise distances between arbitrary objects. Notes ----- ``dmeasure`` is assumed to be *symmetry* i.e. between object *a* and object *b* the function will be called only ones. Parameters ---------- objects : sequence A sequence of objects of length *n*. dmeasure : function A callable function that takes two objects as input at returns a number. diagval : number The diagonal values of the resulting array. Returns ------- pdists : ndarray An *nxn* symmetric float array containing the pair-wise distances. """ out = numpy.zeros([len(objects)] * 2, numpy.float) numpy.fill_diagonal(out, diagval) for idx1, idx2 in combinations(list(range(len(objects))), 2): out[idx1, idx2] = dmeasure(objects[idx1], objects[idx2]) return out + out.T