Source code for medpy.features.utilities

# Copyright (C) 2013 Oskar Maier
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# author Oskar Maier
# version r0.1.1
# since 2013-08-24
# status Release

# built-in modules

# third-party modules
import numpy

# own modules

# code


def normalize(vector, cutoffp=(0, 100), model=False):
    r"""
    Returns a feature-wise normalized version of the supplied vector.

    Every feature (column) is shifted and scaled independently to the range
    [0, 1]. When cut-off percentiles (``cutoffp``) other than (0, 100) are
    supplied, values lying below the lower or above the upper percentile of
    their feature are first clipped to that percentile.

    When ``model`` is set to True, an additional model describing the
    normalization is returned, which can later be passed to
    `normalize_with_model` to normalize other feature vectors in the same way.

    The vector is expected to have the form samples*features, i.e. one row per
    sample and one column per feature::

            f1    f2    [...]
        s1
        s2
        [...]

    Therefore a supplied vector::

            f1    f2
        s1  1.5   -1
        s2  1      0
        s3  2      1

    would result in the returned vector::

            f1    f2
        s1  0.50  0.00
        s2  0.00  0.50
        s3  1.00  1.00

    Parameters
    ----------
    vector : sequence
        A sequence of feature vectors to normalize. A one-dimensional sequence
        is treated as a number of samples with a single feature each.
    cutoffp : (float, float)
        Lower and upper cut-off percentiles.
    model : bool
        Whether to return the learned normalization model.

    Returns
    -------
    normalized_feature_vectors : ndarray
        The normalized versions of the input vectors (always a new float
        array, the input is not modified).
    model : tuple, optional
        The learned normalization model ``(minp, maxp, minv, maxv)``: the
        per-feature lower/upper percentile borders, the per-feature minimum
        after clipping and the per-feature maximum after shifting.

    Notes
    -----
    A feature that is constant after clipping has a range of zero; the final
    division then yields NaN for that column.
    """
    # numpy.array copies, so the in-place operations below never touch the input;
    # the builtin float replaces the numpy.float alias removed in NumPy 1.24
    vector = numpy.array(vector, dtype=float)

    # add a singleton dimension if required
    if 1 == vector.ndim:
        vector = vector[:, None]

    # compute lower and upper range border of each feature using the supplied percentiles
    minp, maxp = numpy.percentile(vector, cutoffp, 0)

    # shift outliers to fit range (borders broadcast over the sample axis)
    numpy.clip(vector, minp, maxp, out=vector)

    # normalize: shift the minimum to 0, then scale the maximum to 1
    minv = vector.min(0)
    vector -= minv
    maxv = vector.max(0)
    vector /= maxv

    if model:
        return vector, (minp, maxp, minv, maxv)
    return vector
def normalize_with_model(vector, model):
    r"""
    Normalize as with `normalize`, but not based on the data of the passed
    feature vector, but rather on a learned model created with `normalize`.
    Thus formerly unseen query data can be normalized according to the
    training data.

    Parameters
    ----------
    vector : sequence
        A sequence of feature vectors to normalize, in the form
        samples*features (one row per sample, one column per feature). A
        one-dimensional sequence is treated as a number of samples with a
        single feature each.
    model : tuple
        A normalization model ``(minp, maxp, minv, maxv)`` created with
        `normalize`: per-feature lower/upper clipping borders, the offset to
        subtract and the divisor to scale with.

    Returns
    -------
    normalize : ndarray
        The normalized versions of the input vectors (always a new float
        array, the input is not modified).
    """
    # numpy.array copies, so the in-place operations below never touch the input;
    # the builtin float replaces the numpy.float alias removed in NumPy 1.24
    vector = numpy.array(vector, dtype=float)

    # unpack model
    minp, maxp, minv, maxv = model

    # add a singleton dimension if required
    if 1 == vector.ndim:
        vector = vector[:, None]

    # shift outliers to fit the range learned from the training data
    numpy.clip(vector, minp, maxp, out=vector)

    # normalize with the learned offset and scale
    vector -= minv
    vector /= maxv

    return vector
def append(*vectors):
    r"""
    Stacks an arbitrary number of feature vectors on top of each other, i.e.
    concatenates their samples along the first axis.

    E.g. taking a 100 and a 200 sample vector with 7 features each, a 300x7
    vector is returned.

    The vectors are expected to have the form samples*features, i.e. one row
    per sample and one column per feature. One-dimensional vectors are treated
    as a number of samples with a single feature each.

    Parameters
    ----------
    *vectors : sequences
        A number of vectors with the same number and type of features.

    Returns
    -------
    vector : ndarray
        The appended vectors with all singleton dimensions squeezed out. If
        only a single vector is supplied, it is returned unchanged.
    """
    # nothing to append when fewer than two vectors were handed over
    if len(vectors) <= 1:
        return vectors[0]

    def _as_sample_rows(candidate):
        # 1-D input becomes a column: one sample per row, a single feature
        arr = numpy.asarray(candidate)
        return arr[:, numpy.newaxis] if arr.ndim == 1 else arr

    stacked = numpy.concatenate([_as_sample_rows(v) for v in vectors], 0)
    return numpy.squeeze(stacked)
def join(*vectors):
    r"""
    Takes an arbitrary number of aligned vectors with the same number of
    samples and combines them side by side into a single vector, i.e.
    concatenates their features along the second axis.

    E.g. taking two 100-sample feature vectors of once 5 and once 7 features,
    a 100x12 feature vector is created and returned.

    The feature vectors are expected to have the form samples*features, i.e.
    one row per sample and one column per feature. One-dimensional vectors are
    treated as a number of samples with a single feature each.

    Parameters
    ----------
    *vectors : sequences
        A number of vectors with the same number of samples.

    Returns
    -------
    vector : ndarray
        The combined vectors. Singleton dimensions are squeezed out, except
        when the first vector holds exactly one sample, in which case the
        two-dimensional 1xN result is returned as is. If only a single vector
        is supplied, it is returned unchanged.
    """
    # check supplied arguments
    if len(vectors) < 2:
        return vectors[0]

    # process supplied arguments; numpy.asarray only copies when required and,
    # unlike numpy.array(..., copy=False), does not raise under NumPy 2.x
    # when a copy cannot be avoided (e.g. for list input)
    columns = []
    for vec in vectors:
        arr = numpy.asarray(vec)
        if arr.ndim == 1:
            # one sample per row, a single feature
            arr = arr[:, None]
        columns.append(arr)

    joined = numpy.concatenate(columns, 1)

    # treat single-sample cases special (no squeezing)
    if 1 == len(columns[0]):
        return joined

    return numpy.squeeze(joined)