# Source code for medpy.features.utilities
# Copyright (C) 2013 Oskar Maier
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# author Oskar Maier
# version r0.1.1
# since 2013-08-24
# status Release
# built-in modules
# third-party modules
import numpy
# own modules
# code
def normalize(vector, cutoffp=(0, 100), model=False):
    r"""
    Return a feature-wise normalized copy of the supplied feature vectors.

    Each feature (column) is shifted and scaled to the range [0, 1] independently.
    When cut-off percentiles other than (0, 100) are supplied, values lying below
    the lower or above the upper percentile of their feature are first set to the
    respective percentile value, i.e. outliers are clipped to fit the range.

    When `model` is True, the parameters describing the normalization are returned
    as well. They can later be passed to `normalize_with_model` to normalize other
    feature vectors in the same way.

    The input is interpreted as samples x features: every row is one sample and
    every column one feature. A one-dimensional input is treated as a set of
    samples with a single feature. For example the input::

        [[ 1.5, -1.0],
         [ 1.0,  0.0],
         [ 2.0,  1.0]]

    results in::

        [[ 0.5,  0.0],
         [ 0.0,  0.5],
         [ 1.0,  1.0]]

    Parameters
    ----------
    vector : sequence
        A sequence of feature vectors to normalize. The input is copied and never
        modified.
    cutoffp : (float, float)
        Lower and upper cut-off percentiles, each in the range [0, 100].
    model : bool
        Whether to return the learned normalization model.

    Returns
    -------
    normalized_feature_vectors : ndarray
        The normalized versions of the input vectors as a two-dimensional float
        array; one-dimensional input yields an array with a single column.
    model : tuple, optional
        The learned normalization model ``(minp, maxp, minv, maxv)``: the lower and
        upper percentile value of each feature, the per-feature minimum after
        clipping and the per-feature maximum after clipping and shifting.
        Only returned if `model` is True.

    Notes
    -----
    A feature that is constant after clipping has a value range of zero; such a
    feature is mapped to 0 instead of being divided by zero.
    """
    # the builtin float replaces the numpy.float alias removed in NumPy 1.24
    vector = numpy.array(vector, dtype=float)

    # add a singleton feature dimension if required
    if 1 == vector.ndim:
        vector = vector[:, None]

    # lower and upper range border of each feature from the supplied percentiles
    minp, maxp = numpy.percentile(vector, cutoffp, axis=0)

    # shift outliers to fit the range (the borders broadcast over the samples)
    numpy.clip(vector, minp, maxp, out=vector)

    # normalize: shift each feature to start at 0, then scale by its range
    minv = vector.min(0)
    vector -= minv
    maxv = vector.max(0)
    # a zero range marks a constant feature that is already all 0 after shifting;
    # dividing by 1 keeps it at 0 instead of producing NaN
    vector /= numpy.where(maxv == 0, 1.0, maxv)

    if not model:
        return vector
    return vector, (minp, maxp, minv, maxv)
def normalize_with_model(vector, model):
    r"""
    Normalize feature vectors using a previously learned normalization model.

    Works like `normalize`, but the clipping borders, shift and scale are not
    derived from the passed feature vectors. Instead they are taken from a model
    created with `normalize`. Thus formerly unseen query data can be normalized
    according to the training data.

    The input is interpreted as samples x features: every row is one sample and
    every column one feature. A one-dimensional input is treated as a set of
    samples with a single feature.

    Parameters
    ----------
    vector : sequence
        A sequence of feature vectors to normalize. The input is copied and never
        modified.
    model : tuple
        A normalization model ``(minp, maxp, minv, maxv)`` as created with
        `normalize`, holding one value per feature in each entry.

    Returns
    -------
    normalize : ndarray
        The normalized versions of the input vectors as a two-dimensional float
        array; one-dimensional input yields an array with a single column.

    Notes
    -----
    Features whose scale (``maxv``) in the model is zero are mapped to 0 instead
    of being divided by zero.
    """
    # the builtin float replaces the numpy.float alias removed in NumPy 1.24
    vector = numpy.array(vector, dtype=float)

    # unpack model; coercing to arrays lets plain lists be used as model entries
    minp, maxp, minv, maxv = (numpy.asarray(entry, dtype=float) for entry in model)

    # add a singleton feature dimension if required
    if 1 == vector.ndim:
        vector = vector[:, None]

    # shift outliers to fit the range (the borders broadcast over the samples)
    numpy.clip(vector, minp, maxp, out=vector)

    # normalize with the shift and scale stored in the model
    vector -= minv
    # a zero scale would produce NaN; dividing by 1 leaves the shifted value as is
    vector /= numpy.where(maxv == 0, 1.0, maxv)
    return vector
def append(*vectors):
    r"""
    Append an arbitrary number of feature vectors along the sample axis.

    E.g. taking a 100 and a 200 sample vector with 7 features each, a 300x7
    vector is returned.

    The vectors are interpreted as samples x features: every row is one sample
    and every column one feature. A one-dimensional vector is treated as a set of
    samples with a single feature.

    Parameters
    ----------
    *vectors : sequences
        A number of vectors with the same number and type of features.

    Returns
    -------
    vector : ndarray
        The appended vectors with all singleton dimensions removed. If only a
        single vector is supplied, it is returned as an array without any
        further processing.

    Raises
    ------
    IndexError
        If no vector is supplied.
    """
    # nothing to append; still return an array as documented
    if len(vectors) < 2:
        return numpy.asarray(vectors[0])

    # bring every vector into the two-dimensional samples x features form
    arrays = []
    for vec in vectors:
        arr = numpy.asarray(vec)
        if arr.ndim == 1:
            # one-dimensional input holds the samples of a single feature
            arr = arr[:, None]
        arrays.append(arr)

    return numpy.squeeze(numpy.concatenate(arrays, 0))
def join(*vectors):
    r"""
    Join an arbitrary number of aligned feature vectors along the feature axis.

    E.g. taking two 100-sample feature vectors of once 5 and once 7 features, a
    100x12 feature vector is created and returned.

    The feature vectors are interpreted as samples x features: every row is one
    sample and every column one feature. A one-dimensional vector is treated as a
    set of samples with a single feature.

    Parameters
    ----------
    *vectors : sequences
        A number of vectors with the same number of samples.

    Returns
    -------
    vector : ndarray
        The combined vectors. Singleton dimensions are removed, except when the
        first vector holds only a single sample; then the two-dimensional result
        is returned unchanged. If only a single vector is supplied, it is returned
        as an array without any further processing.

    Raises
    ------
    IndexError
        If no vector is supplied.
    """
    # nothing to join; still return an array as documented
    if len(vectors) < 2:
        return numpy.asarray(vectors[0])

    # bring every vector into the two-dimensional samples x features form;
    # numpy.asarray copies only when required, whereas numpy.array(copy=False)
    # raises on NumPy >= 2.0 whenever a copy cannot be avoided
    arrays = []
    for vec in vectors:
        arr = numpy.asarray(vec)
        if arr.ndim == 1:
            # one-dimensional input holds the samples of a single feature
            arr = arr[:, None]
        arrays.append(arr)

    joined = numpy.concatenate(arrays, 1)

    # treat the single-sample case specially (no squeezing), as squeezing would
    # drop the sample dimension
    if 1 == len(arrays[0]):
        return joined
    return numpy.squeeze(joined)