Skip to content

Commit dc13d25

Browse files
committed
added PCA helper class to mlab and deprecated prepca
svn path=/trunk/matplotlib/; revision=7926
1 parent ae377a2 commit dc13d25

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

lib/matplotlib/mlab.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,9 @@ def longest_ones(x):
759759

760760
def prepca(P, frac=0):
761761
"""
762+
763+
WARNING: this function is deprecated -- please see class PCA instead
764+
762765
Compute the principal components of *P*. *P* is a (*numVars*,
763766
*numObs*) array. *frac* is the minimum fraction of variance that a
764767
component must contain to be included.
@@ -778,6 +781,7 @@ def prepca(P, frac=0):
778781
R13 Neural Network Toolbox but is not found in later versions;
779782
its successor seems to be called "processpcs".
780783
"""
784+
warnings.warn('This function is deprecated -- see class PCA instead')
781785
U,s,v = np.linalg.svd(P)
782786
varEach = s**2/P.shape[1]
783787
totVar = varEach.sum()
@@ -789,6 +793,83 @@ def prepca(P, frac=0):
789793
Pcomponents = np.dot(Trans,P)
790794
return Pcomponents, Trans, fracVar[ind]
791795

796+
797+
class PCA:
    """
    Principal component analysis of a data matrix via the SVD.

    The training data are standardized column by column (mean removed,
    divided by the standard deviation) before the decomposition.  The
    same standardization is applied to anything passed to
    :meth:`project`.
    """

    def __init__(self, a):
        """
        compute the SVD of a and store data for PCA.  Use project to
        project the data onto a reduced set of dimensions

        Inputs:

          *a*: a numobservations x numdims array (anything accepted by
          np.asarray)

        Attrs:

          *a* a centered unit sigma version of input a

          *numrows*, *numcols*: the dimensions of a

          *mu* : a numdims array of means of a

          *sigma* : a numdims array of standard deviation of a

          *fracs* : the proportion of variance of each of the principal components

          *Wt* : the weight matrix for projecting a numdims point or array into PCA space

          *Y* : a projected into PCA space

        Raises:

          ValueError if *a* is not 2D; RuntimeError if *a* has fewer
          rows than columns.
        """
        # Coerce up front so lists / nested sequences work, matching the
        # np.asarray call in project().
        a = np.asarray(a)
        if a.ndim != 2:
            raise ValueError('Expected a 2D numobservations x numdims array')

        n, m = a.shape
        if n<m:
            raise RuntimeError('we assume data in a is organized with numrows>numcols')

        self.numrows, self.numcols = n, m
        self.mu = a.mean(axis=0)
        self.sigma = a.std(axis=0)

        a = self.center(a)
        self.a = a

        # Only the singular values and the right singular vectors are
        # needed; the left singular vectors are discarded.
        _, s, Vh = np.linalg.svd(a, full_matrices=False)

        # The constant divisor cancels in the normalization below; it is
        # kept so the intermediate values read as per-component variances.
        variances = s**2/float(len(s))
        self.fracs = variances/variances.sum()

        self.Wt = Vh
        # a . Vh^T is the same product as (Vh . a^T)^T
        self.Y = np.dot(a, Vh.T)

    def project(self, x, minfrac=0.):
        """
        project x onto the principal axes, dropping any axes where
        fraction of variance<minfrac

        *x* is a single numdims point or an array whose last dimension
        is numdims.  The result has the same leading dimensions as *x*;
        its last dimension holds only the retained components.

        Raises ValueError if the last dimension of *x* is not numcols.
        """
        x = np.asarray(x)

        if (x.shape[-1]!=self.numcols):
            raise ValueError('Expected an array with dims[-1]==%d'%self.numcols)

        # Contracting over the last axis of x handles a single point, a
        # 2D array of points, or a higher dimensional stack uniformly.
        Y = np.dot(self.center(x), self.Wt.T)
        mask = self.fracs>=minfrac
        return Y[..., mask]

    def center(self, x):
        """
        center the data using the mean and sigma from training set a

        Dimensions whose training sigma is exactly zero are only
        mean-shifted (divided by 1) so that a constant column yields
        zeros instead of NaN.
        """
        scale = np.where(self.sigma==0, 1.0, self.sigma)
        return (x - self.mu)/scale
872+
792873
def prctile(x, p = (0.0, 25.0, 50.0, 75.0, 100.0)):
793874
"""
794875
Return the percentiles of *x*. *p* can either be a sequence of

0 commit comments

Comments
 (0)