@@ -759,6 +759,9 @@ def longest_ones(x):
759
759
760
760
def prepca (P , frac = 0 ):
761
761
"""
762
+
763
+ WARNING: this function is deprecated -- please see class PCA instead
764
+
762
765
Compute the principal components of *P*. *P* is a (*numVars*,
763
766
*numObs*) array. *frac* is the minimum fraction of variance that a
764
767
component must contain to be included.
@@ -778,6 +781,7 @@ def prepca(P, frac=0):
778
781
R13 Neural Network Toolbox but is not found in later versions;
779
782
its successor seems to be called "processpcs".
780
783
"""
784
+ warnings .warn ('This function is deprecated -- see class PCA instead' )
781
785
U ,s ,v = np .linalg .svd (P )
782
786
varEach = s ** 2 / P .shape [1 ]
783
787
totVar = varEach .sum ()
@@ -789,6 +793,83 @@ def prepca(P, frac=0):
789
793
Pcomponents = np .dot (Trans ,P )
790
794
return Pcomponents , Trans , fracVar [ind ]
791
795
796
+
797
class PCA:
    """
    Principal component analysis of a data matrix, computed with the SVD.

    The training data is standardized column by column (see *center*)
    before it is decomposed.  Use *project* to map points onto the
    principal axes, optionally dropping low-variance axes.

    Attrs:

      *a* : a centered unit sigma version of the input a

      *numrows*, *numcols* : the dimensions of a

      *mu* : a numdims array of means of a

      *sigma* : a numdims array of standard deviations of a

      *fracs* : the proportion of variance of each of the principal
        components

      *Wt* : the weight matrix for projecting a numdims point or array
        into PCA space (one principal axis per row)

      *Y* : a projected into PCA space
    """

    def __init__(self, a):
        """
        Compute the SVD of *a* and store the data needed for PCA.

        Inputs:

          *a* : a numobservations x numdims array

        Raises RuntimeError if *a* has fewer rows than columns.

        Note: a column of *a* whose standard deviation is zero makes the
        division in *center* produce non-finite values.
        """
        n, m = a.shape
        if n < m:
            raise RuntimeError('we assume data in a is organized with numrows>numcols')

        self.numrows, self.numcols = n, m
        self.mu = a.mean(axis=0)
        self.sigma = a.std(axis=0)

        # From here on *a* is the standardized copy, not the raw input.
        a = self.center(a)
        self.a = a

        # Only the singular values and the right singular vectors are used.
        _, s, Vh = np.linalg.svd(a, full_matrices=False)

        # The constant divisor cancels in the ratio below; it is kept so the
        # intermediate values stay on the same scale as before.
        variances = s ** 2 / float(len(s))
        self.fracs = variances / variances.sum()

        self.Wt = Vh
        # Rows of Y are the observations expressed in the principal axes.
        self.Y = np.dot(a, Vh.T)

    def project(self, x, minfrac=0.):
        """
        Project *x* onto the principal axes, dropping any axes where the
        fraction of variance is less than *minfrac*.

        *x* is a single numdims point or an array whose last dimension
        has length numdims; the leading dimensions are preserved in the
        result.

        Raises ValueError if the last dimension of *x* is not numdims.
        """
        x = np.asarray(x)

        # A 0-d input has no last axis to compare, so reject it explicitly.
        if x.ndim == 0 or x.shape[-1] != self.numcols:
            raise ValueError('Expected an array with dims[-1]==%d' % self.numcols)

        # Contracting the last axis of x against the rows of Wt gives the
        # same result as dot(Wt, x.T).T for 1-D and 2-D input and also
        # handles stacks of points with more leading dimensions.
        Y = np.dot(self.center(x), self.Wt.T)
        mask = self.fracs >= minfrac
        return Y[..., mask]

    def center(self, x):
        """
        Center the data using the mean and sigma from the training set a.
        """
        return (x - self.mu) / self.sigma
872
+
792
873
def prctile (x , p = (0.0 , 25.0 , 50.0 , 75.0 , 100.0 )):
793
874
"""
794
875
Return the percentiles of *x*. *p* can either be a sequence of
0 commit comments