Matrix completion and feature imputation algorithms
A variety of matrix completion and imputation algorithms implemented in Python.
from fancyimpute import BiScaler, KNN, NuclearNormMinimization, SoftImpute # X is the complete data matrix # X_incomplete has the same values as X except a subset have been replace with NaN # Use 3 nearest rows which have a feature to fill in each row's missing features X_filled_knn = KNN(k=3).complete(X_incomplete) # matrix completion using convex optimization to find low-rank solution # that still matches observed values. Slow! X_filled_nnm = NuclearNormMinimization().complete(X_incomplete) # Instead of solving the nuclear norm objective directly, instead # induce sparsity using singular value thresholding X_filled_softimpute = SoftImpute().complete(X_incomplete_normalized) # print mean squared error for the three imputation methods above nnm_mse = ((X_filled_nnm[missing_mask] - X[missing_mask]) ** 2).mean() print("Nuclear norm minimization MSE: %f" % nnm_mse) softImpute_mse = ((X_filled_softimpute[missing_mask] - X[missing_mask]) ** 2).mean() print("SoftImpute MSE: %f" % softImpute_mse) knn_mse = ((X_filled_knn[missing_mask] - X[missing_mask]) ** 2).mean() print("knnImpute MSE: %f" % knn_mse)