// PLSAT.cpp : Defines the entry point for the console application. // Quick DEMO of non-negative matrix factorization and // probabilistic latent semantic analysis. Andrew Polar, ezcodesample.com, semanticsearchart.com. // Oct. 2011 #include "stdafx.h" #include #include #include #include #include const int numberOfCategories = 2; const int numberOfRows = 5; const int numberOfColumns = 6; static float data[numberOfRows][numberOfColumns] = { {9.0, 2.0, 1.0, 1.0, 1.0, 0.0}, {8.0, 3.0, 2.0, 1.0, 0.0, 0.0}, {3.0, 0.0, 0.0, 1.0, 2.0, 8.0}, {0.0, 1.0, 0.0, 2.0, 4.0, 7.0}, {2.0, 1.0, 1.0, 0.0, 1.0, 3.0}, }; //////////////Reusable functions///////////////////// void printVector(char* title, float* Vector, int n) { printf("%s\n\n", title); for (int i=0; i Matrix[i][j]) min = Matrix[i][j]; } if (min < 0.0) { for (int j=0; j 0.0) { for (int j=0; j 0.0) { float s = 0.0; for (int k = 0; k 0.0) { likelihood += data[i][j] * log(s); } } } } return likelihood; } ////////End/////////////////////////// //Next is non-negative matrix factorization //Cholesky matrix inversion void choldc1(int n, float** a, float* p) { int i,j,k; float sum; for (i = 0; i= 0; k--) { sum -= a[i][k] * a[j][k]; } if (i == j) { if(sum <= 0) { p[i] = 1.0; //regularization } else { p[i] = sqrt(sum); } } else { a[j][i] = sum / p[i]; } } } } void choldc(int n, float** A, float** a) { int i,j; float* p = (float*)malloc(n * sizeof(float)); for (i = 0; i= 0; --i) { float sum = 0.0; for (int j = size - 1; j>i; --j) { sum += T[i][j] * M[j][loop]; } M[i][loop] = (v[i] - sum) / T[i][i]; } } if (e) free(e); if (v) free(v); if (T) { for (int i = 0; i 0.0) { D[i][nWhich] += N[i][j] * W[nWhich][j]; } } } } //copy D into N for (int i=0; i 0.0) { W[nWhich][j] += N[i][j] * D[i][nWhich]; } } } } //copy W into N for (int i=0; i 0) { theoreticalLimitOfLikelihood += data[i][j] * log(N[i][j]); } } } printf("Start pLSA NMF, theoretical limit for likelihood %f\n\n", theoreticalLimitOfLikelihood); initializeMatrix(D, nRows, nCats); initializeMatrix(W, nCats, nCols); normalizeMatrix(D, nRows, nCats); normalizeMatrix(W, nCats, nCols); while (!makeApproximationStepNMF(D, W, N, nRows, nCols, nCats)) {} printf("\n"); printMatrix("Document-category", D, nRows, nCats); printMatrix("Category-word", W, nCats, nCols); printf("End pLSA NMF \n\n"); /////////////////////////////End pLSA/////////////////////////////// //Deallocate memory if (N) { for (int i=0; i 0.0) { float s = 0.0; for (int k = 0; k 0.0) { N[i][j] = data[i][j] / s; } else { N[i][j] = 0.0; } } else { N[i][j] = 0.0; } } } } void getNewD(float** D2, float** D1, float** W2, float** W1, float** N, float* Z, int nRows, int nCols, int nCats) { //it is D = N * WT multiplication for (int nWhich=0; nWhich 0.0) { D2[i][nWhich] += N[i][j] * W1[nWhich][j]; } } } } for (int i=0; i 0.0) { W2[nWhich][j] += N[i][j] * D1[i][nWhich]; } } } } for(int nWhich=0; nWhich 0.0) { Z[nWhich] += data[i][j] * D[i][nWhich]; } } } } float s = 0.0; for (int i=0; i= 300) break; } printf("\n\n"); normalizeMatrix(D2, nRows, nCats); normalizeMatrix(W2, nCats, nCols); printMatrix("Document-category", D2, nRows, nCats); printMatrix("Word-category", W2, nCats, nCols); printf("End PLSA\n\n"); /////////////////////////////End pLSA/////////////////////////////// //Deallocate memory if (Z) free(Z); if (N) { for (int i=0; i