/*
 * Decompiled with CFR 0.152.
 */
package gov.sandia.cognition.text.topic;

import gov.sandia.cognition.algorithm.IterativeAlgorithm;
import gov.sandia.cognition.algorithm.event.AbstractIterativeAlgorithmListener;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.evaluator.Evaluator;
import gov.sandia.cognition.learning.algorithm.AbstractAnytimeBatchLearner;
import gov.sandia.cognition.learning.data.DatasetUtil;
import gov.sandia.cognition.math.Ring;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.math.matrix.VectorFactoryContainer;
import gov.sandia.cognition.math.matrix.VectorInputEvaluator;
import gov.sandia.cognition.math.matrix.VectorOutputEvaluator;
import gov.sandia.cognition.math.matrix.VectorUtil;
import gov.sandia.cognition.math.matrix.Vectorizable;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import gov.sandia.cognition.util.ArgumentChecker;
import gov.sandia.cognition.util.Randomized;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.Collection;
import java.util.Random;

/**
 * Batch learner for Probabilistic Latent Semantic Analysis (PLSA).
 *
 * <p>The input is a collection of documents, each convertible to a vector of
 * term weights. The learner stacks them into a documents-by-terms matrix and
 * iteratively re-estimates, for every latent variable {@code z}, the values
 * {@code p(z)}, {@code p(d|z)} and {@code p(t|z)}. Each iteration first
 * computes {@code p(z|d,t)} from the current estimates and then rebuilds the
 * estimates from the data weighted by {@code p(z|d,t)}. Iteration stops when
 * the absolute change in log-likelihood is no larger than
 * {@link #getMinimumChange()}, or when the iteration limit given to the
 * superclass constructor is reached.
 *
 * <p>The learned {@link Result} maps a new document to a vector with one
 * entry per latent variable.
 */
@PublicationReferences(references={@PublicationReference(author={"Thomas Hofmann"}, title="Probabilistic Latent Semantic Analysis", year=1999, type=PublicationType.Conference, publication="Proceedings of the Fifteenth Conference on Uncertainty in Artificial Intelligence (UAI)", pages={289, 296}, url="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.33.1187"), @PublicationReference(author={"Thomas Hofmann"}, title="Probabilistic Latent Semantic Indexing", year=1999, type=PublicationType.Conference, publication="Proceedings of the 22nd Conference of the ACM Special Interest Group on Information Retreival (SIGIR)", pages={50, 57}, url="http://portal.acm.org/citation.cfm?id=312649"), @PublicationReference(author={"Thomas Hofmann"}, title="Unsupervised Learning by Probabilistic Latent Semantic Analysis", year=2001, type=PublicationType.Journal, publication="Machine Learning", pages={177, 196}, url="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.130.6341")})
public class ProbabilisticLatentSemanticAnalysis
extends AbstractAnytimeBatchLearner<Collection<? extends Vectorizable>, ProbabilisticLatentSemanticAnalysis.Result>
implements Randomized,
VectorFactoryContainer {
    /** Default number of latent variables requested: {@value}. */
    public static final int DEFAULT_REQUESTED_RANK = 10;

    /** Default maximum number of iterations: {@value}. */
    public static final int DEFAULT_MAX_ITERATIONS = 250;

    /** Default minimum change in log-likelihood needed to keep iterating. */
    public static final double DEFAULT_MINIMUM_CHANGE = 1.0E-10;

    /** Requested number of latent variables; must be positive. */
    protected int requestedRank;

    /** Iteration stops once |change in log-likelihood| is not above this value. */
    protected double minimumChange;

    /** Source of randomness for the initial probability vectors. */
    protected Random random;

    /** Factory used to create the per-latent probability vectors. */
    protected VectorFactory<? extends Vector> vectorFactory;

    /** Factory used to create the data matrix and the per-latent matrices. */
    protected MatrixFactory<? extends Matrix> matrixFactory;

    /** The training data, one row per document and one column per term. */
    protected transient Matrix documentsByTerms;

    /** Number of terms (columns) in the training data. */
    protected transient int termCount;

    /** Number of documents (rows) in the training data. */
    protected transient int documentCount;

    /** Number of latent variables actually used: min(documentCount, requestedRank). */
    protected transient int latentCount;

    /** Per-latent-variable state; the same array is shared with {@link #result}. */
    protected transient LatentData[] latents;

    /** Log-likelihood computed by the most recent step. */
    protected transient double logLikelihood;

    /**
     * Difference between the log-likelihood of the most recent step and the
     * one before it. After the first step this is {@code +Infinity} because
     * there is no previous value to compare against.
     */
    protected transient double changeOfLogLikelihood;

    /** The model being learned. */
    protected transient Result result;

    /**
     * Creates a learner with the default rank, default minimum change and a
     * freshly created {@link Random}.
     */
    public ProbabilisticLatentSemanticAnalysis() {
        this(DEFAULT_REQUESTED_RANK);
    }

    /**
     * Creates a learner with the default rank and default minimum change.
     *
     * @param random the random number generator used for initialization
     */
    public ProbabilisticLatentSemanticAnalysis(Random random) {
        this(DEFAULT_REQUESTED_RANK, DEFAULT_MINIMUM_CHANGE, random);
    }

    /**
     * Creates a learner with the default minimum change and a freshly created
     * {@link Random}.
     *
     * @param requestedRank the requested number of latent variables; must be positive
     */
    public ProbabilisticLatentSemanticAnalysis(int requestedRank) {
        this(requestedRank, DEFAULT_MINIMUM_CHANGE, new Random());
    }

    /**
     * Creates a learner with the given settings, the default iteration limit
     * and the default vector and matrix factories.
     *
     * @param requestedRank the requested number of latent variables; must be positive
     * @param minimumChange the minimum log-likelihood change needed to keep
     *        iterating; must be non-negative
     * @param random the random number generator used for initialization
     */
    public ProbabilisticLatentSemanticAnalysis(int requestedRank, double minimumChange, Random random) {
        super(DEFAULT_MAX_ITERATIONS);
        this.setRequestedRank(requestedRank);
        this.setRandom(random);
        this.setMinimumChange(minimumChange);
        this.setVectorFactory(VectorFactory.getDefault());
        this.setMatrixFactory(MatrixFactory.getDefault());
    }

    /**
     * Builds the documents-by-terms matrix from the training data and creates
     * the initial state of every latent variable.
     *
     * <p>Each latent variable starts with uniformly random {@code p(t|z)} and
     * {@code p(d|z)} vectors, each divided by its 1-norm, and with
     * {@code p(z) = 1 / latentCount}.
     *
     * @return always {@code true}
     */
    protected boolean initializeAlgorithm() {
        Collection<? extends Vectorizable> documents = this.getData();
        this.documentsByTerms = this.getMatrixFactory().copyRowVectors(documents);
        this.termCount = DatasetUtil.getDimensionality(documents);
        this.documentCount = documents.size();

        // There cannot be more latent variables than documents.
        this.latentCount = Math.min(this.documentCount, this.getRequestedRank());
        this.latents = new LatentData[this.latentCount];
        for (int i = 0; i < this.latentCount; ++i) {
            LatentData latent = new LatentData();
            latent.index = i;
            latent.pLatentGivenDocumentTerm = this.getMatrixFactory().createMatrix(this.documentCount, this.termCount);

            // The term vector is drawn before the document vector; keeping
            // that order keeps results reproducible for a seeded Random.
            latent.pTermGivenLatent = this.getVectorFactory().createUniformRandom(this.termCount, 0.0, 1.0, this.getRandom());
            VectorUtil.divideByNorm1Equals(latent.pTermGivenLatent);
            latent.pDocumentGivenLatent = this.getVectorFactory().createUniformRandom(this.documentCount, 0.0, 1.0, this.getRandom());
            VectorUtil.divideByNorm1Equals(latent.pDocumentGivenLatent);
            latent.pLatent = 1.0 / (double)this.latentCount;
            this.latents[i] = latent;
        }

        // No log-likelihood has been computed yet. Negative infinity makes the
        // first change infinite, so the first convergence test cannot succeed
        // by accident. (Double.MIN_VALUE is a tiny POSITIVE number.)
        this.logLikelihood = Double.NEGATIVE_INFINITY;
        this.changeOfLogLikelihood = 0.0;
        this.result = new Result(this.termCount, this.latents);
        return true;
    }

    /**
     * Performs one iteration: recomputes {@code p(z|d,t)}, re-estimates the
     * per-latent parameters, and recomputes the log-likelihood.
     *
     * @return {@code true} if the absolute change in log-likelihood exceeds
     *         {@link #getMinimumChange()}, meaning iteration should continue
     */
    protected boolean step() {
        this.updateLatentPosteriors();
        this.updateLatentParameters();

        double previousLogLikelihood = this.logLikelihood;
        this.logLikelihood = this.computeLogLikelihood();
        this.changeOfLogLikelihood = this.logLikelihood - previousLogLikelihood;
        return Math.abs(this.changeOfLogLikelihood) > this.getMinimumChange();
    }

    /**
     * For every (document, term) cell, sets p(z|d,t) proportional to
     * p(z) * p(d|z) * p(t|z) and normalizes across the latent variables.
     */
    private void updateLatentPosteriors() {
        for (int i = 0; i < this.documentCount; ++i) {
            for (int j = 0; j < this.termCount; ++j) {
                double sum = 0.0;
                for (LatentData latent : this.latents) {
                    double value = latent.pLatent * latent.pDocumentGivenLatent.getElement(i) * latent.pTermGivenLatent.getElement(j);
                    latent.pLatentGivenDocumentTerm.setElement(i, j, value);
                    sum += value;
                }
                if (sum == 0.0) {
                    // Nothing to normalize; leave the zeros in place.
                    continue;
                }
                for (LatentData latent : this.latents) {
                    double value = latent.pLatentGivenDocumentTerm.getElement(i, j);
                    latent.pLatentGivenDocumentTerm.setElement(i, j, value / sum);
                }
            }
        }
    }

    /**
     * Re-estimates p(t|z), p(d|z) and p(z) for every latent variable from the
     * data matrix weighted element-wise by p(z|d,t).
     */
    private void updateLatentParameters() {
        double pLatentSum = 0.0;
        for (LatentData latent : this.latents) {
            Matrix countsTimesProbabilities = this.documentsByTerms.dotTimes(latent.pLatentGivenDocumentTerm);
            latent.pTermGivenLatent = countsTimesProbabilities.sumOfRows();
            latent.pDocumentGivenLatent = countsTimesProbabilities.sumOfColumns();

            // Take the total mass before the vectors are normalized below.
            latent.pLatent = latent.pDocumentGivenLatent.sum();
            VectorUtil.divideByNorm1Equals(latent.pTermGivenLatent);
            VectorUtil.divideByNorm1Equals(latent.pDocumentGivenLatent);
            pLatentSum += latent.pLatent;
        }
        if (pLatentSum != 0.0) {
            for (LatentData latent : this.latents) {
                latent.pLatent /= pLatentSum;
            }
        }
    }

    /**
     * Computes the sum over all cells of count(d,t) * log p(d,t), where
     * p(d,t) is the sum over latents of p(z) * p(d|z) * p(t|z). Cells with
     * p(d,t) == 0 are skipped.
     */
    private double computeLogLikelihood() {
        double total = 0.0;
        for (int i = 0; i < this.documentCount; ++i) {
            for (int j = 0; j < this.termCount; ++j) {
                double pDocumentTerm = 0.0;
                for (LatentData latent : this.latents) {
                    pDocumentTerm += latent.pLatent * latent.pDocumentGivenLatent.getElement(i) * latent.pTermGivenLatent.getElement(j);
                }
                if (pDocumentTerm == 0.0) {
                    continue;
                }
                total += this.documentsByTerms.getElement(i, j) * Math.log(pDocumentTerm);
            }
        }
        return total;
    }

    /**
     * Releases the working references to the latent data and the data matrix.
     * The {@link Result} keeps its own reference to the latent data array.
     */
    protected void cleanupAlgorithm() {
        this.latents = null;
        this.documentsByTerms = null;
    }

    /**
     * Gets the model being learned.
     *
     * @return the current result
     */
    public Result getResult() {
        return this.result;
    }

    /**
     * Gets the random number generator.
     *
     * @return the random number generator
     */
    public Random getRandom() {
        return this.random;
    }

    /**
     * Sets the random number generator.
     *
     * @param random the random number generator
     */
    public void setRandom(Random random) {
        this.random = random;
    }

    /**
     * Gets the vector factory.
     *
     * @return the vector factory
     */
    public VectorFactory<? extends Vector> getVectorFactory() {
        return this.vectorFactory;
    }

    /**
     * Sets the vector factory.
     *
     * @param vectorFactory the vector factory
     */
    public void setVectorFactory(VectorFactory<? extends Vector> vectorFactory) {
        this.vectorFactory = vectorFactory;
    }

    /**
     * Gets the matrix factory.
     *
     * @return the matrix factory
     */
    public MatrixFactory<? extends Matrix> getMatrixFactory() {
        return this.matrixFactory;
    }

    /**
     * Sets the matrix factory.
     *
     * @param matrixFactory the matrix factory
     */
    public void setMatrixFactory(MatrixFactory<? extends Matrix> matrixFactory) {
        this.matrixFactory = matrixFactory;
    }

    /**
     * Gets the requested number of latent variables.
     *
     * @return the requested rank
     */
    public int getRequestedRank() {
        return this.requestedRank;
    }

    /**
     * Sets the requested number of latent variables.
     *
     * @param requestedRank the requested rank; checked to be positive
     */
    public void setRequestedRank(int requestedRank) {
        ArgumentChecker.assertIsPositive("requestedRank", requestedRank);
        this.requestedRank = requestedRank;
    }

    /**
     * Gets the minimum change in log-likelihood needed to keep iterating.
     *
     * @return the minimum change
     */
    public double getMinimumChange() {
        return this.minimumChange;
    }

    /**
     * Sets the minimum change in log-likelihood needed to keep iterating.
     *
     * @param minimumChange the minimum change; checked to be non-negative
     */
    public void setMinimumChange(double minimumChange) {
        ArgumentChecker.assertIsNonNegative("minimumChange", minimumChange);
        this.minimumChange = minimumChange;
    }

    /**
     * Listener that prints the state of a
     * {@link ProbabilisticLatentSemanticAnalysis} run to a stream at the
     * start and end of every step.
     */
    public static class StatusPrinter
    extends AbstractIterativeAlgorithmListener {
        /** The stream the status is written to. */
        protected PrintStream out;

        /** Creates a printer that writes to {@code System.out}. */
        public StatusPrinter() {
            this(System.out);
        }

        /**
         * Creates a printer that writes to the given stream.
         *
         * @param out the stream to write to
         */
        public StatusPrinter(PrintStream out) {
            this.out = out;
        }

        /**
         * Prints the iteration number and, for each latent variable, its
         * p(z), p(t|z) and p(d|z) formatted to two decimals.
         *
         * @param algorithm the algorithm; cast to
         *        {@link ProbabilisticLatentSemanticAnalysis}
         */
        public void stepStarted(IterativeAlgorithm algorithm) {
            ProbabilisticLatentSemanticAnalysis plsa = (ProbabilisticLatentSemanticAnalysis)algorithm;
            NumberFormat format = new DecimalFormat("0.00");
            this.out.println("Iteration " + plsa.getIteration());
            for (LatentData latent : plsa.result.latents) {
                this.out.println("    Latent " + latent.index);
                this.out.println("        p(z)   = " + format.format(latent.pLatent));
                this.out.println("        p(t|z) = " + latent.pTermGivenLatent.toString(format));
                this.out.println("        p(d|z) = " + latent.pDocumentGivenLatent.toString(format));
            }
        }

        /**
         * Prints the log-likelihood and its change after a step.
         *
         * @param algorithm the algorithm; cast to
         *        {@link ProbabilisticLatentSemanticAnalysis}
         */
        public void stepEnded(IterativeAlgorithm algorithm) {
            ProbabilisticLatentSemanticAnalysis plsa = (ProbabilisticLatentSemanticAnalysis)algorithm;
            this.out.println("Log likelihood: " + plsa.logLikelihood);
            this.out.println("Change: " + plsa.changeOfLogLikelihood);
        }
    }

    /**
     * The learned model. Evaluating it on a document runs an iterative
     * "fold-in" that keeps the learned p(z) and p(t|z) fixed and estimates a
     * weight per latent variable for the query document.
     */
    public static class Result
    extends AbstractCloneableSerializable
    implements Evaluator<Vectorizable, Vector>,
    VectorInputEvaluator<Vectorizable, Vector>,
    VectorOutputEvaluator<Vectorizable, Vector> {
        /** Number of terms expected in an input vector. */
        protected int termCount;

        /** Number of latent variables. */
        protected int latentCount;

        /** The learned per-latent data. */
        protected LatentData[] latents;

        /** Maximum number of fold-in iterations per evaluation. */
        protected int maxIterations;

        /** Fold-in stops once |change in log-likelihood| is not above this value. */
        protected double minimumChange;

        /**
         * Creates a result over the given latent data, using the enclosing
         * class's default iteration limit and minimum change for evaluation.
         *
         * @param termCount the number of terms
         * @param latents the per-latent data; the array is stored, not copied
         */
        public Result(int termCount, LatentData[] latents) {
            this.termCount = termCount;
            this.latentCount = latents.length;
            this.latents = latents;
            this.maxIterations = DEFAULT_MAX_ITERATIONS;
            this.minimumChange = DEFAULT_MINIMUM_CHANGE;
        }

        /**
         * Estimates the latent-variable weights for a query document.
         *
         * @param input the query document, converted with {@code convertToVector()}
         * @return a vector with one entry per latent variable
         */
        public Vector evaluate(Vectorizable input) {
            Vector query = input.convertToVector();
            Matrix pLatentGivenQueryTerm = MatrixFactory.getDefault().createMatrix(this.latentCount, this.termCount);
            Vector pQueryGivenLatent = VectorFactory.getDefault().createVector(this.latentCount, 1.0 / (double)this.latentCount);

            // No log-likelihood yet, so the first change is infinite and the
            // loop cannot stop before a real comparison is possible.
            // (Double.MIN_VALUE is a tiny POSITIVE number.)
            double logLikelihood = Double.NEGATIVE_INFINITY;
            for (int iteration = 1; iteration <= this.maxIterations; ++iteration) {
                double previousLogLikelihood = logLikelihood;
                logLikelihood = this.step(query, pLatentGivenQueryTerm, pQueryGivenLatent);
                double change = logLikelihood - previousLogLikelihood;
                if (Math.abs(change) <= this.minimumChange) {
                    break;
                }
            }
            return pQueryGivenLatent;
        }

        /**
         * Performs one fold-in iteration, updating both working structures in
         * place.
         *
         * @param query the query term vector
         * @param pLatentGivenQueryTerm latents-by-terms working matrix; overwritten
         * @param pQueryGivenLatent current per-latent weights; overwritten and
         *        then divided by its 1-norm
         * @return the log-likelihood of the query under the updated weights
         */
        protected double step(Vector query, Matrix pLatentGivenQueryTerm, Vector pQueryGivenLatent) {
            // p(z|q,t) proportional to p(z) * p(q|z) * p(t|z), normalized over z.
            for (int j = 0; j < this.termCount; ++j) {
                double sum = 0.0;
                for (LatentData latent : this.latents) {
                    int k = latent.index;
                    double value = latent.pLatent * pQueryGivenLatent.getElement(k) * latent.pTermGivenLatent.getElement(j);
                    pLatentGivenQueryTerm.setElement(k, j, value);
                    sum += value;
                }
                if (sum == 0.0) {
                    continue;
                }
                for (LatentData latent : this.latents) {
                    int k = latent.index;
                    double value = pLatentGivenQueryTerm.getElement(k, j);
                    pLatentGivenQueryTerm.setElement(k, j, value / sum);
                }
            }

            // New weight for each latent: query counts weighted by p(z|q,t).
            for (LatentData latent : this.latents) {
                int k = latent.index;
                double sum = 0.0;
                for (int j = 0; j < this.termCount; ++j) {
                    sum += query.getElement(j) * pLatentGivenQueryTerm.getElement(k, j);
                }
                pQueryGivenLatent.setElement(k, sum);
            }
            VectorUtil.divideByNorm1Equals(pQueryGivenLatent);

            // Log-likelihood of the query; terms with zero probability are skipped.
            double logLikelihood = 0.0;
            for (int j = 0; j < this.termCount; ++j) {
                double pQueryTerm = 0.0;
                for (LatentData latent : this.latents) {
                    int k = latent.index;
                    pQueryTerm += latent.pLatent * latent.pTermGivenLatent.getElement(j) * pQueryGivenLatent.getElement(k);
                }
                if (pQueryTerm == 0.0) {
                    continue;
                }
                logLikelihood += query.getElement(j) * Math.log(pQueryTerm);
            }
            return logLikelihood;
        }

        /**
         * Gets the expected input dimensionality.
         *
         * @return the number of terms
         */
        public int getInputDimensionality() {
            return this.termCount;
        }

        /**
         * Gets the output dimensionality.
         *
         * @return the number of latent variables
         */
        public int getOutputDimensionality() {
            return this.latents.length;
        }
    }

    /**
     * State kept for one latent variable {@code z}.
     *
     * <p>Marked {@link java.io.Serializable} because {@link Result}, which is
     * serializable through its superclass, stores an array of these. This
     * assumes the Matrix and Vector implementations in use are themselves
     * serializable; verify against the matrix package.
     */
    public static class LatentData implements java.io.Serializable {
        private static final long serialVersionUID = 1L;

        /** Position of this latent variable in the latent array. */
        int index;

        /** p(z|d,t): a documents-by-terms matrix. */
        Matrix pLatentGivenDocumentTerm;

        /** p(t|z): one entry per term. */
        Vector pTermGivenLatent;

        /** p(d|z): one entry per document. */
        Vector pDocumentGivenLatent;

        /** p(z). */
        double pLatent;
    }
}

