// (C) Copyright 2004, David M. Blei (blei [at] cs [dot] cmu [dot] edu)

// This file is part of LDA-C.

// LDA-C is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or (at your
// option) any later version.

// LDA-C is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA

#ifndef USE_RUBY
#define USE_RUBY
#endif

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include <time.h>

#include "lda.h"
#include "lda-data.h"
#include "lda-inference.h"
#include "lda-model.h"
#include "utils.h"
#include "cokus.h"

#ifdef USE_RUBY
#include "ruby.h"

/* Ruby module "Lda"; assigned in Init_lda_ext. */
VALUE rb_mLda;
/* Ruby class "Lda" under the Lda module; the wrap_* methods are registered on it in Init_lda_ext. */
VALUE rb_cLda;
/* Ruby class "Corpus" under the Lda module; assigned in Init_lda_ext. */
VALUE rb_cLdaCorpus;
/* Ruby class "Document" under the Lda module; assigned in Init_lda_ext. */
VALUE rb_cLdaDocument;
#endif


/*
 * Variational inference for a single document.
 *
 * Starts var_gamma[k] at alpha + total/K and every phi[n][k] at 1/K, then
 * repeatedly re-estimates phi (per word, per topic) and var_gamma until the
 * relative change of the bound returned by compute_likelihood is no longer
 * greater than VAR_CONVERGED, or VAR_MAX_ITER sweeps have run (a value of
 * -1 disables the cap).
 *
 * doc       - document whose words and counts are being explained
 * model     - supplies alpha, num_topics and log_prob_w
 * var_gamma - out: model->num_topics variational Dirichlet parameters
 * phi       - out: doc->length rows of model->num_topics topic weights
 *
 * Returns the bound computed in the last sweep (0 if no sweep ran).
 */
double lda_inference(document* doc, lda_model* model, double* var_gamma, double** phi) {
	const int num_topics = model->num_topics;
	double psi_gamma[num_topics];
	double phi_before[num_topics];
	double bound = 0, bound_prev = 0;
	double rel_change = 1;
	double log_norm = 0;
	int topic, pos;
	int sweeps = 0;

	// start from a symmetric posterior: evenly spread gamma, flat phi

	for (topic = 0; topic < num_topics; topic++) {
		var_gamma[topic] = model->alpha + (doc->total/((double) model->num_topics));
		psi_gamma[topic] = digamma(var_gamma[topic]);
	}
	for (pos = 0; pos < doc->length; pos++) {
		for (topic = 0; topic < num_topics; topic++)
			phi[pos][topic] = 1.0/model->num_topics;
	}

	for (;;) {
		// written as negated ">" so that a NaN change also ends the loop
		if (!(rel_change > VAR_CONVERGED))
			break;
		if ((sweeps >= VAR_MAX_ITER) && (VAR_MAX_ITER != -1))
			break;
		sweeps++;

		for (pos = 0; pos < doc->length; pos++) {
			double *phi_row = phi[pos];
			const int word = doc->words[pos];

			// unnormalised phi in log space, with its log normaliser
			// accumulated alongside
			log_norm = 0;
			for (topic = 0; topic < num_topics; topic++) {
				phi_before[topic] = phi_row[topic];
				phi_row[topic] =
					psi_gamma[topic] +
					model->log_prob_w[topic][word];
				log_norm = (topic > 0)
					? log_sum(log_norm, phi_row[topic])
					: phi_row[topic];
			}

			// normalise, then move gamma by the change in this word's phi
			for (topic = 0; topic < num_topics; topic++) {
				phi_row[topic] = exp(phi_row[topic] - log_norm);
				var_gamma[topic] =
					var_gamma[topic] + doc->counts[pos]*(phi_row[topic] - phi_before[topic]);
				// refreshed after every word (many digamma calls) so the
				// next word already sees the updated gamma
				psi_gamma[topic] = digamma(var_gamma[topic]);
			}
		}

		bound = compute_likelihood(doc, model, phi, var_gamma);
		assert(!isnan(bound));
		rel_change = (bound_prev - bound) / bound_prev;
		bound_prev = bound;
	}
	return(bound);
}


/*
 * Computes the likelihood bound of one document for the given variational
 * parameters.
 *
 * doc       - document (words, counts, length)
 * model     - supplies alpha, num_topics and log_prob_w
 * phi       - doc->length rows of model->num_topics topic weights
 * var_gamma - model->num_topics variational Dirichlet parameters
 *
 * Returns the accumulated bound.
 */
double compute_likelihood(document* doc, lda_model* model, double** phi, double* var_gamma) {
	const int num_topics = model->num_topics;
	double psi[num_topics];
	double gamma_total = 0;
	double psi_total = 0;
	double bound = 0;
	int topic, pos;

	topic = 0;
	while (topic < num_topics) {
		psi[topic] = digamma(var_gamma[topic]);
		gamma_total += var_gamma[topic];
		topic++;
	}
	psi_total = digamma(gamma_total);

	// terms that do not depend on an individual topic
	bound = lgamma(model->alpha * model -> num_topics) - model -> num_topics * lgamma(model->alpha) - (lgamma(gamma_total));

	for (topic = 0; topic < num_topics; topic++) {
		bound += (model->alpha - 1)*(psi[topic] - psi_total) + lgamma(var_gamma[topic]) - (var_gamma[topic] - 1)*(psi[topic] - psi_total);

		for (pos = 0; pos < doc->length; pos++) {
			// only strictly positive weights contribute; this also keeps
			// log() away from zero
			if (!(phi[pos][topic] > 0))
				continue;

			bound += doc->counts[pos]*
				(phi[pos][topic]*((psi[topic] - psi_total) - log(phi[pos][topic])
				+ model->log_prob_w[topic][doc->words[pos]]));
		}
	}
	return(bound);
}


/*
 * E-step for one document: runs lda_inference and folds the result into
 * the sufficient statistics.
 *
 * doc   - document to process
 * gamma - out: variational Dirichlet parameters for this document
 * phi   - scratch/out: per-word topic weights filled by lda_inference
 * model - current model
 * ss    - sufficient statistics updated in place (alpha_suffstats,
 *         class_word, class_total, num_docs)
 *
 * Returns the bound reported by lda_inference.
 */
double doc_e_step(document* doc, double* gamma, double** phi, lda_model* model, lda_suffstats* ss) {
	const double bound = lda_inference(doc, model, gamma, phi);
	double gamma_total = 0;
	int topic, pos;

	// alpha statistics: sum of digamma(gamma_k) minus K * digamma(sum of gamma)
	for (topic = 0; topic < model->num_topics; topic++) {
		gamma_total += gamma[topic];
		ss->alpha_suffstats += digamma(gamma[topic]);
	}
	ss->alpha_suffstats -= model->num_topics * digamma(gamma_total);

	// word/topic statistics: each word adds count * phi to its topic rows
	for (pos = 0; pos < doc->length; pos++) {
		const int word = doc->words[pos];

		for (topic = 0; topic < model->num_topics; topic++) {
			const double weight = doc->counts[pos]*phi[pos][topic];

			ss->class_word[topic][word] += weight;
			ss->class_total[topic] += weight;
		}
	}

	ss->num_docs++;

	return(bound);
}


/*
 * Writes one document's word assignments as a single line to f.
 *
 * The line starts with the document length, followed by one "word:topic"
 * pair per word, where the topic is argmax over that word's phi row.
 * The stream is flushed after the line.
 */
void write_word_assignment(FILE* f, document* doc, double** phi, lda_model* model) {
	int pos = 0;

	fprintf(f, "%03d", doc->length);
	while (pos < doc->length) {
		const int word = doc->words[pos];
		const int topic = argmax(phi[pos], model->num_topics);

		fprintf(f, " %04d:%02d", word, topic);
		pos++;
	}
	fputs("\n", f);
	fflush(f);
}


/*
 * Saves the gamma parameters of the current dataset.
 *
 * Writes one line per document to `filename`: the document's num_topics
 * gamma values formatted with "%5.10f" and separated by single spaces.
 * With num_topics == 0 each line is empty.
 *
 * filename   - path of the file to create or truncate
 * gamma      - num_docs rows of num_topics values
 * num_docs   - number of rows to write
 * num_topics - number of values per row
 *
 * If the file cannot be opened the error is reported on stderr and nothing
 * is written.
 */
void save_gamma(char* filename, double** gamma, int num_docs, int num_topics) {
	FILE* fileptr;
	int d, k;

	fileptr = fopen(filename, "w");
	if (fileptr == NULL) {
		// a NULL stream must never reach fprintf
		perror(filename);
		return;
	}

	for (d = 0; d < num_docs; d++) {
		// starting at k = 0 inside the loop means gamma[d][0] is not
		// read when there are no topics at all
		for (k = 0; k < num_topics; k++) {
			if (k > 0)
				fputc(' ', fileptr);
			fprintf(fileptr, "%5.10f", gamma[d][k]);
		}
		fprintf(fileptr, "\n");
	}

	// fclose flushes buffered output, so a failure here means lost data
	if (fclose(fileptr) != 0)
		perror(filename);
}


/* Allocates a rows x cols matrix of doubles as an array of row pointers.
 * Returns NULL, with nothing left allocated, if any allocation fails. */
static double **run_em_alloc_matrix(int rows, int cols) {
	double **m;
	int r;

	// ask for at least one element so a zero-sized request cannot come
	// back as NULL and be mistaken for a failure
	m = malloc(sizeof *m * (rows > 0 ? (size_t) rows : 1));
	if (m == NULL)
		return NULL;
	for (r = 0; r < rows; r++) {
		m[r] = malloc(sizeof **m * (cols > 0 ? (size_t) cols : 1));
		if (m[r] == NULL) {
			while (r-- > 0)
				free(m[r]);
			free(m);
			return NULL;
		}
	}
	return m;
}

/* Releases a matrix obtained from run_em_alloc_matrix; NULL is accepted. */
static void run_em_free_matrix(double **m, int rows) {
	int r;

	if (m == NULL)
		return;
	for (r = 0; r < rows; r++)
		free(m[r]);
	free(m);
}

/*
 * Runs variational EM on `corpus` and writes the results into `directory`.
 *
 * start     - "seeded" (initialise from the corpus), "random", or anything
 *             else, which is passed to load_lda_model as a model to load
 * directory - output directory; receives "000", "likelihood.dat", a model
 *             and ".gamma" file every LAG iterations, "final",
 *             "final.gamma" and "word-assignments.dat"
 * corpus    - documents to fit
 *
 * Iterates while the relative likelihood change is negative or above
 * EM_CONVERGED (always for the first iterations) and the iteration count
 * is within EM_MAX_ITER.  VAR_MAX_ITER is doubled whenever the likelihood
 * change is negative.
 *
 * Allocation and file-open failures are reported on stderr and end the run
 * early.
 */
void run_em(char* start, char* directory, corpus* corpus) {
	int d, i = 0;
	int max_length = 0;
	lda_model *model = NULL;
	lda_suffstats* ss = NULL;
	double **var_gamma = NULL, **phi = NULL;
	double likelihood, likelihood_old = 0, converged = 1;
	FILE *likelihood_file = NULL, *w_asgn_file = NULL;
	char *filename;
	size_t filename_size;

	// every output name is `directory` plus a short fixed suffix (the
	// longest is "/word-assignments.dat"), so 64 spare bytes always fit
	filename_size = strlen(directory) + 64;
	filename = malloc(filename_size);
	if (filename == NULL) {
		fprintf(stderr, "run_em: out of memory\n");
		return;
	}

	// initialize model

	if (strcmp(start, "seeded")==0 || strcmp(start, "random")==0) {
		model = new_lda_model(corpus->num_terms, NTOPICS);
		ss = new_lda_suffstats(model);
		if (strcmp(start, "seeded")==0)
			corpus_initialize_ss(ss, model, corpus);
		else
			random_initialize_ss(ss, model);
		if (VERBOSE) {
			lda_mle(model, ss, 0);
		} else {
			quiet_lda_mle(model, ss, 0);
		}
		model->alpha = INITIAL_ALPHA;
	} else {
		model = load_lda_model(start);
		ss = new_lda_suffstats(model);
	}

	// allocate variational parameters; sized by model->num_topics because
	// that is what lda_inference indexes by, and a loaded model need not
	// agree with NTOPICS

	var_gamma = run_em_alloc_matrix(corpus->num_docs, model->num_topics);
	max_length = max_corpus_length(corpus);
	phi = run_em_alloc_matrix(max_length, model->num_topics);
	if (var_gamma == NULL || phi == NULL) {
		fprintf(stderr, "run_em: out of memory\n");
		goto cleanup;
	}

	snprintf(filename, filename_size, "%s/000", directory);
	save_lda_model(model, filename);

	// run expectation maximization

	snprintf(filename, filename_size, "%s/likelihood.dat", directory);
	likelihood_file = fopen(filename, "w");
	if (likelihood_file == NULL) {
		perror(filename);
		goto cleanup;
	}

	while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
		i++;
		if (VERBOSE)
			printf("**** em iteration %d ****\n", i);
		likelihood = 0;
		zero_initialize_ss(ss, model);

		// e-step

		for (d = 0; d < corpus->num_docs; d++) {
			if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
			likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
		}

		// m-step

		if (VERBOSE) {
			lda_mle(model, ss, ESTIMATE_ALPHA);
		} else {
			quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
		}

		// check for convergence

		converged = (likelihood_old - likelihood) / (likelihood_old);
		if (converged < 0) VAR_MAX_ITER = VAR_MAX_ITER * 2;
		likelihood_old = likelihood;

		// output model and likelihood

		fprintf(likelihood_file, "%10.10f\t%5.5e\n", likelihood, converged);
		fflush(likelihood_file);
		if ((i % LAG) == 0)
		{
			snprintf(filename, filename_size, "%s/%03d", directory, i);
			save_lda_model(model, filename);
			snprintf(filename, filename_size, "%s/%03d.gamma", directory, i);
			save_gamma(filename, var_gamma, corpus->num_docs, model->num_topics);
		}
	}

	// output the final model

	snprintf(filename, filename_size, "%s/final", directory);
	save_lda_model(model, filename);
	snprintf(filename, filename_size, "%s/final.gamma", directory);
	save_gamma(filename, var_gamma, corpus->num_docs, model->num_topics);

	// output the word assignments (for visualization)

	snprintf(filename, filename_size, "%s/word-assignments.dat", directory);
	w_asgn_file = fopen(filename, "w");
	if (w_asgn_file == NULL) {
		perror(filename);
		goto cleanup;
	}
	for (d = 0; d < corpus->num_docs; d++)
	{
		if ((d % 100) == 0 && VERBOSE) printf("final e step document %d\n",d);
		// only the phi filled in by inference is needed here; the
		// returned bound is not used
		(void) lda_inference(&(corpus->docs[d]), model, var_gamma[d], phi);
		write_word_assignment(w_asgn_file, &(corpus->docs[d]), phi, model);
	}
	fclose(w_asgn_file);

cleanup:
	if (likelihood_file != NULL)
		fclose(likelihood_file);
	run_em_free_matrix(phi, max_length);
	run_em_free_matrix(var_gamma, corpus->num_docs);
	free(filename);
	// NOTE(review): model and ss are not released; no destructor for
	// them is visible in this file
}


/*
 * read settings.
 *
 * Parses the settings file `filename`, which is expected to contain, in
 * this order:
 *
 *   var max iter <int>
 *   var convergence <float>
 *   em max iter <int>
 *   em convergence <float>
 *   alpha <action>
 *
 * and stores the values in VAR_MAX_ITER, VAR_CONVERGED, EM_MAX_ITER,
 * EM_CONVERGED and ESTIMATE_ALPHA.  An action of "fixed" sets
 * ESTIMATE_ALPHA to 0; anything else, including a missing action, sets it
 * to 1.
 *
 * If the file cannot be opened the error is reported on stderr and every
 * setting is left untouched.  Fields that cannot be parsed keep their
 * previous value and a warning is printed.
 */
void read_settings(char* filename) {
	FILE* fileptr;
	// initialised so the strcmp below is well defined even when the
	// "alpha" line is missing
	char alpha_action[100] = "";
	int parsed = 0;

	fileptr = fopen(filename, "r");
	if (fileptr == NULL) {
		perror(filename);
		return;
	}

	parsed += (fscanf(fileptr, "var max iter %d\n", &VAR_MAX_ITER) == 1);
	parsed += (fscanf(fileptr, "var convergence %f\n", &VAR_CONVERGED) == 1);
	parsed += (fscanf(fileptr, "em max iter %d\n", &EM_MAX_ITER) == 1);
	parsed += (fscanf(fileptr, "em convergence %f\n", &EM_CONVERGED) == 1);
	// width 99 leaves room for the terminator in alpha_action[100]
	parsed += (fscanf(fileptr, "alpha %99s", alpha_action) == 1);
	if (parsed != 5)
		fprintf(stderr, "%s: only %d of 5 settings could be read\n", filename, parsed);

	if (strcmp(alpha_action, "fixed")==0)
	{
		ESTIMATE_ALPHA = 0;
	}
	else
	{
		ESTIMATE_ALPHA = 1;
	}
	fclose(fileptr);
}


/* Allocates a rows x cols matrix of doubles as an array of row pointers.
 * Returns NULL, with nothing left allocated, if any allocation fails. */
static double **infer_alloc_matrix(int rows, int cols) {
	double **m;
	int r;

	// ask for at least one element so a zero-sized request cannot come
	// back as NULL and be mistaken for a failure
	m = malloc(sizeof *m * (rows > 0 ? (size_t) rows : 1));
	if (m == NULL)
		return NULL;
	for (r = 0; r < rows; r++) {
		m[r] = malloc(sizeof **m * (cols > 0 ? (size_t) cols : 1));
		if (m[r] == NULL) {
			while (r-- > 0)
				free(m[r]);
			free(m);
			return NULL;
		}
	}
	return m;
}

/* Releases a matrix obtained from infer_alloc_matrix; NULL is accepted. */
static void infer_free_matrix(double **m, int rows) {
	int r;

	if (m == NULL)
		return;
	for (r = 0; r < rows; r++)
		free(m[r]);
	free(m);
}

/*
 * inference only
 *
 * Loads the model at `model_root`, runs lda_inference on every document of
 * `corpus`, and writes two files:
 *
 *   <save>-lda-lhood.dat  one likelihood per document
 *   <save>-gamma.dat      the gamma parameters (via save_gamma)
 *
 * Allocation and file-open failures are reported on stderr and end the run
 * early.
 */
void infer(char* model_root, char* save, corpus* corpus) {
	FILE* fileptr = NULL;
	char *filename = NULL;
	size_t filename_size;
	int d;
	lda_model *model;
	double **var_gamma = NULL, likelihood, **phi;
	document* doc;

	model = load_lda_model(model_root);

	// both output names are `save` plus a short fixed suffix (the longer
	// one is "-lda-lhood.dat"), so 32 spare bytes always fit
	filename_size = strlen(save) + 32;
	filename = malloc(filename_size);
	var_gamma = infer_alloc_matrix(corpus->num_docs, model->num_topics);
	if (filename == NULL || var_gamma == NULL) {
		fprintf(stderr, "infer: out of memory\n");
		goto cleanup;
	}

	snprintf(filename, filename_size, "%s-lda-lhood.dat", save);
	fileptr = fopen(filename, "w");
	if (fileptr == NULL) {
		perror(filename);
		goto cleanup;
	}

	for (d = 0; d < corpus->num_docs; d++) {
		if (((d % 100) == 0) && (d>0) && VERBOSE) printf("document %d\n",d);

		doc = &(corpus->docs[d]);
		phi = infer_alloc_matrix(doc->length, model->num_topics);
		if (phi == NULL) {
			fprintf(stderr, "infer: out of memory\n");
			goto cleanup;
		}
		likelihood = lda_inference(doc, model, var_gamma[d], phi);
		// phi is per-document scratch space; release it before moving on
		infer_free_matrix(phi, doc->length);

		fprintf(fileptr, "%5.5f\n", likelihood);
	}
	fclose(fileptr);
	fileptr = NULL;

	snprintf(filename, filename_size, "%s-gamma.dat", save);
	save_gamma(filename, var_gamma, corpus->num_docs, model->num_topics);

cleanup:
	if (fileptr != NULL)
		fclose(fileptr);
	infer_free_matrix(var_gamma, corpus->num_docs);
	free(filename);
	// NOTE(review): model is not released; no destructor for it is
	// visible in this file
}


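/*
 * update sufficient statistics
 *
 * (no function follows this heading; the per-document update of the
 * sufficient statistics is performed in doc_e_step)
 */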


/* Prints the command-line synopsis for both sub-commands. */
static void lda_print_usage(void) {
	printf("usage : lda est [initial alpha] [k] [settings] [data] [random/seeded/*] [directory]\n");
	printf("        lda inf [settings] [model] [data] [name]\n");
}

/*
 * main
 *
 * Command-line entry point.  Seeds the random number generator from the
 * clock and dispatches on argv[1]:
 *
 *   est  - estimate a model (see run_em)
 *   inf  - run inference with an existing model (see infer)
 *
 * Returns 0 on success or when invoked without arguments (usage is
 * printed), and 1 when the command is unknown or has too few arguments.
 */
int main(int argc, char* argv[]) {
	corpus* corpus;
	time_t now;

	// time() takes a time_t*, which is not guaranteed to be a long*
	now = time(NULL);
	seedMT((long) now);
	// for reproducible runs a fixed seed such as 4357U can be used instead

	if (argc <= 1)
	{
		lda_print_usage();
		return(0);
	}

	if (strcmp(argv[1], "est")==0)
	{
		// est reads argv[2] .. argv[7]
		if (argc < 8)
		{
			lda_print_usage();
			return(1);
		}
		INITIAL_ALPHA = atof(argv[2]);
		NTOPICS = atoi(argv[3]);
		read_settings(argv[4]);
		corpus = read_data(argv[5]);
		make_directory(argv[7]);
		run_em(argv[6], argv[7], corpus);
	}
	else if (strcmp(argv[1], "inf")==0)
	{
		// inf reads argv[2] .. argv[5]
		if (argc < 6)
		{
			lda_print_usage();
			return(1);
		}
		read_settings(argv[2]);
		corpus = read_data(argv[4]);
		infer(argv[3], argv[5], corpus);
	}
	else
	{
		lda_print_usage();
		return(1);
	}
	return(0);
}

#ifdef USE_RUBY

/* Allocates a rows x cols matrix of doubles as an array of row pointers.
 * Returns NULL, with nothing left allocated, if any allocation fails. */
static double **quiet_em_alloc_matrix(int rows, int cols) {
	double **m;
	int r;

	// ask for at least one element so a zero-sized request cannot come
	// back as NULL and be mistaken for a failure
	m = malloc(sizeof *m * (rows > 0 ? (size_t) rows : 1));
	if (m == NULL)
		return NULL;
	for (r = 0; r < rows; r++) {
		m[r] = malloc(sizeof **m * (cols > 0 ? (size_t) cols : 1));
		if (m[r] == NULL) {
			while (r-- > 0)
				free(m[r]);
			free(m);
			return NULL;
		}
	}
	return m;
}

/* Releases a matrix obtained from quiet_em_alloc_matrix; NULL is accepted. */
static void quiet_em_free_matrix(double **m, int rows) {
	int r;

	if (m == NULL)
		return;
	for (r = 0; r < rows; r++)
		free(m[r]);
	free(m);
}

/*
 * Runs variational EM on `corpus` without writing any files; the results
 * are published through the last_model and last_gamma globals and
 * model_loaded is set.
 *
 * start  - "seeded" (initialise from the corpus), "random", or anything
 *          else, which is passed to load_lda_model as a model to load
 * corpus - documents to fit
 *
 * The stopping rule is the same as in run_em: iterate while the relative
 * likelihood change is negative or above EM_CONVERGED (always for the
 * first iterations) and the iteration count is within EM_MAX_ITER.
 *
 * If the working buffers cannot be allocated the error is reported on
 * stderr and the globals are left as they were.
 */
void run_quiet_em(char* start, corpus* corpus) {
	int d, i = 0;
	int max_length;
	lda_model *model = NULL;
	lda_suffstats* ss = NULL;
	double **var_gamma, **phi;
	double likelihood, likelihood_old = 0, converged = 1;

	// initialize model

	if (strcmp(start, "seeded")==0 || strcmp(start, "random")==0) {
		model = new_lda_model(corpus->num_terms, NTOPICS);
		ss = new_lda_suffstats(model);
		if (strcmp(start, "seeded")==0)
			corpus_initialize_ss(ss, model, corpus);
		else
			random_initialize_ss(ss, model);
		if (VERBOSE) {
			lda_mle(model, ss, 0);
		} else {
			quiet_lda_mle(model, ss, 0);
		}
		model->alpha = INITIAL_ALPHA;
	} else {
		model = load_lda_model(start);
		ss = new_lda_suffstats(model);
	}

	// allocate variational parameters; sized by model->num_topics because
	// that is what lda_inference and wrap_get_gamma index by, and a loaded
	// model need not agree with NTOPICS

	var_gamma = quiet_em_alloc_matrix(corpus->num_docs, model->num_topics);
	max_length = max_corpus_length(corpus);
	phi = quiet_em_alloc_matrix(max_length, model->num_topics);
	if (var_gamma == NULL || phi == NULL) {
		fprintf(stderr, "run_quiet_em: out of memory\n");
		quiet_em_free_matrix(phi, max_length);
		quiet_em_free_matrix(var_gamma, corpus->num_docs);
		return;
	}

	// publish the model and its gamma matrix through the globals
	// NOTE(review): a previously published last_gamma is not released
	// here; its row count is not recorded anywhere visible in this file
	last_model = model;
	last_gamma = var_gamma;
	model_loaded = TRUE;

	// run expectation maximization

	while (((converged < 0) || (converged > EM_CONVERGED) || (i <= 2)) && (i <= EM_MAX_ITER)) {
		i++;
		if (VERBOSE)
			printf("**** em iteration %d ****\n", i);
		likelihood = 0;
		zero_initialize_ss(ss, model);

		// e-step

		for (d = 0; d < corpus->num_docs; d++) {
			if ((d % 1000) == 0 && VERBOSE) printf("document %d\n",d);
			likelihood += doc_e_step(&(corpus->docs[d]), var_gamma[d], phi, model, ss);
		}

		// m-step

		if (VERBOSE) {
			lda_mle(model, ss, ESTIMATE_ALPHA);
		} else {
			quiet_lda_mle(model, ss, ESTIMATE_ALPHA);
		}

		// check for convergence

		converged = (likelihood_old - likelihood) / (likelihood_old);
		if (converged < 0) VAR_MAX_ITER = VAR_MAX_ITER * 2;
		likelihood_old = likelihood;
	}

	// phi is scratch space that nothing refers to after the loop; only
	// var_gamma stays alive, through last_gamma
	quiet_em_free_matrix(phi, max_length);
}


/*
 * Set all of the settings in one command.  Arguments, in order:
 *
 *  * init_alpha     -> INITIAL_ALPHA
 *  * num_topics     -> NTOPICS
 *  * max_iter       -> VAR_MAX_ITER
 *  * convergence    -> VAR_CONVERGED
 *  * em_max_iter    -> EM_MAX_ITER
 *  * em_convergence -> EM_CONVERGED
 *  * est_alpha      -> ESTIMATE_ALPHA
 *
 * Each value is converted and stored in turn.  Always returns true.
 */
static VALUE wrap_set_config(VALUE self, VALUE init_alpha, VALUE num_topics, VALUE max_iter, VALUE convergence, VALUE em_max_iter, VALUE em_convergence, VALUE est_alpha) {
	// model shape
	INITIAL_ALPHA = NUM2DBL(init_alpha);
	NTOPICS = NUM2INT(num_topics);

	// per-document variational inference
	VAR_MAX_ITER = NUM2INT(max_iter);
	const double var_threshold = NUM2DBL(convergence);
	VAR_CONVERGED = (float) var_threshold;

	// outer EM loop
	EM_MAX_ITER = NUM2INT(em_max_iter);
	const double em_threshold = NUM2DBL(em_convergence);
	EM_CONVERGED = (float) em_threshold;

	ESTIMATE_ALPHA = NUM2INT(est_alpha);

	return Qtrue;
}

/*
 * Get the maximum number of variational inference iterations
 * (VAR_MAX_ITER).
 */
static VALUE wrap_get_max_iter(VALUE self) {
	VALUE max_iter = rb_int_new(VAR_MAX_ITER);

	return max_iter;
}

/*
 * Set the maximum number of variational inference iterations
 * (VAR_MAX_ITER).  Returns the value that was passed in.
 */
static VALUE wrap_set_max_iter(VALUE self, VALUE max_iter) {
	const int requested = NUM2INT(max_iter);

	VAR_MAX_ITER = requested;
	return max_iter;
}

/*
 * Get the convergence threshold used by variational inference
 * (VAR_CONVERGED).
 */
static VALUE wrap_get_converged(VALUE self) {
	const double threshold = VAR_CONVERGED;

	return rb_float_new(threshold);
}

/*
 * Set the convergence threshold used by variational inference
 * (VAR_CONVERGED).  The value is stored in single precision.  Returns the
 * value that was passed in.
 */
static VALUE wrap_set_converged(VALUE self, VALUE converged) {
	const double requested = NUM2DBL(converged);

	VAR_CONVERGED = (float) requested;
	return converged;
}

/*
 * Get the maximum number of EM iterations (EM_MAX_ITER).
 */
static VALUE wrap_get_em_max_iter(VALUE self) {
	VALUE em_max_iter = rb_int_new(EM_MAX_ITER);

	return em_max_iter;
}

/*
 * Set the maximum number of EM iterations (EM_MAX_ITER).  Returns the
 * value that was passed in.
 */
static VALUE wrap_set_em_max_iter(VALUE self, VALUE em_max_iter) {
	const int requested = NUM2INT(em_max_iter);

	EM_MAX_ITER = requested;
	return em_max_iter;
}

/*
 * Get the convergence threshold of the EM loop (EM_CONVERGED).
 */
static VALUE wrap_get_em_converged(VALUE self) {
	const double threshold = EM_CONVERGED;

	return rb_float_new(threshold);
}

/*
 * Set the convergence threshold of the EM loop (EM_CONVERGED).  The value
 * is stored in single precision.  Returns the value that was passed in.
 */
static VALUE wrap_set_em_converged(VALUE self, VALUE em_converged) {
	const double requested = NUM2DBL(em_converged);

	EM_CONVERGED = (float) requested;
	return em_converged;
}

/*
 * Get the initial value of alpha (INITIAL_ALPHA).
 */
static VALUE wrap_get_initial_alpha(VALUE self) {
	const double alpha = INITIAL_ALPHA;

	return rb_float_new(alpha);
}

/*
 * Get the configured number of topics (NTOPICS).
 */
static VALUE wrap_get_num_topics(VALUE self) {
	VALUE num_topics = rb_int_new(NTOPICS);

	return num_topics;
}

/*
 * Set the initial value of alpha (INITIAL_ALPHA).  Returns the value that
 * was passed in.
 */
static VALUE wrap_set_initial_alpha(VALUE self, VALUE initial_alpha) {
	// stored without a (float) cast, as wrap_set_config and main do;
	// the cast rounded the value to single precision on its way in
	INITIAL_ALPHA = NUM2DBL(initial_alpha);

	return initial_alpha;
}

/*
 * Set the number of topics (NTOPICS).  Returns the value that was passed
 * in.
 */
static VALUE wrap_set_num_topics(VALUE self, VALUE ntopics) {
	const int requested = NUM2INT(ntopics);

	NTOPICS = requested;
	return ntopics;
}

/*
 * Get the estimate-alpha flag (ESTIMATE_ALPHA; 0 means alpha is fixed).
 */
static VALUE wrap_get_estimate_alpha(VALUE self) {
	VALUE est_alpha = rb_int_new(ESTIMATE_ALPHA);

	return est_alpha;
}

/*
 * Set the estimate-alpha flag (ESTIMATE_ALPHA; 0 means alpha is fixed).
 * Returns the value that was passed in.
 */
static VALUE wrap_set_estimate_alpha(VALUE self, VALUE est_alpha) {
	const int requested = NUM2INT(est_alpha);

	ESTIMATE_ALPHA = requested;
	return est_alpha;
}

/*
 * Get the verbosity setting: true when VERBOSE is set, false otherwise.
 */
static VALUE wrap_get_verbosity(VALUE self) {
    return VERBOSE ? Qtrue : Qfalse;
}


/*
 * Set the verbosity level.  Only the value true switches verbose output
 * on; every other value switches it off.  Returns the value that was
 * passed in.
 */
static VALUE wrap_set_verbosity(VALUE self, VALUE verbosity) {
    VERBOSE = (verbosity == Qtrue) ? TRUE : FALSE;

    return verbosity;
}


/*
 * Run the EM algorithm with the loaded corpus and using the current
 * configuration settings.  The +start+ parameter can take the following
 * values:
 *  * random - starting alpha are randomized
 *  * seeded - loaded based on the corpus values
 *  * <filename> - path to the file containing the model
 *
 * Does nothing when no corpus has been loaded.  Always returns nil.
 */
static VALUE wrap_em(VALUE self, VALUE start) {
	if (!corpus_loaded)
		return Qnil;

	// StringValueCStr replaces the obsolete STR2CSTR macro, which is
	// absent from the headers of Ruby 1.9 and later
	run_quiet_em(StringValueCStr(start), last_corpus);

	return Qnil;
}


/*
 * Load settings from the given file (see read_settings).  Always returns
 * true.
 */
static VALUE wrap_load_settings(VALUE self, VALUE settings_file) {
	// StringValueCStr replaces the obsolete STR2CSTR macro, which is
	// absent from the headers of Ruby 1.9 and later
	read_settings(StringValueCStr(settings_file));

	return Qtrue;
}

/*
 * Load the corpus from the given file.  This will not create
 * a +Corpus+ object that is accessible, but it will load the corpus
 * much faster.
 *
 * The file is only read when no corpus has been loaded yet; otherwise the
 * call leaves the current corpus in place.  Always returns true.
 */
static VALUE wrap_load_corpus(VALUE self, VALUE filename) {
	if (!corpus_loaded) {
		// StringValueCStr replaces the obsolete STR2CSTR macro, which
		// is absent from the headers of Ruby 1.9 and later
		last_corpus = read_data(StringValueCStr(filename));
		corpus_loaded = TRUE;
	}
	return Qtrue;
}

/*
 * Set the corpus.
 */
static VALUE wrap_ruby_corpus(VALUE self, VALUE rcorpus) {
	corpus* c;
	int i = 0;
	int j = 0;
	
	c = malloc(sizeof(corpus));
	c->num_terms = NUM2INT(rb_iv_get(rcorpus, "@num_terms"));
	c->num_docs = NUM2INT(rb_iv_get(rcorpus, "@num_docs"));
	c->docs = (document*) malloc(sizeof(document) * c->num_docs);
	VALUE doc_ary = rb_iv_get(rcorpus, "@documents");
	for (i = 0; i < c->num_docs; i++) {
		VALUE one_doc = rb_ary_entry(doc_ary, i);
		VALUE words = rb_iv_get(one_doc, "@words");
		VALUE counts = rb_iv_get(one_doc, "@counts");
		
		c->docs[i].length = NUM2INT(rb_iv_get(one_doc, "@length"));
		c->docs[i].total = NUM2INT(rb_iv_get(one_doc, "@total"));
		c->docs[i].words = malloc(sizeof(int) * c->docs[i].length);
		c->docs[i].counts = malloc(sizeof(int) * c->docs[i].length);
		for (j = 0; j < c->docs[i].length; j++) {
			VALUE one_word = NUM2INT(rb_ary_entry(words, j));
			VALUE one_count = NUM2INT(rb_ary_entry(counts, j));
			c->docs[i].words[j] = one_word;
			c->docs[i].counts[j] = one_count;
		}
	}
	
	last_corpus = c;
	corpus_loaded = TRUE;
	
	rb_iv_set(self, "@corpus", rcorpus);
	
	return Qtrue;
}


/*
 * Get the gamma values after the model has been run.
 *
 * Returns nil when no model has been loaded; otherwise an Array with one
 * entry per document of the current corpus, each an Array holding that
 * document's gamma value for every topic of the model.
 */
static VALUE wrap_get_gamma(VALUE self) {
	VALUE per_doc, per_topic;
	int doc_idx, topic_idx;

	if (!model_loaded)
		return Qnil;

	// last_gamma is read as a double[num_docs][num_topics]
	per_doc = rb_ary_new2(last_corpus->num_docs);
	doc_idx = 0;
	while (doc_idx < last_corpus->num_docs) {
		per_topic = rb_ary_new2(last_model->num_topics);
		for (topic_idx = 0; topic_idx < last_model->num_topics; topic_idx++) {
			VALUE value = rb_float_new(last_gamma[doc_idx][topic_idx]);

			rb_ary_store(per_topic, topic_idx, value);
		}
		rb_ary_store(per_doc, doc_idx, per_topic);
		doc_idx++;
	}

	return per_doc;
}

/*
 * Get the beta matrix after the model has been run.
 *
 * Returns nil when no model has been loaded; otherwise an Array with one
 * entry per topic, each an Array holding the model's log_prob_w value for
 * every term.
 */
static VALUE wrap_get_model_beta(VALUE self) {
	VALUE per_topic, per_term;
	int topic_idx, term_idx;

	if (!model_loaded)
		return Qnil;

	// log_prob_w is read as a double[num_topics][num_terms]
	per_topic = rb_ary_new2(last_model->num_topics);
	topic_idx = 0;
	while (topic_idx < last_model->num_topics) {
		per_term = rb_ary_new2(last_model->num_terms);
		for (term_idx = 0; term_idx < last_model->num_terms; term_idx++) {
			VALUE value = rb_float_new(last_model->log_prob_w[topic_idx][term_idx]);

			rb_ary_store(per_term, term_idx, value);
		}
		rb_ary_store(per_topic, topic_idx, per_term);
		topic_idx++;
	}

	return per_topic;
}


/*
 * Get the settings used for the model.
 *
 * Returns nil when no model has been loaded; otherwise the Array
 * [num_topics, num_terms, alpha].
 */
static VALUE wrap_get_model_settings(VALUE self) {
	VALUE settings, entry;

	if (!model_loaded)
		return Qnil;

	settings = rb_ary_new();

	entry = rb_int_new(last_model->num_topics);
	rb_ary_push(settings, entry);

	entry = rb_int_new(last_model->num_terms);
	rb_ary_push(settings, entry);

	entry = rb_float_new(last_model->alpha);
	rb_ary_push(settings, entry);

	return settings;
}


void Init_lda_ext() {
	corpus_loaded = FALSE;
	model_loaded = FALSE;
    VERBOSE = TRUE;
	
	rb_require("lda");
	
	/*
	 * The Latent Dirichlet Allocation algorithm by Blei et al (2003).  Ruby wrapper based on 
	 * lda-c code by David Blei (available at http://www.cs.princeton.edu/~blei/lda-c).
	 */
	rb_mLda = rb_define_module("Lda");
	
	/*
	 * Class that handles most of the functionality of LDA.
	 */
	rb_cLda = rb_define_class_under(rb_mLda, "Lda", rb_cObject);
	
	/*
	 * Class that represents a corpus.
	 */
	rb_cLdaCorpus = rb_define_class_under(rb_mLda, "Corpus", rb_cObject);
	
	/*
	 * Class that represents a single document.
	 */
	rb_cLdaDocument = rb_define_class_under(rb_mLda, "Document", rb_cObject);
	
	
	// method to load the corpus
	rb_define_method(rb_cLda, "fast_load_corpus_from_file", wrap_load_corpus, 1);
	rb_define_method(rb_cLda, "corpus=", wrap_ruby_corpus, 1);
	
	// method to run em
	rb_define_method(rb_cLda, "em", wrap_em, 1);

	// method to load settings from file
	rb_define_method(rb_cLda, "load_settings", wrap_load_settings, 1);

	// method to set all the config options at once
	rb_define_method(rb_cLda, "set_config", wrap_set_config, 5);

	// accessor stuff for main settings
	rb_define_method(rb_cLda, "max_iter", wrap_get_max_iter, 0);
	rb_define_method(rb_cLda, "max_iter=", wrap_set_max_iter, 1);
	rb_define_method(rb_cLda, "convergence", wrap_get_converged, 0);
	rb_define_method(rb_cLda, "convergence=", wrap_set_converged, 1);
	rb_define_method(rb_cLda, "em_max_iter", wrap_get_em_max_iter, 0);
	rb_define_method(rb_cLda, "em_max_iter=", wrap_set_em_max_iter, 1);
	rb_define_method(rb_cLda, "em_convergence", wrap_get_em_converged, 0);
	rb_define_method(rb_cLda, "em_convergence=", wrap_set_em_converged, 1);	
	rb_define_method(rb_cLda, "init_alpha=", wrap_set_initial_alpha, 1);
	rb_define_method(rb_cLda, "init_alpha", wrap_get_initial_alpha, 0);
	rb_define_method(rb_cLda, "est_alpha=", wrap_set_estimate_alpha, 1);
	rb_define_method(rb_cLda, "est_alpha", wrap_get_estimate_alpha, 0);
	rb_define_method(rb_cLda, "num_topics", wrap_get_num_topics, 0);
	rb_define_method(rb_cLda, "num_topics=", wrap_set_num_topics, 1);
    rb_define_method(rb_cLda, "verbose", wrap_get_verbosity, 0);
    rb_define_method(rb_cLda, "verbose=", wrap_set_verbosity, 1);
	
	// retrieve model and gamma
	rb_define_method(rb_cLda, "beta", wrap_get_model_beta, 0);
	rb_define_method(rb_cLda, "gamma", wrap_get_gamma, 0);
	rb_define_method(rb_cLda, "model", wrap_get_model_settings, 0);
}

#endif