/* experiment.c - read the experiment control files
 *
 * Copyright (C) 2006  Jochen Voss, Andreas Voss.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA.
 */

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "fast-dm.h"


/* Size of the stack buffers into which file name templates are
 * expanded with snprintf. */
#define  FNAME_MAX_LEN  1024

/* Template for the per-dataset log file name; it is used when the
 * experiment has no log template of its own and is expanded with the
 * dataset index plus one. */
const char *default_log_template = "%03d.log";


/**********************************************************************
 * auxiliary functions
 */

static int
string_to_int (const char *str, long int *res)
/* Convert the string 'str' to an integer and store it in '*res'.
 *
 * The conversion is done by strtol with base 0, i.e. decimal, octal
 * (leading "0") and hexadecimal (leading "0x") notation are accepted.
 * The whole string must be consumed by the conversion and the value
 * must be representable as a 'long int'.
 *
 * Return 1 on success and 0 on error.  On error '*res' is not
 * modified.
 */
{
	char *tail;
	long int  x;

	/* strtol reports overflow only via errno, so clear it first */
	errno = 0;
	x = strtol (str, &tail, 0);
	if (tail == str)  return 0;		/* nothing converted */
	if (*tail != '\0')  return 0;		/* trailing garbage */
	if (errno == ERANGE)  return 0;		/* value was clamped */
	*res = x;
	return 1;
}

static int
string_to_double (const char *str, double *res)
/* Convert the string 'str' to a floating point number and store it in
 * '*res'.
 *
 * The whole string must be consumed by strtod and the result must be
 * a finite number: "nan", "inf" and literals which overflow to
 * infinity are rejected, because a NaN would slip through range
 * checks like 'x < 2' (every comparison with NaN is false).
 *
 * Return 1 on success and 0 on error.  On error '*res' is not
 * modified.
 */
{
	char *tail;
	double  x;

	x = strtod (str, &tail);
	if (tail == str)  return 0;		/* nothing converted */
	if (*tail != '\0')  return 0;		/* trailing garbage */
	if (! isfinite (x))  return 0;		/* NaN, infinity, overflow */
	*res = x;
	return 1;
}

static void
check_one_template (const char *tmpl)
/* Check that 'tmpl' can safely be used as a printf-style format with
 * exactly one 'int' argument (the dataset number).
 *
 * The template is first checked syntactically, because passing an
 * unsuitable format (e.g. "%s", "%n" or "%d%d") to snprintf together
 * with a single 'int' is undefined behaviour.  Accepted are "%%" and
 * exactly one conversion of the form
 *
 *     % [flags "-+ #0"] [width] [.precision] (d|i|o|u|x|X)
 *
 * Afterwards the template is expanded for the numbers 1 and 2; the
 * results must fit into FNAME_MAX_LEN bytes and must differ.
 *
 * If the template is not acceptable, an error message is printed to
 * stderr and the program is terminated with exit status 1.
 */
{
	char  buffer1[FNAME_MAX_LEN], buffer2[FNAME_MAX_LEN];
	const char *p;
	int  conversions = 0;
	int  error = 0;
	int  rc;

	p = tmpl;
	while (*p && ! error) {
		if (*p++ != '%')  continue;
		if (*p == '%') {
			/* "%%" is a literal percent sign */
			++p;
			continue;
		}
		p += strspn (p, "-+ #0");		/* flags */
		p += strspn (p, "0123456789");		/* field width */
		if (*p == '.')				/* precision */
			p += 1 + strspn (p+1, "0123456789");
		/* the guard against '\0' is needed because strchr also
		 * finds the terminator of its first argument */
		if (*p != '\0' && strchr ("diouxX", *p)) {
			++conversions;
			++p;
		} else {
			error = 1;
		}
	}
	if (conversions != 1)  error = 1;

	if (! error) {
		rc = snprintf (buffer1, sizeof buffer1, tmpl, 1);
		if (rc < 0 || (size_t)rc >= sizeof buffer1) {
			/* output error or truncated file name */
			error = 1;
		} else {
			rc = snprintf (buffer2, sizeof buffer2, tmpl, 2);
			if (rc < 0 || (size_t)rc >= sizeof buffer2
			    || strcmp (buffer1, buffer2) == 0)
				error = 1;
		}
	}
	if (error) {
		fprintf (stderr,
			 "invalid format string '%s', aborting\n",
			 tmpl);
		exit(1);
	}
}

static void
check_templates (const char *template, const char *log_template)
/* Validate the data file template together with the (optional) log
 * file template.
 *
 * Every template containing a '%' is checked by check_one_template.
 * The combination is accepted if both templates contain a '%', if
 * neither does, or if the data template contains one and there is no
 * log template at all.  Otherwise an error message is printed to
 * stderr and the program exits with status 1.
 */
{
	int  data_numbered = (strchr(template, '%') != NULL);
	int  log_numbered = 0;

	if (log_template)
		log_numbered = (strchr(log_template, '%') != NULL);

	if (data_numbered)  check_one_template (template);
	if (log_numbered)  check_one_template (log_template);

	/* both flags are 0 or 1: equal means "both" or "neither" */
	if (data_numbered != log_numbered
	    && ! (data_numbered && ! log_template)) {
		fprintf (stderr,
			 "incompatible format strings '%s' and '%s', aborting\n",
			 template, log_template);
		exit(1);
	}
}

static void
str_add (char **s1, const char *s2, int *used, int *alloc)
/* Append the string 's2' to the heap-allocated string '*s1'.
 *
 * '*used' is the current length of '*s1' (not counting the
 * terminating NUL) and '*alloc' is the size of the buffer; both are
 * updated.  The buffer is enlarged via xrenew when required, so the
 * pointer stored in '*s1' may change.
 */
{
	int  l = (int) strlen(s2);
	int  needed = *used + l + 1;	/* old text + new text + NUL */

	if (needed > *alloc) {
		/* Grow in steps of 64 until the result fits; a single
		 * step is not enough when 's2' is longer than 64 bytes. */
		while (*alloc < needed)
			*alloc += 64;
		*s1 = xrenew(char, *s1, *alloc);
	}
	memcpy (*s1+*used, s2, l);
	*used += l;
	(*s1)[*used] = '\0';
}

/**********************************************************************
 * information about one experiment
 */

/* Description of one model parameter, as configured by the 'set' and
 * 'depends' commands of the control file. */
struct param_info {
	/* parameter name as used in the control file; an entry with a
	 * NULL name terminates an array of these structures */
	const char *name;
	/* index passed to dataset_add_cmd for this parameter; negative
	 * for "z", which is registered via dataset_add_z instead of
	 * dataset_add_param */
	int  idx;
	/* names of the conditions listed in 'depends' for this parameter */
	struct set *depends;
	/* text of the value given by 'set', or NULL if the parameter was
	 * not fixed (it is then listed as "optimised") */
	char *value;
};

/* Initial contents of the 'params' array of a new experiment: one
 * entry per model parameter, without dependencies and without a fixed
 * value.  The entry with the NULL name marks the end of the array. */
static const  struct param_info  default_params[] = {
	{ "a", p_a, NULL, NULL},
	{ "z", -1, NULL, NULL},
	{ "v", p_v, NULL, NULL},
	{ "t0", p_t0, NULL, NULL},
	{ "sz", p_sz, NULL, NULL},
	{ "sv", p_sv, NULL, NULL},
	{ "st0", p_st0, NULL, NULL},
	{ NULL, 0, NULL, NULL}
};

/* Everything read from one experiment control file. */
struct experiment {
	/* name of the control file (private copy) */
	char *fname;

	/* precision for computing the CDF; defaults to 3 and is changed
	 * by the 'precision' command */
	double  precision;

	/* model parameters: private copy of 'default_params', terminated
	 * by an entry with a NULL name */
	struct param_info *params;

	/* experimental conditions: all names used in 'depends' commands */
	struct set *cond;

	/* data files */
	/* argument of the 'load' command */
	char *data_template;
	/* number of entries in 'fi' (columns of the 'format' command) */
	int nf;
	/* meaning of each column of the data files */
	struct fi {
		enum fi_type { t_time, t_response, t_ignore, t_cond } type;
		/* for t_cond columns: index into cond->item */
		int cond_idx;
	} *fi;

	/* per data-set log files: argument of 'log', or NULL */
	char *log_template;

	/* common log file */
	/* argument of the 'save' command, or NULL */
	char *save_file_name;
	/* column names of the save file; NULL until experiment_save
	 * creates the array */
	struct array *save_fields;
};

struct experiment *
new_experiment (const char *fname)
/* Read information for a new experiment from the input file 'fname'
 * and return it as a structure.  The format of experiment control
 * files is described in the users' guide (file "MANUAL" of the source
 * code distribution)
 */
{
	struct file *file;
	struct experiment *ex;
	struct param_info *params;
	const char *const *tokens;
	int depends_seen = 0;
	int format_seen = 0;
	int i, j, n;

	file = new_file (fname);
	if (!file)
		return NULL;

	ex = xnew(struct experiment, 1);
	ex->fname = xstrdup(fname);
	ex->precision = 3;
	ex->params = params = xmalloc(sizeof (default_params));
	memcpy (params, default_params, sizeof (default_params));
	for (i = 0; params[i].name; ++i) {
		params[i].depends = new_set ();
	}
	ex->cond = new_set();
	ex->data_template = NULL;
	ex->log_template = NULL;
	ex->save_file_name = NULL;
	ex->save_fields = NULL;

	while (file_read (file, &tokens, &n)) {
		if (strcmp (tokens[0], "set") == 0) {
			if (depends_seen)
				file_error (file, "'set' after 'depends'");
			if (n != 3)
				file_error (file, "syntax error");

			for (i = 0; params[i].name; ++i) {
				if (strcmp (params[i].name, tokens[1]) == 0)
					break;
			}
			if (!params[i].name)
				file_error (file, "invalid parameter '%s'",
					    tokens[1]);
			if (params[i].value)
				file_error (file, "parameter '%s' set twice",
					    tokens[1]);
			params[i].value = xstrdup(tokens[2]);
		} else if (strcmp (tokens[0], "depends") == 0) {
			if (format_seen)
				file_error (file, "'depends' after 'format'");
			if (n < 3)
				file_error (file, "not enough arguments");

			for (i = 0; params[i].name; ++i) {
				if (strcmp (params[i].name, tokens[1]) == 0)
					break;
			}
			if (!params[i].name)
				file_error (file, "invalid parameter '%s'",
					    tokens[1]);
			if (params[i].value)
				file_error (file, "parameter '%s' already set",
					    tokens[1]);
			for (j = 2; j < n; ++j) {
				if (strcmp(tokens[j],"RESPONSE") == 0
				    || strcmp(tokens[j],"TIME") == 0
				    || strcmp(tokens[j],"*") == 0) {
					file_error (file,
						    "invalid condition '%s'",
						    tokens[j]);
				}
				set_item (params[i].depends, tokens[j], 1);
				set_item (ex->cond, tokens[j], 1);
			}
		} else if (strcmp (tokens[0], "format") == 0) {
			struct set *seen = new_set ();

			if (ex->data_template)
				file_error (file, "'format' after 'load'");
			if (format_seen)
				file_error (file, "more than one 'format'");
			format_seen = 1;

			ex->nf = n-1;
			ex->fi = xnew(struct fi, ex->nf);
			for (i = 1; i < n; ++i) {
				const char *field = tokens[i];
				enum fi_type  ft = t_ignore;

				j = set_item (ex->cond, field, 0);
				if (strcmp (field, "TIME") == 0) {
					ft = t_time;
				} else if (strcmp (field, "RESPONSE") == 0) {
					ft = t_response;
				} else if (j>=0) {
					ft = t_cond;
				}
				ex->fi[i-1].type = ft;
				if (ft == t_ignore)  continue;
				if (set_item (seen, field, 0) >= 0)
					file_error (file, "duplicate field %s",
						    field);
				set_item (seen, field, 1);
				if (ft == t_cond)
					ex->fi[i-1].cond_idx = j;
			}
			if (set_item (seen, "TIME", 0) < 0)
				file_error (file, "missing field 'TIME'");
			if (set_item (seen, "RESPONSE", 0) < 0)
				file_error (file, "missing field 'RESPONSE'");
			for (i=0; i<ex->cond->used; ++i) {
				if (set_item (seen, ex->cond->item[i], 0) < 0)
					file_error (file,
						    "unknown condition '%s'",
						    ex->cond->item[i]);
			}
			delete_set (seen);
		} else if (strcmp (tokens[0], "load") == 0) {
			if (! format_seen)
				file_error (file,
					    "data format not specified");
			if (ex->data_template)
				file_error (file, "more than one 'load'");
			if (n != 2)
				file_error (file, "wrong number of arguments");
			ex->data_template = xstrdup(tokens[1]);
		} else if (strcmp (tokens[0], "log") == 0) {
			if (n != 2)
				file_error (file, "wrong number of arguments");
			ex->log_template = xstrdup(tokens[1]);
		} else if (strcmp (tokens[0], "save") == 0) {
			if (n != 2)
				file_error (file, "wrong number of arguments");
			ex->save_file_name = xstrdup(tokens[1]);
		} else if (strcmp (tokens[0], "precision") == 0) {
			if (n != 2)
				file_error (file, "wrong number of arguments");
			if (! string_to_double (tokens[1], &ex->precision)
			    || ex->precision < 2)
				file_error (file, "invalid precision '%s'",
					    tokens[1]);
		} else {
			file_error (file, "unknown command '%s'", tokens[0]);
		}
	}
	if (! ex->data_template)
		file_error (file, "missing 'load'");
	check_templates (ex->data_template, ex->log_template);

	delete_file (file);

	return ex;
}

void
delete_experiment (struct experiment *ex)
{
	int i;

	if (ex->save_fields)
		delete_array(ex->save_fields);
	xfree(ex->save_file_name);
	xfree(ex->log_template);
	xfree(ex->fi);
	xfree(ex->data_template);
	delete_set(ex->cond);
	for (i = 0; ex->params[i].name; ++i) {
		delete_set(ex->params[i].depends);
		xfree(ex->params[i].value);
	}
	xfree(ex->params);
	xfree(ex->fname);
	xfree(ex);
}

void
experiment_print (const struct experiment *ex)
/* Print all information about the experiment "ex" to stdout.
 *
 * In detail, the names and the format of the data files are printed.
 * Additionally, it is specified for each parameter of the diffusion
 * model whether it is being optimised, or whether it is fixed to a
 * given value, and--where necessary--on which condition-variables of
 * the data file it depends.
 */
{
	int  i, j, first;

	printf ("experiment %s:\n", ex->fname);

	printf ("  precision: %g\n", ex->precision);

	printf ("  format of %s:", ex->data_template);
	for (i=0; i<ex->nf; ++i) {
		switch (ex->fi[i].type) {
		case t_time:
			printf (" TIME");
			break;
		case t_response:
			printf (" RESPONSE");
			break;
		case t_ignore:
			printf (" *");
			break;
		case t_cond:
			printf (" %s", ex->cond->item[ex->fi[i].cond_idx]);
			break;
		}
	}
	putchar ('\n');

	first = 1;
	for (i=0; ex->params[i].name; ++i) {
		if (ex->params[i].value)  continue;
		if (first) {
			printf ("  optimised parameters: ");
			first = 0;
		} else {
			printf (", ");
		}
		printf ("%s", ex->params[i].name);
		for (j=0; j<ex->params[i].depends->used; ++j) {
			printf ("_%s",ex->params[i].depends->item[j]);
		}
	}
	if (! first)  putchar ('\n');

	first = 1;
	for (i=0; ex->params[i].name; ++i) {
		if (! ex->params[i].value)  continue;
		if (first) {
			printf ("  fixed parameters: ");
			first = 0;
		} else {
			printf (", ");
		}
		printf ("%s=%s", ex->params[i].name, ex->params[i].value);
	}
	if (! first)  putchar ('\n');
}

static struct file *
experiment_open_dataset (const struct experiment *ex, int i)
/* Open the data file for dataset number 'i' (counted from 0).
 *
 * If the data template contains a '%', it is expanded with 'i+1'.
 * Otherwise the template itself names the only data file, so only
 * dataset 0 exists.  The return value is the result of new_file, or
 * NULL if there is no dataset 'i'.
 */
{
	char  expanded [FNAME_MAX_LEN];

	if (strchr(ex->data_template, '%') == NULL) {
		if (i != 0)  return  NULL;
		return  new_file (ex->data_template);
	}

	snprintf (expanded, FNAME_MAX_LEN, ex->data_template, i+1);
	return  new_file (expanded);
}

static char *
experiment_logfile_name (const struct experiment *ex, int i)
/* Return the name of the log file for dataset number 'i' (counted
 * from 0) as a newly allocated string.
 *
 * Without a log template in 'ex' the name is derived from
 * 'default_log_template'.  A log template without '%' names a single
 * file which is only used for dataset 0; for all other datasets NULL
 * is returned.
 */
{
	char  expanded [FNAME_MAX_LEN];
	const char *tmpl = ex->log_template;

	if (tmpl != NULL && strchr(tmpl, '%') == NULL) {
		/* fixed name: valid for the first dataset only */
		if (i != 0)  return  NULL;
		return  xstrdup(tmpl);
	}

	if (tmpl == NULL)  tmpl = default_log_template;
	snprintf (expanded, FNAME_MAX_LEN, tmpl, i+1);
	return  xstrdup(expanded);
}

static void
dataset_init_param (struct dataset *ds, struct param_info *param,
		    const struct dict *condv)
/* Add the commands to 'ds' which initialise the parameter 'param'.
 *
 * The dictionary 'condv' is queried with the names of the conditions
 * 'param' depends on; the values found there are appended to the
 * parameter name, separated by '_', to form the name under which the
 * parameter is registered in 'ds'.
 *
 * If 'param' has a fixed value which is not a valid number, an error
 * message is printed to stderr and the program exits with status 1.
 */
{
	int  capacity = 64;
	int  length = 0;
	char *label = xnew(char, capacity);
	int  k;

	/* label = NAME_VALUE1_VALUE2... */
	label[0] = '\0';
	str_add (&label, param->name, &length, &capacity);
	for (k=0; k<param->depends->used; ++k) {
		const char *level;

		level = dict_lookup (condv, param->depends->item[k]);
		str_add (&label, "_", &length, &capacity);
		str_add (&label, level, &length, &capacity);
	}

	if (! param->value) {
		/* not fixed: a negative index marks the parameter
		 * which is registered via dataset_add_z */
		if (param->idx < 0) {
			dataset_add_cmd (ds, c_copy_param, param->idx,
					 dataset_add_z (ds, label));
		} else {
			dataset_add_cmd (ds, c_copy_param, param->idx,
					 dataset_add_param (ds, label));
		}
	} else {
		double  fixed;

		if (! string_to_double (param->value, &fixed)) {
			fprintf (stderr, "invalid value '%s' for '%s'\n",
				 param->value, label);
			exit (1);
		}
		dataset_add_cmd (ds, c_copy_const, param->idx,
				 dataset_add_const (ds, fixed));
	}
	xfree(label);
}

struct dataset *
experiment_get_dataset (const struct experiment *ex, int dsn)
/* Read dataset number 'dsn'.
 *
 * The file name of the data file is defined by the field "template"
 * in "ex", where "dsn" is inserted for the number of the data set
 * (e.g. "%d.dat" => "1.dat").  The data-set structure contains the
 * parameters to be optimised ('param'), values for the parameters
 * fixed to a given constant ('consts'), and arrays of responses and
 * reaction times ('samples').  If different experimental conditions
 * are defined for the experiment, different samples are allocated
 * within the returned structure.  If the data file does not fit to the
 * format specified in the 'ex' structure, the program is aborted.
 *
 * As a side-effect this function sets the computational precision for
 * computing the CDF.
 */
{
	struct dataset *ds;
	struct file *file;
	const char *const* words;
	struct dict *condv;
	int  sample_name_used, sample_name_alloc;
	char *sample_name;
	int  i, n;

	file = experiment_open_dataset (ex, dsn);
	if (! file)  return NULL;

	ds = new_dataset ();
	ds->fname = xstrdup(file_name (file));
	ds->logname = experiment_logfile_name (ex, dsn);

	condv = new_dict ();

	sample_name_alloc = 80;
	sample_name = xnew(char, sample_name_alloc);
	sample_name_used = 1;
	sample_name[0] = '\0';

	while (file_read (file, &words, &n)) {
		struct samples *samples;
		double  t;
		long int  resp;
		int  idx;

		/* analyse a line from the data file */
		if (n != ex->nf)
			file_error (file, "wrong number of fields");
		dict_clear (condv);
		for (i=0; i<n; ++i) {
			switch (ex->fi[i].type) {
			case t_time:
				if (! string_to_double(words[i], &t))
					file_error (file,
						    "invalid number '%s'",
						    words[i]);
				break;
			case t_response:
				if (! string_to_int(words[i], &resp)
				    || (resp != 0 && resp != 1))
					file_error (file,
						    "invalid response '%s'",
						    words[i]);
				break;
			case t_ignore:
				break;
			case t_cond:
				idx = ex->fi[i].cond_idx;
				dict_add (condv, ex->cond->item[idx],
					  words[i]);
				break;
			}
		}

		/* get the sample set name */
		sample_name_used = 0;
		sample_name[0] = '\0';
		str_add (&sample_name, ds->fname,
			 &sample_name_used, &sample_name_alloc);
		for (i=0; i<ex->cond->used; ++i) {
			const char *val;

			val = dict_lookup (condv, ex->cond->item[i]);
			str_add (&sample_name, "_",
				 &sample_name_used, &sample_name_alloc);
			str_add (&sample_name, val,
				 &sample_name_used, &sample_name_alloc);
		}

		/* create new sample sets as needed */
		idx = dataset_samples_idx (ds, sample_name, 0);
		if (idx < 0) {
			idx = dataset_samples_idx (ds, sample_name, 1);
			for (i=0; ex->params[i].name; ++i) {
				dataset_init_param (ds, ex->params+i, condv);
			}
			dataset_add_cmd (ds, c_run, idx, 0);
		}
		samples = ds->samples[idx];

		/* register the sample data */
		samples_add_sample (samples, t, resp);
	}
	delete_file(file);
	xfree(sample_name);
	delete_dict(condv);

	for (i=0; i<ds->samples_used; ++i)
		samples_sort (ds->samples[i]);

	set_precision (ex->precision);

	return  ds;
}

static int
file_find_headers (char *fname, struct array *fields)
/* Search through the file 'fname' for the last header line.
 *
 * A header line is a line with at least two words, the first of
 * which is "sub".  The words of the last such line are stored in the
 * array 'fields'; if there is no header line, 'fields' is left
 * untouched.
 *
 * Return '-1' if the file cannot be opened by new_file, '0' if no
 * header is found, and '1' if everything is ok.
 */
{
	struct file *save;
	const char *const* words;
	int  n, found;

	if (!(save = new_file(fname)))
		return -1;

	found = 0;
	while (file_read (save, &words, &n)) {
		int  i;

		/* check the word count first, so that words[0] is only
		 * inspected when it is known to exist */
		if (n<2 || strcmp(words[0], "sub") != 0)  continue;

		/* a later header line replaces an earlier one */
		found = 1;
		array_clear (fields);
		for (i=0; i<n; ++i)
			array_append (fields, words[i]);
	}
	delete_file(save);

	return  found;
}

static void
dataset_default_headers (const struct dataset *ds, struct array *fields)
/* Replace the contents of 'fields' by the default column names for
 * the dataset 'ds': "sub", the entries of 'ds->z', the entries of
 * 'ds->param', "fit" and "time", in this order. */
{
	int  k;

	array_clear(fields);

	array_append(fields, "sub");
	for (k=0; k<ds->z->used; ++k)
		array_append(fields, ds->z->entry[k]);
	for (k=0; k<ds->param->used; ++k)
		array_append(fields, ds->param->entry[k]);
	array_append(fields, "fit");
	array_append(fields, "time");
}

static int
dataset_get_value (const struct dataset *ds, const char *name,
		   int sub, const double *param, const double *z,
		   double fit, double time, double *value_ret)
/* Look up the value for the save file column 'name'.
 *
 * The names "sub", "fit" and "time" refer to the arguments of the
 * same name.  Every other name is searched for first in
 * 'ds->param' and then in 'ds->z'; the value is taken from the same
 * position in the array 'param' or 'z', respectively.
 *
 * Return 1 and store the value in '*value_ret' if the name is known,
 * and return 0 (leaving '*value_ret' untouched) otherwise.
 */
{
	int  found = 1;
	int  k;

	if (strcmp(name, "sub") == 0) {
		*value_ret = sub;
	} else if (strcmp(name, "fit") == 0) {
		*value_ret = fit;
	} else if (strcmp(name, "time") == 0) {
		*value_ret = time;
	} else {
		found = 0;
		for (k=0; !found && k<ds->param->used; k++) {
			if (strcmp(name, ds->param->entry[k]) == 0) {
				*value_ret = param[k];
				found = 1;
			}
		}
		for (k=0; !found && k<ds->z->used; k++) {
			if (strcmp(name, ds->z->entry[k]) == 0) {
				*value_ret = z[k];
				found = 1;
			}
		}
	}

	return  found;
}

int
experiment_save (struct experiment *ex, const struct dataset *ds,
		 double *values, int sub, double  *z, double fit, double time)
/* Save the results of the parameter estimation to a global save file.
 *
 * If 'ex->save_file_name' is defined, one line with the values for
 * all columns of the save file is appended to it.  The columns are
 * taken from the last header line found in an existing save file.  If
 * there is no such file or header, or if the header lacks one of the
 * default columns for 'ds', a new header line with the default
 * columns is written first.  Columns for which 'ds' has no value are
 * filled with "-".
 *
 * The function returns '1' if data has been saved, '0' if no save
 * file is defined, and '-1' if the file could not be opened or
 * closed successfully.
 */
{
	struct array *fields;
	int  needs_header_check = 1;
	int  needs_header_line = 0;
	FILE *fd;
	double  value;
	int  found, i;

	if (! ex->save_file_name)  return 0;

	if (! ex->save_fields) {
		/* first call: try to continue an existing save file */
		ex->save_fields = new_array();
		if (file_find_headers(ex->save_file_name,
				      ex->save_fields) <= 0) {
			/* no file (-1) or no header (0): start with
			 * the default columns */
			dataset_default_headers (ds, ex->save_fields);
			needs_header_check = 0;
			needs_header_line = 1;
		}
	}

	fields = ex->save_fields;

	if (needs_header_check) {
		/* the current columns must contain everything 'ds'
		 * has to offer, otherwise start a new header */
		int  bad_header = 0;
		struct array *target;

		target = new_array ();
		dataset_default_headers (ds, target);
		for (i=0; i<target->used; ++i) {
			if (array_find(fields,target->entry[i]) == -1) {
				bad_header = 1;
				break;
			}
		}

		if (bad_header) {
			delete_array (ex->save_fields);
			ex->save_fields = target;
			fields = target;
			needs_header_line = 1;
		} else {
			delete_array (target);
		}
	}

	fd = fopen (ex->save_file_name, "a");
	if (! fd)  return -1;
	if (needs_header_line) {
		for (i=0; i<fields->used; ++i)
			fprintf (fd, " %7s", fields->entry[i]);
		fputc ('\n', fd);
	}

	for (i=0; i<fields->used; i++) {
		found = dataset_get_value (ds, fields->entry[i],
					   sub, values, z, fit, time, &value);
		if (found)
			fprintf (fd, " %7.3f", value);
		else
			fprintf (fd, " %7s", "-");
	}
	fputc ('\n', fd);

	/* buffered output is written by fclose, so write errors
	 * typically show up here */
	if (fclose (fd) != 0)  return -1;

	return  1;
}

/*
 * Local Variables:
 * c-file-style: "linux"
 * End:
 */
