/* import.c
 *
 * Copyright (C) 2005, 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Functions to read data files.
   \author Stephane Germain <germste@gmail.com>
*/

#include "libirt.h"

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
   \brief The maximum allowed length of a line.
*/
#define MAX_LINE_LENGTH 2000

/**
   \brief Remove the trailing newline (and a carriage return, if any).

   At most one trailing '\\n' is removed, then at most one trailing '\\r'.
   An empty string is left untouched.

   @param[in,out] line The NUL-terminated line to chomp.

   \return The new length of the line.
*/
int
chomp (char *line)
{
  size_t length = strlen (line);

  /* Each test is guarded by length > 0 so that an empty string, or a line
     that becomes empty once the newline is stripped, never makes us look
     at line[-1].  */
  if (length > 0 && line[length - 1] == '\n')
    line[--length] = '\0';
  if (length > 0 && line[length - 1] == '\r')
    line[--length] = '\0';

  return (int) length;
}

/**
   \brief Count the number of lines in \em file and then rewind it.

   Every successful fgets() call with a MAX_LINE_LENGTH buffer counts as
   one line, so a physical line that does not fit in the buffer is counted
   once per chunk.

   @param[in] file The file, read from its current position.

   \return The number of lines.
*/
int
count_lines (FILE * file)
{
  char buffer[MAX_LINE_LENGTH];
  int total;

  /* The whole job is done by the loop header: one increment per read.  */
  for (total = 0; fgets (buffer, MAX_LINE_LENGTH, file) != NULL; total++)
    ;

  rewind (file);
  return total;
}

/**
   \brief Count the number of fields in the (\em skip+1)th line
   of \em file and then rewind it.

   @param[in] file The file, read from its current position.
   @param[in] skip The number of lines to skip before the line of interest.
   @param[in] delimiter The field delimiter.

   \return The number of fields in the line (the number of delimiters
   plus one), or 0 if the file holds fewer than \em skip+1 lines.
*/
int
count_fields (FILE * file, int skip, char delimiter)
{
  int nbr_field = 0, pos;
  char line[MAX_LINE_LENGTH];
  char *got;

  /* Read skip + 1 lines; the last one read is the line of interest.
     `got` remembers whether that last read succeeded: when fgets() fails
     the buffer is either untouched or indeterminate, so it must not be
     scanned.  */
  do
    got = fgets (line, MAX_LINE_LENGTH, file);
  while (got != NULL && skip-- > 0);

  if (got != NULL)
    {
      nbr_field = 1;
      for (pos = 0; line[pos] != '\0'; pos++)
        {
          if (line[pos] == delimiter)
            nbr_field++;
        }
    }

  rewind (file);
  return nbr_field;
}

/**
   \brief Remove every space character (' ') from a string, in place.

   Spaces are removed wherever they occur, not only at the ends.
   Other whitespace characters are kept.

   @param[in,out] str The NUL-terminated string to trim.

   \return The new length of the string.
*/
int
trim (char *str)
{
  int kept = 0;
  int i;

  /* `i` scans the original text, `kept` is where the next surviving
     character is written; kept <= i always holds.  */
  for (i = 0; str[i] != '\0'; i++)
    {
      if (str[i] == ' ')
        continue;
      str[kept++] = str[i];
    }
  str[kept] = '\0';

  return kept;
}

/**
   \brief Convert the binary response string \em resp_str to an integer code.

   @param[in] resp_str The response string (must not be NULL).
   @param[in] true_string The string representing a success or NULL.
   @param[in] false_string The string representing a failure or NULL.
   @param[in] blank_string The string representing a non response or NULL.

   \return The first matching rule, tried in this order:
   - TRUE if \em resp_str equals \em true_string;
   - BLANK if \em resp_str equals \em blank_string;
   - FALSE if \em resp_str equals \em false_string;
   - BLANK if \em false_string is not NULL (unrecognized response);
   - FALSE otherwise.
*/
int
convert_delimited_bin (char *resp_str, char *true_string,
                       char *false_string, char *blank_string)
{
  if (true_string != NULL && !strcmp (resp_str, true_string))
    return TRUE;

  if (blank_string != NULL && !strcmp (resp_str, blank_string))
    return BLANK;

  /* Without an explicit failure string, whatever is left is a failure.  */
  if (false_string == NULL)
    return FALSE;

  /* With one, only an exact match is a failure; the rest is unrecognized.  */
  return strcmp (resp_str, false_string) ? BLANK : FALSE;
}

/**
   \brief Read binary patterns from a file in delimited format.

   The file is first rewound, then \em skip lines are discarded and each
   following line gives one pattern. On a line, the first \em offset fields
   are ignored; each item is then one field followed by \em extra ignored
   fields. Lines are read in chunks of at most MAX_LINE_LENGTH - 1
   characters.

   @param[in] file An open stream with a rewind capability (not stdin).
   @param[in] nbr_item The number of items to read or 0 for autodetection
   (from the first line after the skipped ones).
   @param[in] nbr_subject The number of patterns to read or 0 for
   autodetection (the number of lines minus \em skip).
   @param[in] skip The number of lines to skip at the start of the file.
   @param[in] delimiter The delimiter to use.
   @param[in] offset The number of fields to skip at the start of each line.
   @param[in] extra The number of fields to ignore after each item.
   A negative value is treated as 0.
   @param[in] true_string The string representing a success.
   If NULL then "1" is used.
   @param[in] false_string The string representing a failure.
   If NULL then anything that is neither a success nor a non response is
   assumed to be a failure. If both \em false_string and \em blank_string
   are NULL then "0" is used.
   @param[in] blank_string The string representing a non response.
   If NULL then anything that is neither a success nor a failure is assumed
   to be a non response.
   An empty field is always assumed to be a non response.
   @param[in] trimming Enable the removal of spaces inside each field.
   @param[out] patterns A matrix(patterns x items) of binary responses.
   Items missing from a too short line, and patterns missing from a too
   short file, are set to BLANK.

   \return 1 for success and 0 for failure.

   \warning This function allocates the memory for \em patterns.
*/
int
read_bin_patterns_delimited (FILE * file, int nbr_item,
                             int nbr_subject, int skip,
                             char delimiter, int offset, int extra,
                             char *true_string, char *false_string,
                             char *blank_string, int trimming,
                             gsl_matrix_int ** patterns)
{
  int nbr_line, i, j, k, resp, length, pos, nbr_ignored;
  char line[MAX_LINE_LENGTH], resp_str[MAX_LINE_LENGTH];

  /* A negative number of in-between fields makes no sense; clamping it
     also guarantees that the (1 + extra) divisor below is never zero.  */
  if (extra < 0)
    extra = 0;

  /* Always called, even when nbr_subject is given: it also rewinds.  */
  nbr_line = count_lines (file);

  if (nbr_subject == 0)
    nbr_subject = nbr_line - skip;

  if (nbr_subject < 1)
    {
      if (libirt_verbose > 0)
        printf ("Error : not enought patterns found.\n");
      return 0;
    }

  /* Only inspect the first data line once we know there should be one.  */
  if (nbr_item == 0)
    nbr_item = (count_fields (file, skip, delimiter) - offset) / (1 + extra);

  if (nbr_item < 1)
    {
      if (libirt_verbose > 0)
        printf ("Error : not enought items found.\n");
      return 0;
    }

  /* Resolve the default response strings once instead of per field.  */
  if (true_string == NULL)
    true_string = "1";
  if (false_string == NULL && blank_string == NULL)
    false_string = "0";

  *patterns = gsl_matrix_int_alloc (nbr_subject, nbr_item);

  while (skip > 0 && fgets (line, MAX_LINE_LENGTH, file))
    {
      skip--;
    }

  /* The row count is tested before reading so that no line beyond the
     requested patterns is consumed.  */
  for (j = 0; j < nbr_subject && fgets (line, MAX_LINE_LENGTH, file); j++)
    {
      length = chomp (line);

      /* Step over the `offset` leading fields.  */
      pos = 0;
      nbr_ignored = 0;
      while (pos < length && nbr_ignored < offset)
        {
          if (line[pos] == delimiter)
            nbr_ignored++;
          pos++;
        }

      for (i = 0; i < nbr_item; i++)
        {
          if (pos < length)
            {
              /* Copy the field; k < length < MAX_LINE_LENGTH so resp_str
                 cannot overflow.  */
              k = 0;
              while (pos < length && line[pos] != delimiter)
                {
                  resp_str[k] = line[pos];
                  k++;
                  pos++;
                }
              resp_str[k] = 0;
              if (trimming)
                k = trim (resp_str);

              /* An empty field is a non response whatever the strings.  */
              if (k == 0)
                resp = BLANK;
              else
                resp = convert_delimited_bin (resp_str, true_string,
                                              false_string, blank_string);
            }
          else
            {
              if (libirt_verbose > 0)
                printf
                  ("Warning : not enought characters at line %d item %d.\n",
                   j + 1, i + 1);
              resp = BLANK;
            }
          gsl_matrix_int_set (*patterns, j, i, resp);

          /* Step over the delimiter that ended the item...  */
          pos++;

          /* ...and over the `extra` fields that follow it, so that the
             parsing agrees with the (1 + extra) used for autodetection.  */
          nbr_ignored = 0;
          while (pos < length && nbr_ignored < extra)
            {
              if (line[pos] == delimiter)
                nbr_ignored++;
              pos++;
            }
        }
    }

  /* The file ended early: do not hand back uninitialized rows.  */
  if (j < nbr_subject)
    {
      if (libirt_verbose > 0)
        printf ("Warning : only %d patterns found, %d expected.\n",
                j, nbr_subject);
      for (; j < nbr_subject; j++)
        for (i = 0; i < nbr_item; i++)
          gsl_matrix_int_set (*patterns, j, i, BLANK);
    }

  return 1;
}

/**
   \brief Convert the multiple choice response string \em resp_str to integer.

   The whole string must be a base 10 integer as accepted by strtol()
   (leading white space and a '+' sign are allowed), strictly positive and
   representable as an int.

   @param[in] resp_str The response string (must not be NULL).

   \return If \em resp_str is a positive integer return this integer
   else return BLANK.
*/
int
convert_delimited_mc (char *resp_str)
{
  long value;
  char *end;

  errno = 0;
  value = strtol (resp_str, &end, 10);

  /* Nothing converted, or trailing characters after the number.  */
  if (end == resp_str || *end != '\0')
    return BLANK;

  /* Keep the result in a long until it is known to fit: storing an out of
     range value in an int could turn garbage into a plausible response.  */
  if (errno == ERANGE || value <= 0 || value > INT_MAX)
    return BLANK;

  return (int) value;
}

/**
   \brief Read multiple choice patterns from a file in delimited format.

   The responses are assumed to be positive integers, anything else
   will be assumed to be a non response.

   The file is first rewound, then \em skip lines are discarded and each
   following line gives one pattern. On a line, the first \em offset fields
   are ignored; each item is then one field followed by \em extra ignored
   fields. Lines are read in chunks of at most MAX_LINE_LENGTH - 1
   characters.

   @param[in] file An open stream with a rewind capability (not stdin).
   @param[in] nbr_item The number of items to read or 0 for autodetection
   (from the first line after the skipped ones).
   @param[in] nbr_subject The number of patterns to read or 0 for
   autodetection (the number of lines minus \em skip).
   @param[in] skip The number of lines to skip at the start of the file.
   @param[in] delimiter The delimiter to use.
   @param[in] offset The number of fields to skip at the start of each line.
   @param[in] extra The number of fields to ignore after each item.
   A negative value is treated as 0.
   @param[in] trimming Enable the removal of spaces inside each field.
   @param[out] patterns A matrix(patterns x items) of mc responses.
   Items missing from a too short line, and patterns missing from a too
   short file, are set to BLANK.
   @param[out] nbr_options A vector(items) with the number of options of each
   item, taken as the largest response read for that item (0 if none).

   \return 1 for success and 0 for failure.

   \warning This function allocates the memory for \em patterns and
   \em nbr_options.
*/
int
read_mc_patterns_delimited (FILE * file, int nbr_item,
                            int nbr_subject, int skip,
                            char delimiter, int offset, int extra,
                            int trimming,
                            gsl_matrix_int ** patterns, gsl_vector_int ** nbr_options)
{
  int nbr_line, i, j, k, resp, length, pos, nbr_ignored;
  char line[MAX_LINE_LENGTH], resp_str[MAX_LINE_LENGTH];

  /* A negative number of in-between fields makes no sense; clamping it
     also guarantees that the (1 + extra) divisor below is never zero.  */
  if (extra < 0)
    extra = 0;

  /* Always called, even when nbr_subject is given: it also rewinds.  */
  nbr_line = count_lines (file);

  if (nbr_subject == 0)
    nbr_subject = nbr_line - skip;

  if (nbr_subject < 1)
    {
      if (libirt_verbose > 0)
        printf ("Error : not enought patterns found.\n");
      return 0;
    }

  /* Only inspect the first data line once we know there should be one.  */
  if (nbr_item == 0)
    nbr_item = (count_fields (file, skip, delimiter) - offset) / (1 + extra);

  if (nbr_item < 1)
    {
      if (libirt_verbose > 0)
        printf ("Error : not enought items found.\n");
      return 0;
    }

  *patterns = gsl_matrix_int_alloc (nbr_subject, nbr_item);
  *nbr_options = gsl_vector_int_alloc (nbr_item);
  gsl_vector_int_set_all (*nbr_options, 0);

  while (skip > 0 && fgets (line, MAX_LINE_LENGTH, file))
    {
      skip--;
    }

  /* The row count is tested before reading so that no line beyond the
     requested patterns is consumed.  */
  for (j = 0; j < nbr_subject && fgets (line, MAX_LINE_LENGTH, file); j++)
    {
      length = chomp (line);

      /* Step over the `offset` leading fields.  */
      pos = 0;
      nbr_ignored = 0;
      while (pos < length && nbr_ignored < offset)
        {
          if (line[pos] == delimiter)
            nbr_ignored++;
          pos++;
        }

      for (i = 0; i < nbr_item; i++)
        {
          if (pos < length)
            {
              /* Copy the field; k < length < MAX_LINE_LENGTH so resp_str
                 cannot overflow.  */
              k = 0;
              while (pos < length && line[pos] != delimiter)
                {
                  resp_str[k] = line[pos];
                  k++;
                  pos++;
                }
              resp_str[k] = 0;
              if (trimming)
                trim (resp_str);
              resp = convert_delimited_mc (resp_str);
            }
          else
            {
              if (libirt_verbose > 0)
                printf
                  ("Warning : not enought characters at line %d item %d.\n",
                   j + 1, i + 1);
              resp = BLANK;
            }
          gsl_matrix_int_set (*patterns, j, i, resp);

          /* The largest response seen so far is the item's option count.  */
          if (resp > gsl_vector_int_get (*nbr_options, i))
            gsl_vector_int_set (*nbr_options, i, resp);

          /* Step over the delimiter that ended the item...  */
          pos++;

          /* ...and over the `extra` fields that follow it, so that the
             parsing agrees with the (1 + extra) used for autodetection.  */
          nbr_ignored = 0;
          while (pos < length && nbr_ignored < extra)
            {
              if (line[pos] == delimiter)
                nbr_ignored++;
              pos++;
            }
        }
    }

  /* The file ended early: do not hand back uninitialized rows.  */
  if (j < nbr_subject)
    {
      if (libirt_verbose > 0)
        printf ("Warning : only %d patterns found, %d expected.\n",
                j, nbr_subject);
      for (; j < nbr_subject; j++)
        for (i = 0; i < nbr_item; i++)
          gsl_matrix_int_set (*patterns, j, i, BLANK);
    }

  return 1;
}

#undef MAX_LINE_LENGTH
