/*
 *  Program: extract -- a Portable Game Notation (PGN) extractor.
 *  Copyright (C) 1994 David Barnes
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 1, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  David Barnes may be contacted as D.J.Barnes@ukc.ac.uk
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "bool.h"
#include "defs.h"
#include "typedef.h"
#include "lists.h"
#include "taglist.h"

	/* Definitions for maintaining arrays of tag strings.
	 * These arrays are used for various purposes:
	 *	lists of white/black players to extract on.
	 *	lists of other criteria to extract on.
	 */
typedef struct {
    /* How many string slots are currently available for use.
     * Whenever this is > 0 the array behind tag_strings is allocated
     * with exactly one slot more than this number, so that there is
     * always room for the (char *)NULL that terminates the list.
     */
    unsigned num_allocated_elements;
    /* How many slots currently hold a string.
     * This should always be <= num_allocated_elements.
     */
    unsigned num_used_elements;
    /* The list of elements.
     * Elements 0 .. num_used_elements-1 point to null-terminated strings.
     * tag_strings[num_used_elements] == (char *)NULL once at least one
     * string has been added.
     */
    char **tag_strings;
} StringArray;

	/* Functions to allow creation of string lists. */
/* One string list for every known tag, indexed by tag number.
 * It is important that these lists should be initialised to
 * 	{ 0, 0, (char **)NULL }
 * which happens by default, anyway, because the array has static
 * storage duration and is therefore zero-initialised.
 */
static StringArray TagLists[NUM_TAGS];

/* Forward declarations of helpers that are defined further down. */
static char *soundex(const char *str);
static Boolean CheckList(int tag,const char *tag_string,StringArray *list);

	/* Append a private copy of str to the strings held in list.
	 * List may be a new (all-zero) list, in which case space is
	 * allocated for it; a full list is grown by MORE_LIST_SPACE slots.
	 * One slot beyond the recorded capacity is always allocated so
	 * that the strings can be followed by a NULL terminator.
	 * Return TRUE on success, FALSE if memory could not be obtained.
	 * On failure the strings already in the list are left intact
	 * and remain usable.
	 */
static Boolean
AddToList(const char *str,StringArray *list)
{   unsigned index;

    if(list->num_allocated_elements == list->num_used_elements){
	/* We need more space. */
	unsigned new_capacity;
	char **new_strings;

	if(list->num_allocated_elements == 0){
	    /* No elements in the list. */
	    new_capacity = INIT_LIST_SPACE;
	    new_strings = malloc((new_capacity+1)*sizeof(char *));
	}
	else{
	    new_capacity = list->num_allocated_elements+MORE_LIST_SPACE;
	    /* Keep the result in a temporary: if realloc fails, the old
	     * block is still valid and must not be lost or overwritten.
	     */
	    new_strings = realloc(list->tag_strings,
				(new_capacity+1)*sizeof(char *));
	}
	if(new_strings == NULL){
	    return FALSE;
	}
	list->tag_strings = new_strings;
	list->num_allocated_elements = new_capacity;
    }

    /* There is space. */
    index = list->num_used_elements;
    list->tag_strings[index] = malloc(strlen(str)+1);
    if(list->tag_strings[index] == NULL){
	/* The slot now holds NULL, so the list is still terminated. */
	return FALSE;
    }
    strcpy(list->tag_strings[index],str);
    list->num_used_elements++;
    /* Make sure that the list is properly terminated at all times. */
    list->tag_strings[index+1] = NULL;
    return TRUE;
}

    /* Simple soundex code supplied by John Brogan
     * (jwbrogan@unix2.netaxs.com), 26th Aug 1994.
     * John writes:
     * "In recognition of the large number of strong players from countries
     * with Slavic based languages, I tried to tailor the soundex code
     * to match any reasonable transliteration of a Slavic name into
     * English.  Thus, Nimzovich will match Nimsowitsch, Tal will match
     * Talj, etc.  Unfortunately, in order to be sure not to miss any
     * valid matches, I had to make the code so tolerant that it sometimes
     * comes up with some wildly false matches.  This, to me, is better
     * than missing some games, but your mileage may differ."
     *
     * This looks like it was originally derived from the public domain
     * version released by N. Dean Pentcheff, 1989, which was, itself,
     * based on that in D.E. Knuth's "The art of computer programming.",
     * Volume 3: Sorting and searching.  Addison-Wesley Publishing Company:
     * Reading, Mass. Page 392.
     * Amended by David Barnes, 2nd Sep 1994.
     */

/* Define a maximum length for the soundex result. */
#define MAXSOUNDEX 50

	/* Calculate a soundex string for str.
	 * An initial 'Y' or 'J' (in either case) is encoded as '7'.
	 * Every other letter is mapped to a digit code; letters whose
	 * code is '0' are dropped, as are all non-letters, and a code
	 * equal to the last code stored is not stored again (even if
	 * dropped characters separated the two).
	 * At most MAXSOUNDEX codes are produced.
	 * The space used is statically allocated, so the caller
	 * will have to allocate its own for the result if it is
	 * to be retained across different calls.
	 */
static char *
soundex(const char *str)
{   static char sbuf[MAXSOUNDEX+1];
    /*                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
    static const char mapping[] = "01230120002455012622011202";
    /* An index into sbuf. */
    unsigned sindex = 0;
    /* The last code stored, used to compress repeated codes. */
    char lastc = ' ';
    /* The <ctype.h> functions are only defined for values in the
     * range of unsigned char (and EOF), so plain char values, which
     * may be negative, are converted before being passed to them.
     */
    int initial_letter = toupper((unsigned char)*str);

    /* Special case for names that begin with 'J',
     * otherwise Janosevic == Nimzovich.
     * In addition, we really want Yusupov to match Jusupov.
     */
    if((initial_letter == 'Y') || (initial_letter == 'J')){
	sbuf[sindex] = '7';
	sindex++;
	str++;
    }

    while((*str != '\0') && (sindex < MAXSOUNDEX)){
	int ch = (unsigned char)*str;

	/* We are only interested in alphabetics. */
	if(isalpha(ch)){
	    ch = toupper(ch);
	    /* A locale may classify letters outside A-Z as alphabetic;
	     * those have no entry in the mapping table, so skip them
	     * rather than index beyond its end.
	     */
	    if((ch >= 'A') && (ch <= 'Z')){
		/* Pick up the translation. */
		char translation = mapping[ch - 'A'];

		if((translation != '0') && (translation != lastc)){
		    sbuf[sindex] = translation;
		    sindex++;
		    lastc = translation;
		}
	    }
	}
	str++;
    }
    sbuf[sindex] = '\0';
    return(sbuf);
}

	/* Decide whether tag names a field whose values are compared
	 * by soundex code (when soundex matching has been requested).
	 * This is the case for the White, Black, pseudo-player, Event,
	 * Site and Annotator tags; every other tag yields FALSE.
	 */
static Boolean
soundex_tag(int tag)
{
    if((tag == WHITE_TAG) || (tag == BLACK_TAG) ||
		(tag == PSEUDO_PLAYER_TAG) || (tag == EVENT_TAG) ||
		(tag == SITE_TAG) || (tag == ANNOTATOR_TAG)){
	return TRUE;
    }
    return FALSE;
}

	/* Add tagstr to the list of strings to be matched for tag.
	 * If we are using soundex matching and tag is one of the
	 * soundex tags, then store its soundex version rather than
	 * its plain text.
	 * A tag outside 0 .. NUM_TAGS-1 is reported on stderr and
	 * ignored.  A failure to store the string is also reported
	 * on stderr, since the criterion would otherwise be dropped
	 * without the user knowing.
	 */
void
add_tag_to_list(int tag,const char *tagstr)
{
    if((tag >= 0) && (tag < NUM_TAGS)){
	const char *string_to_store = tagstr;

	if(GlobalState.use_soundex && soundex_tag(tag)){
	    /* soundex() returns a static buffer; AddToList copies it. */
	    string_to_store = soundex(tagstr);
	}
	if(!AddToList(string_to_store,&TagLists[tag])){
	    fprintf(stderr,
		    "Unable to store \"%s\" for tag number %d in add_tag_to_list.\n",
		    tagstr,tag);
	}
    }
    else{
	fprintf(stderr,"Illegal tag number %d in add_tag_to_list.\n",tag);
    }
}

	/* Enter an extraction argument into the matching tag list.
	 * The first character of argstr selects the list:
	 *	b -- player of the black pieces
	 *	d -- date of the game.
	 *	p -- player of either colour.
	 *	r -- result
	 *	w -- player of the white pieces
	 * Everything after that first character is the string that is
	 * added to the selected list.
	 * Any other first character is reported on stderr and nothing
	 * is added.
	 */
void
extract_argument(const char *argstr)
{   int tag;

    switch(argstr[0]){
	case 'b':
	    tag = BLACK_TAG;
	    break;
	case 'd':
	    tag = DATE_TAG;
	    break;
	case 'p':
	    tag = PSEUDO_PLAYER_TAG;
	    break;
	case 'r':
	    tag = RESULT_TAG;
	    break;
	case 'w':
	    tag = WHITE_TAG;
	    break;
	default:
	    fprintf(stderr,"Unknown type of extraction argument: %s\n",
			argstr);
	    return;
    }
    /* Skip the selector letter; the rest is the value to match. */
    add_tag_to_list(tag,argstr + 1);
}

	/* Look for an entry of list that matches tag_string.
	 * An entry matches when the whole of the entry is a prefix of
	 * the string being tested.  If soundex matching is enabled and
	 * tag is a soundex tag, it is the soundex code of tag_string
	 * that is tested against the entries.
	 * Return TRUE on the first match, FALSE if nothing matches
	 * (including when the list is empty).
	 */
static Boolean
CheckList(int tag,const char *tag_string,StringArray *list)
{   const char *candidate = tag_string;
    unsigned i;

    if(GlobalState.use_soundex && soundex_tag(tag)){
	candidate = soundex(tag_string);
    }
    for(i = 0; i < list->num_used_elements; i++){
	const char *entry = list->tag_strings[i];

	if(strncmp(candidate,entry,strlen(entry)) == 0){
	    return TRUE;
	}
    }
    return FALSE;
}

	/* Check the Details of this current game against those in
	 * the wanted lists.
	 * An empty list implies that there is no restriction on
	 * the values in the corresponding tag field.
	 * In consequence, completely empty lists imply that all
	 * games reaching this far are wanted.
	 * Return TRUE if wanted, FALSE otherwise.
	 */
Boolean
CheckDetails(char *Details[])
{   Boolean wanted = TRUE;
    int tag;

    /* PSEUDO_PLAYER_TAG is treated differently, since it has the effect of
     * or-ing together the WHITE_TAG and BLACK_TAG lists.
     * Otherwise, different tag lists are and-ed.
     */
    if(TagLists[PSEUDO_PLAYER_TAG].num_used_elements != 0){
	/* Check both the White and Black lists. */
	if(Details[WHITE_TAG] != NULL){
	    wanted = CheckList(WHITE_TAG,Details[WHITE_TAG],
				&TagLists[PSEUDO_PLAYER_TAG]);
	    /* If we didn't find a player there, try for the opponent. */
	    if(!wanted && (Details[BLACK_TAG] != NULL)){
		wanted = CheckList(BLACK_TAG,Details[BLACK_TAG],
				&TagLists[PSEUDO_PLAYER_TAG]);
	    }
	}
	else if(Details[BLACK_TAG] != NULL){
	    wanted = CheckList(BLACK_TAG,Details[BLACK_TAG],
				&TagLists[PSEUDO_PLAYER_TAG]);
	}
    }
    else{
        /* No PSEUDO_PLAYER_TAG info to check. */
    }

    /* Check the remaining tags in turn as long as we still have a match. */
    for(tag = 0; (tag < NUM_TAGS) && wanted; tag++){
	if((TagLists[tag].num_used_elements != 0) && (Details[tag] != NULL)){
	    wanted = CheckList(tag,Details[tag],&TagLists[tag]);
	}
    }
    return wanted;
}
