/** @file xapian_hunspell_stemmer.cpp
 *  @brief Implementation of Xapian::Stem API class which uses Hunspell
 *  instead of Snowball.
 */
/* Copyright (C) 2010 Eugene Sizikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 * Authors: Eugene Sizikov <esizikov@gmail.com>
 */

#ifdef CONVERT_HUNSPELL_DIC
#ifdef CONVERT_USING_PYTHON_C_API
#include <Python.h>
#else
#include <iconv.h>
#endif
#include <cstdio>
#endif

#include <xapian/error.h>
#include <hunspell.h>

#include <cassert>
#include <string>
#include <cstring>
#include <iostream>

#include "steminternal.h"

#include "xapian_hunspell_stemmer.h"

#define CREATE_SIZE 16

/** Allocate a fresh symbol buffer.
 *
 *  The allocation is HEAD bytes of header followed by room for
 *  CREATE_SIZE + 1 symbols.  The returned pointer addresses the first symbol
 *  (i.e. it points just past the header), and both the capacity and the size
 *  recorded for it are set to CREATE_SIZE.
 *
 *  @return pointer to the symbol area of the new buffer.
 *  @throws std::bad_alloc if malloc() fails.
 */
extern symbol * create_s() {
    char * raw = static_cast<char *>(malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)));
    if (!raw) {
	throw std::bad_alloc();
    }

    // Skip over the header so callers see only the symbol storage.
    symbol * buffer = reinterpret_cast<symbol*>(raw + HEAD);
    SET_CAPACITY(buffer, CREATE_SIZE);
    SET_SIZE(buffer, CREATE_SIZE);
    return buffer;
}

#ifdef CONVERT_HUNSPELL_DIC
/** Transcode a byte string from one character encoding to another.
 *
 *  @param from     name of the encoding of @a in.
 *  @param to       name of the encoding wanted in @a out.
 *  @param in       input bytes (need not be NUL-terminated).
 *  @param insize   number of bytes at @a in.
 *  @param out      output buffer.  It is declared const only for the callers'
 *                  convenience; it IS written to.
 *  @param outsize  in: total size of @a out in bytes, including the room
 *                  needed for the terminating NUL.  out (on success only):
 *                  bytes still unused after the terminator.
 *
 *  @return number of bytes written to @a out, not counting the terminating
 *          NUL; 0 if the conversion failed or did not fit.  Whenever
 *          *outsize is non-zero, @a out holds a NUL-terminated string on
 *          return (the empty string on failure), and nothing is ever written
 *          beyond the first *outsize bytes.
 */
int convert(const char *from, const char *to,
		    const char *in, size_t insize,
		    const char *out, size_t *outsize)
{
	char * const dest = const_cast<char *>(out);

	// Without even room for a terminator there is nothing safe to write.
	if (*outsize == 0)
		return 0;

	// Start from an empty string so every failure path below leaves the
	// buffer in a well-defined state instead of exposing stale bytes.
	dest[0] = 0;

	// One byte is always held back for the terminating NUL.
	size_t room = *outsize - 1;

#ifdef CONVERT_USING_PYTHON_C_API
	PyObject *obj_u = PyUnicode_Decode(in, insize, from, NULL);
	if (!obj_u)
	{ // can't convert
		perror("unicode.decode");
		return 0;
	}
	PyObject *obj_s = PyUnicode_AsEncodedString(obj_u, to, NULL);
	// The intermediate unicode object is not needed any more, whether or
	// not the encode step succeeded.
	Py_DECREF(obj_u);
	if (!obj_s)
	{ // can't convert
		perror("unicode.encode");
		return 0;
	}

	// Copy as much as fits; longer results are truncated.
	size_t size = static_cast<size_t>(PyString_GET_SIZE(obj_s));
	if (size > room)
		size = room;
	memmove(dest, PyString_AS_STRING(obj_s), size);
	dest[size] = 0;

	Py_DECREF(obj_s);

	*outsize = room - size;
	return int(size);
#else
	iconv_t cd = iconv_open(to, from);

	if ((iconv_t)-1 == cd)
	{ // can't convert
		perror("iconv_open");
		return 0;
	}

	char * inptr = const_cast<char *>(in);
	char * cursor = dest;
	size_t inleft = insize;
	size_t nconv = iconv(cd, &inptr, &inleft, &cursor, &room);
	const bool failed = ((size_t)-1 == nconv);
	if (failed)
	{ // can't convert; report before iconv_close() can disturb errno
		perror("iconv");
	}

	// The descriptor is released on every path.
	iconv_close(cd);

	if (failed)
	{
		// Drop any partial output.
		dest[0] = 0;
		return 0;
	}
	*cursor = 0;

	*outsize = room;
	return int(cursor - dest);
#endif
}
#endif

namespace Xapian {

class InternalStemHunspell : public Stem::Internal {
  public:
    InternalStemHunspell(const std::string & apath,
    		             const std::string & dpath);
    ~InternalStemHunspell();

#ifdef HAVE_PATCHED_XAPIAN
    std::string operator()(const std::string & word);
#endif

    int stem();
    const char * get_description() const;

  private:
    Hunhandle *handle;
#ifdef CONVERT_HUNSPELL_DIC
	char outbuf[BUFSIZ];
#endif

    InternalStemHunspell();
};

/** Build the stemmer by creating a Hunspell handle from the given files.
 *
 *  @param apath  path handed to Hunspell_create() as the affix file.
 *  @param dpath  path handed to Hunspell_create() as the dictionary file.
 */
InternalStemHunspell::InternalStemHunspell(const std::string & apath,
		                                   const std::string & dpath)
	: Stem::Internal(),
	  handle(Hunspell_create(apath.c_str(), dpath.c_str()))
{
}

/// Hand the handle created in the constructor back to Hunspell_destroy().
InternalStemHunspell::~InternalStemHunspell()
{
	Hunspell_destroy(this->handle);
}

#ifdef HAVE_PATCHED_XAPIAN
/** Stem a UTF-8 word with Hunspell.
 *
 *  The first stem reported by Hunspell_stem() is used.  When
 *  CONVERT_HUNSPELL_DIC is defined the word is converted to the dictionary's
 *  encoding before the lookup and the stem is converted back to UTF-8
 *  afterwards.
 *
 *  @param word  the word to stem.
 *  @return the stem; @a word itself, unchanged, if Hunspell reports no stem
 *          or if an encoding conversion fails.
 */
std::string
InternalStemHunspell::operator()(const std::string & word) {
	char **slist = NULL;
	// Fallback result: a word that cannot be stemmed is returned as is,
	// which matches what stem() does for the unpatched code path.
	std::string result(word);
#ifdef CONVERT_HUNSPELL_DIC
	const char * dic_enc = Hunspell_get_dic_encoding(handle);
	char buf[BUFSIZ];
	buf[0] = 0;
	// Keep one byte back so the terminator always fits in buf.
	size_t avail = sizeof(buf) - 1;
	if (convert("UTF-8", dic_enc, word.data(), word.size(), buf, &avail) <= 0) {
		// Nothing usable to look up (empty word or failed conversion).
		return result;
	}
	int num_slist = Hunspell_stem(handle, &slist, buf);
#else
	int num_slist = Hunspell_stem(handle, &slist, word.c_str());
#endif
	if (num_slist > 0 && slist[0] != NULL) {
		const char * c_ptr = slist[0];
#ifdef CONVERT_HUNSPELL_DIC
		buf[0] = 0;
		avail = sizeof(buf) - 1;
		// Only adopt the stem when it converted cleanly; buf is copied
		// as a C string so no padding bytes end up in the result.
		if (convert(dic_enc, "UTF-8", c_ptr, strlen(c_ptr), buf, &avail) > 0)
			result = buf;
#else
		result = c_ptr;
#endif
	}
	Hunspell_free_list(handle, &slist, num_slist);
	return result;
}
#endif

/** Stem the word held in the inherited symbol buffer (p, length l) in place.
 *
 *  With HAVE_PATCHED_XAPIAN defined this entry point is not implemented and
 *  always returns -1 (operator() does the work instead).
 *
 *  Otherwise the first stem reported by Hunspell_stem() replaces the word via
 *  replace_s().  When CONVERT_HUNSPELL_DIC is defined the word is converted
 *  to the dictionary's encoding for the lookup and the stem is converted back
 *  to UTF-8 before it is stored.  If Hunspell reports no stem, or a
 *  conversion fails, the word is left unchanged.
 *
 *  @return -1 with HAVE_PATCHED_XAPIAN, otherwise the current word length l.
 */
int
InternalStemHunspell::stem() {
#ifdef HAVE_PATCHED_XAPIAN
	return -1;
#else
	char **slist = NULL;
#ifdef CONVERT_HUNSPELL_DIC
	const char * dic_enc = Hunspell_get_dic_encoding(handle);
	// outbuf is a member and still holds data from the previous call, so
	// it is reset before every conversion; one byte is kept back so the
	// terminator always fits.
	outbuf[0] = 0;
	size_t avail = BUFSIZ - 1;
	if (convert("UTF-8", dic_enc, reinterpret_cast<const char *>(p), l, outbuf, &avail) <= 0) {
		// Nothing usable to look up (empty word or failed conversion).
		return l;
	}
	int num_slist = Hunspell_stem(handle, &slist, outbuf);
#else
	std::string s(reinterpret_cast<const char *>(p), l);
	int num_slist = Hunspell_stem(handle, &slist, s.c_str());
#endif
	if (num_slist > 0 && slist[0] != NULL) {
		const char * c_ptr = slist[0];
#ifdef CONVERT_HUNSPELL_DIC
		outbuf[0] = 0;
		avail = BUFSIZ - 1;
		// Only replace the word when the stem converted cleanly.
		if (convert(dic_enc, "UTF-8", c_ptr, strlen(c_ptr), outbuf, &avail) > 0)
			replace_s(0, l, strlen(outbuf), reinterpret_cast<const symbol *>(outbuf));
#else
		replace_s(0, l, strlen(c_ptr), reinterpret_cast<const symbol *>(c_ptr));
#endif
	}
	Hunspell_free_list(handle, &slist, num_slist);
	return l;
#endif
}

/// Return the fixed, NUL-terminated name of this stemmer implementation.
const char *
InternalStemHunspell::get_description() const
{
	static const char description[] = "Hunspell";
	return description;
}


/// Copy constructor: default-construct the base, then take over o's
/// `internal` member.
HunspellStem::HunspellStem(const HunspellStem & o)
	: Stem()
{
    this->internal = o.internal;
}

/// Assignment: make this object's `internal` member the same as o's.
/// Note that, unlike a conventional assignment operator, nothing is returned.
void HunspellStem::operator=(const HunspellStem & o)
{
    this->internal = o.internal;
}

/// Default constructor: only default-constructs the Stem base; no stemmer
/// implementation is installed here.
HunspellStem::HunspellStem() : Stem()
{
}

/** Construct a Hunspell-backed stemmer for the named language.
 *
 *  Recognised names are "en" / "english" and "ru" / "russian".  An empty
 *  name is accepted and leaves the object without a stemmer implementation.
 *
 *  @param language  language code or name.
 *  @throws Xapian::InvalidArgumentError for any other non-empty name.  This
 *          includes names that merely start with 'e' or 'r' (e.g. "es"),
 *          which previously slipped through without an error.
 */
HunspellStem::HunspellStem(const std::string &language)
	: Stem() {
    if (language.empty()) {
    	return;
    }

    if (language == "en" || language == "english") {
	internal = new InternalStemHunspell("/usr/share/myspell/en_US.aff",
					    "/usr/share/myspell/en_US.dic");
	return;
    }

    if (language == "ru" || language == "russian") {
#ifdef CONVERT_HUNSPELL_DIC
	internal = new InternalStemHunspell("/usr/share/myspell/ru_RU.aff",
					    "/usr/share/myspell/ru_RU.dic");
#else
	internal = new InternalStemHunspell("../spell/ru.aff",
					    "../spell/ru.dic");
#endif
	return;
    }

    // Every unrecognised name is rejected the same way, whatever its first
    // letter is.
    throw Xapian::InvalidArgumentError("Language code " + language + " unknown");
}

/// Destructor: no work of its own.
HunspellStem::~HunspellStem()
{
}

/** Stem @a word by forwarding to the base class's Stem::operator().
 *
 *  The call is explicitly qualified so that it always reaches the Stem
 *  implementation rather than this function.
 *
 *  @param word  the word to stem.
 *  @return whatever Stem::operator() returns for @a word.
 */
std::string
HunspellStem::operator()(const std::string &word) const
{
	std::string stemmed = this->Stem::operator()(word);
	return stemmed;
}

}
