<?php
// Require Xapian
require_once "/usr/local/share/php5/xapian.php";

/**
* Title weight multiplier
*
* Passed as the second argument to the term generator's index_text() when
* XapianLib::add_document() indexes a document title; the body text is
* indexed without this argument, so title terms count for more than body terms.
*
* @todo move Title weight multiplier to global config
*/
define('X_TITLE_WEIGHT_MULTIPLIER', 2);

/**
* Xapian lib
*
* An interface to Xapian functions
*
*/
class XapianLib {

	/** Writable handle on the main index, set by db_create() */
	public $database = null;
	/** Filesystem path of the main index as given to the constructor */
	public $dbpath = null;
	/** XapianStem instance built for $language */
	public $stemmer = null;
	/** XapianTermGenerator used by add_document() */
	public $indexer = null;
	/** Language name handed to XapianStem */
	public $language = null;
	/** XapianSimpleStopper, filled by load_stopwords() */
	public $stopper = null;
	/** Writable handle on the synonym database, or null when none is set */
	public $syndb = null;
	/** When true, queries are parsed with $stopper attached */
	public $use_stopwords = false;
	/** When true, print_out() writes its messages */
	public $debug = false;
	/** MSet of the last advanced_query()/cfr_query() call; read by cluster() */
	public $matches = null;

	/**
	* Recursively delete a database directory from disk.
	*
	* The path is passed to `rm -rf` as a single escaped argument. Empty paths
	* and paths containing a `..` segment are refused (nothing is deleted).
	*
	* @param string $dbpath the db folder path
	*/
	function db_delete($dbpath){
		$dbpath = (string) $dbpath;
		$segments = explode('/', str_replace('\\', '/', $dbpath));
		if($dbpath === '' || in_array('..', $segments, true)){
			$this->print_out("db_delete : refusing to delete unsafe path '$dbpath'");
			return;
		}
		// '--' stops rm from treating a path starting with '-' as an option;
		// escapeshellarg() keeps spaces and shell metacharacters inert.
		shell_exec('rm -rf -- ' . escapeshellarg($dbpath));
	}

	/**
	* Open (or create) the index and set up stemmer, term generator and stopper.
	*
	* @param string $dbpath the db folder path
	* @param string $language language name handed to XapianStem
	* @param boolean $delete if the old DB should be overwritten
	* @param boolean $debug if debug lines should be printed
	*/
	function __construct($dbpath, $language, $delete = false, $debug = false){
		$this->language = $language;
		$this->debug 	= $debug;
		$this->dbpath 	= $dbpath;
		// Overwriting is delegated to db_create() rather than db_delete().
		$this->db_create($dbpath, $delete);
		$this->stemmer 	= new XapianStem($language);
		$this->indexer 	= new XapianTermGenerator();
		$this->stopper  = new XapianSimpleStopper();
		$this->indexer->set_stemmer($this->stemmer);
	}

	/**
	* Load stopwords file
	*
	* Replaces the current stopper with a new one holding one stop word per
	* non-empty line of the file. Does nothing when the file cannot be read.
	* Note that this does not switch $use_stopwords on.
	*
	* @param string $swfile path of the stop word list
	*/
	function load_stopwords($swfile){
		if(!file_exists($swfile) || !is_readable($swfile)){
			return;
		}
		$lines = file($swfile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
		if($lines === false){
			return;
		}
		$this->stopper = new XapianSimpleStopper();
		foreach($lines as $sw){
			// Strip stray whitespace / "\r" so the stored word can actually match a term.
			$sw = trim($sw);
			if($sw !== ''){
				$this->stopper->add($sw);
			}
		}
	}


	/**
	* Set synonym DB
	*
	* @param string $syndbpath the synonym db folder path
	* @param boolean $delete if an existing synonym DB should be removed first
	* @return XapianWritableDatabase the synonym db handle
	*/
	function set_synonym_database($syndbpath, $delete = false){
		if($delete){
			$this->db_delete($syndbpath);
		}
		return $this->syndb = new XapianWritableDatabase($syndbpath, Xapian::DB_CREATE_OR_OPEN);
	}

	/**
	* Add synonym in stemmed form
	*
	* Both words are stemmed and stored with a 'Z' prefix.
	*
	* @param string $term the word to add a synonym for
	* @param string $synonym the synonym
	* @throws Exception when no synonym database has been set
	*/
	function add_synonym($term, $synonym){
		if(!$this->syndb){
			throw new Exception('XapianLib::add_synonym() : Synonym DB is not set');
		}
		$this->syndb->add_synonym('Z'.$this->stem($term), 'Z'.$this->stem($synonym));
	}

	/**
	* Return a stemmed word
	*
	* The word is run through a query parser with STEM_ALL and the first term
	* is extracted from the textual description of the parsed query.
	*
	* @param string $word
	* @return string|null the stemmed term, or null when none could be extracted
	*/
	function stem($word){
		$qp 			= new XapianQueryParser();
		$qp->set_stemmer($this->stemmer);
		$qp->set_stemming_strategy(XapianQueryParser::STEM_ALL);
		$qdesc = $qp->parse_query($word)->get_description();
		// The description does not contain a term when the input parses to an empty query.
		if(!preg_match('|Xapian::Query\((.*?):\(|', $qdesc, $matches)){
			return null;
		}
		return $matches[1];
	}


	/**
	* Open the database for update, creating a new database if necessary.
	* @note stores the handle in $this->database, so it needs an instance
	* @return new db handle
	* @param string $dbpath the db folder path
	* @param boolean $delete if the old DB should be overwritten
	*/
	function &db_create($dbpath, $delete){
		$mode = $delete ? Xapian::DB_CREATE_OR_OVERWRITE : Xapian::DB_CREATE_OR_OPEN;
		// Return a local variable: a by-reference function must return a variable.
		$db = new XapianWritableDatabase($dbpath, $mode);
		$this->database = $db;
		return $db;
	}

	/**
	* Build a query parser configured with the stemmer, optional stopper and
	* STEM_SOME strategy shared by all query methods.
	*
	* @param object|null $database database to attach to the parser, if any
	* @return XapianQueryParser
	*/
	private function _new_query_parser($database = null){
		$qp = new XapianQueryParser();
		if($this->use_stopwords){
			$qp->set_stopper($this->stopper);
		}
		$qp->set_stemmer($this->stemmer);
		if($database){
			$qp->set_database($database);
		}
		$qp->set_stemming_strategy(XapianQueryParser::STEM_SOME);
		return $qp;
	}

	/**
	* Read the values of a document for slots 0 .. values_count()-1.
	*
	* @param XapianDocument $doc
	* @return array slot number => stored value
	*/
	private function _document_values($doc){
		$values = array();
		$count = $doc->values_count();
		for($slot = 0; $slot < $count; $slot++){
			$values[$slot] = $doc->get_value($slot);
		}
		return $values;
	}

	/**
	* Convert the hit an MSet iterator points at into the result array shape
	* ('data', 'percent', 'values') returned by the query methods.
	*
	* @param object $i MSet iterator positioned on a hit
	* @return array
	*/
	private function _hit_to_array($i){
		$doc = $i->get_document();
		return array(
			  'data' 	=> $doc->get_data()
			, 'percent' => $i->get_percent()
			, 'values' 	=> $this->_document_values($doc)
		);
	}

	/**
	* Convert the whole of $this->matches into an array keyed by docid.
	*
	* @return array docid => array('data', 'percent', 'values')
	*/
	private function _matches_to_array(){
		$result = array();
		$i = $this->matches->begin();
		while (!$i->equals($this->matches->end())) {
			$result[$i->get_docid()] = $this->_hit_to_array($i);
			$i->next();
		}
		return $result;
	}

	/**
	* Run a query, optionally restricted by a numeric value range.
	*
	* The resulting MSet is kept in $this->matches for a later cluster() call.
	*
	* @param string $query_string the user query
	* @param int $num_results maximum number of hits
	* @param int|null $slot value slot the range criteria applies to
	* @param string|null $criteria range expression appended to the query
	* @param int|null $flags query parser flags; parser defaults when empty
	* @return array docid => array('data', 'percent', 'values')
	*/
	function &advanced_query($query_string, $num_results, $slot, $criteria, $flags = null){
		// Start an enquire session.
		$enquire 		= new XapianEnquire($this->database);
		$qp 			= $this->_new_query_parser($this->syndb);

		// Slot 0 is a valid value slot, so test for "no slot given" explicitly
		// instead of relying on truthiness.
		$has_slot = ($slot !== null && $slot !== false && $slot !== '');
		if($has_slot && $criteria){
			$proc = new XapianNumberValueRangeProcessor($slot);
			$qp->add_valuerangeprocessor($proc);
			$query_string = $query_string . ' ' . $criteria;
		}

		if($flags){
			$query 			= $qp->parse_query($query_string,  $flags);
		} else {
			$query 			= $qp->parse_query($query_string);
		}
		$this->print_out( "Query : " . $query->get_description() );

		$enquire->set_query($query);
		$this->matches 	= $enquire->get_mset(0, $num_results);

		$result = $this->_matches_to_array();
		return $result;
	}

	/**
	* Run a plain query (no value range, default parser flags).
	*
	* @param string $query_string
	* @param int $num_results maximum number of hits
	* @return array see advanced_query()
	*/
	function &query($query_string, $num_results){
		$result = $this->advanced_query($query_string, $num_results, null, null);
		return $result;
	}


	/**
	* Run a query with automatic synonym expansion (FLAG_AUTO_SYNONYMS).
	*
	* @param string $query_string
	* @param int $num_results maximum number of hits
	* @return array see advanced_query()
	*/
	function &syn_query($query_string, $num_results){
		$result = $this->advanced_query($query_string, $num_results, null, null, XapianQueryParser::FLAG_AUTO_SYNONYMS);
		return $result;
	}


	/**
	* Fetch a document from the main index.
	*
	* @param int $docid
	* @return XapianDocument
	*/
	function &get_document($docid){
		$doc = $this->database->get_document($docid);
		return $doc;
	}

	/**
	* Cosine similarity of the term lists of two documents, using the main
	* index as term frequency source.
	*
	* @param XapianDocument $a
	* @param XapianDocument $b
	* @return mixed whatever XapianDocSimCosine::similarity() returns
	*/
	function similarity(&$a, &$b){
		$ds = new XapianDocSimCosine();
		$tfs =  new XapianDatabaseTermFreqSource($this->database);
		$ds->set_termfreqsource($tfs);
		return $ds->similarity($a->termlist_begin(), $a->termlist_end(), $b->termlist_begin(), $b->termlist_end());
	}

	/**
	* Index a document and store it in the main index.
	*
	* Values are stored in slots 0, 1, 2 ... in iteration order, each passed
	* through Xapian::sortable_serialise(). The stored document data is
	* '<title>…</title><body>…</body>'.
	*
	* @param string $body body text
	* @param string $title title text, indexed with X_TITLE_WEIGHT_MULTIPLIER
	* @param int|null $docid replace this document when given, otherwise add a new one
	* @param array $values numeric values to store
	* @return mixed result of replace_document() or add_document()
	*/
	function add_document($body, $title, $docid = null, $values = array()){
		$doc = new XapianDocument();
		$i = 0;
		foreach($values as $v){
			$doc->add_value($i++, Xapian::sortable_serialise($v));
		}
		$this->indexer->set_document($doc);
		if($body){
			$this->indexer->index_text($body);
		}
		if($title){
			$this->indexer->index_text($title, X_TITLE_WEIGHT_MULTIPLIER);
		}
		// Set data
		$doc->set_data('<title>' .$title . '</title><body>' . $body . '</body>');
		if($docid){
			return $this->database->replace_document((int) $docid, $doc);
		} else {
			return $this->database->add_document($doc);
		}
	}

	/**
	* Delete one document from the main index.
	*
	* @param int $docid
	*/
	function delete_document($docid){
		$this->database->delete_document((int) $docid);
	}

	/**
	* Cluster the hits of the last query.
	*
	* @param int $num_clusters number of clusters handed to the clusterer
	* @return array cluster id => docid => array('data', 'percent', 'values')
	* @throws Exception when no query has been run yet
	*/
	function cluster($num_clusters){
		if(!$this->matches){
			throw new Exception('XapianLib::cluster() : no query has been run');
		}

		$cl = new XapianClusterSingleLink();
		$docsim = new XapianDocSimCosine();
		$docsource = new XapianMSetDocumentSource($this->matches);
		$clusters = new XapianClusterAssignments();

		$cl->cluster($this->database, $clusters, $docsim, $docsource, $num_clusters);
		$i = $this->matches->begin();
		$cluster_result = array();
		while (!$i->equals($this->matches->end())) {
			$docid = $i->get_docid();
			// Values are collected per hit (the slot counter restarts for every document).
			$cluster_result[$clusters->cluster($docid)][$docid] = $this->_hit_to_array($i);
			$i->next();
		}
		return $cluster_result;

	}

	/**
	* Run a query filtered by a comparison on a stored value.
	*
	* The parsed query is combined through OP_FILTER with a value query built
	* from $operator, $slot and the serialised $divider. The resulting MSet is
	* kept in $this->matches.
	*
	* @param string $query_string the user query
	* @param int $num_results maximum number of hits
	* @param int $slot value slot to compare
	* @param int $operator XapianQuery operator for the value comparison
	* @param float $divider number the value is compared against
	* @return array docid => array('data', 'percent', 'values'), empty when nothing matches
	*/
	function cfr_query($query_string, $num_results, $slot, $operator, $divider){

		$enquire 		= new XapianEnquire($this->database);
		$qp 			= $this->_new_query_parser($this->database);

		$query 			= $qp->parse_query($query_string);
		$query 			= new XapianQuery(XapianQuery::OP_FILTER, $query, new XapianQuery($operator, $slot, Xapian::sortable_serialise($divider)));

		$enquire->set_query($query);
		$this->matches 	= $enquire->get_mset(0, $num_results);

		return $this->_matches_to_array();
	}


	/**
	* Print one line per hit: percent, docid, first value and a 60 byte excerpt.
	*
	* @param array $docs result array as returned by the query methods
	*/
	function dump($docs){
		foreach($docs as $docid => $r){
			$excerpt = substr(strip_tags($r['data']), 0, 60);
			// Hits without stored values print an empty column instead of raising a notice.
			$first_value = isset($r['values'][0]) ? $r['values'][0] : '';
			echo "{$r['percent']}% - $docid - $first_value - $excerpt \n";
		}
	}

	/**
	* Flush database
	*
	* Drops the handles held by this object.
	* NOTE(review): presumably relies on Xapian committing pending changes when
	* the handle is destroyed — confirm against the bindings in use.
	*/
	function __destruct(){
		$this->database = null;
		if($this->syndb){
			$this->syndb = null;
		}
	}

	/**
	* Print a debug line if debug is activated
	*
	* @param string $text
	*/
	function print_out($text){
		if($this->debug){
			print "XapianLib - $text \n";
		}
	}

	/**
	* Get a document list
	*
	* Walks the posting list of the empty term for get_doccount() steps.
	*
	* @return array list of docids
	*/
	function  get_document_list(){
		$result = array();
		$i = $this->database->postlist_begin('');
		$dc = $this->database->get_doccount();
		for($j = 0 ; $j < $dc; $j++) {
			$result[] = $i->get_docid();
			$i->next();
		}
		return $result;
	}

	/**
	* Delete all documents from the database
	*
	* Deletes while iterating the posting list and prints progress lines.
	* NOTE(review): the name suggests this variant crashes; after the loop the
	* iterator equals postlist_end('') and is still dereferenced — prefer
	* delete_all().
	*/
	function delete_all_sigsegv(){
		$onemore = false;
		$i = $this->database->postlist_begin('');
		print "First : " . $i->get_docid() . "\n";
		print "Last : " . $this->database->get_lastdocid() . "\n";
		while(!$i->equals($this->database->postlist_end(''))){
			$this->delete_document($i->get_docid());
			print("del : ".$i->get_docid() . "\n");
			$i->next();
			$onemore = true;
			print("next : " .$i->get_docid() . "\n");
		}
		if($onemore){
			$this->delete_document($i->get_docid());
		}
	}


	/**
	* Delete all documents from the database
	*
	* Collects the docids first, then deletes them one by one.
	*/
	function delete_all(){
		foreach($this->get_document_list() as $docid){
			$this->delete_document($docid);
		}
	}


}
?>