Changeset 60

Show
Ignore:
Timestamp:
1999-09-17 15:36:27 (9 years ago)
Author:
richard
Message:

Calculates weights for terms and documents.
Assumes 1000 documents in the collection, since the actual collection size cannot currently be read from the database.

Location:
trunk/xapian-core
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • trunk/xapian-core/backends/da/da_database.cc

    r58 r60  
    22 
    33#include <string.h> 
     4#include <stdio.h> 
    45#include <errno.h> 
     6#include <math.h> 
    57#include <string> 
    68 
     
    911#include "daread.h" 
    1012 
    11 DAPostList::DAPostList(struct postings *pl, doccount tf) { 
     13DAPostList::DAPostList(struct postings *pl, doccount tf, doccount size) { 
    1214    termfreq = tf; 
    1315    postlist = pl; 
     16    termweight = log((size - tf) / tf); 
     17 
     18    printf("(dbsize, termfreq) = (%4d, %4d)\t=> termweight = %f\n", 
     19           size, tf, termweight); 
     20 
    1421    DAreadpostings(postlist, 0, 0); 
    1522} 
     
    2835} 
    2936 
     37/* This is the biggie */ 
    3038weight DAPostList::get_weight() { 
    3139    if(at_end()) throw OmError("Attempt to access beyond end of postlist."); 
    32     return postlist->wdf; 
     40    doccount wdf; 
     41    weight wt; 
     42 
     43    wdf = postlist->wdf; 
     44 
     45    printf("(wdf, termweight)  = (%4d, %4.2f)", wdf, termweight); 
     46 
     47    double k = 1; 
     48    // FIXME - precalculate this freq score for several values of wt - may 
     49    // remove much computation. 
     50    wt = (double) wdf / (k + wdf); 
     51//    printf("(freq score %4.2f)", wt); 
     52 
     53    wt *= termweight; 
     54 
     55    printf("\t=> weight = %f\n", wt); 
     56 
     57    return wt; 
    3358} 
    3459 
     
    6994    DA_r = DAopen((byte *)(filename_r.c_str()), DARECS); 
    7095    if(DA_r == NULL) 
    71         throw OpeningError(string("Opening ") + filename_r + ": " + strerror(errno)); 
     96        throw OpeningError(string("When opening ") + filename_r + ": " + strerror(errno)); 
    7297 
    7398    DA_t = DAopen((byte *)(filename_t.c_str()), DATERMS); 
     
    75100        DAclose(DA_r); 
    76101        DA_r = NULL; 
    77         throw OpeningError(string("Opening ") + filename_t + ": " + strerror(errno)); 
     102        throw OpeningError(string("When opening ") + filename_t + ": " + strerror(errno)); 
    78103    } 
     104 
     105    dbsize = 1000;  /* FIXME - read from database */ 
     106 
    79107    opened = true; 
    80108 
     
    117145    postlist = DAopenpostings(&ti, DA_t); 
    118146 
    119     DAPostList * pl = new DAPostList(postlist, ti.freq); 
     147    DAPostList * pl = new DAPostList(postlist, ti.freq, dbsize); 
    120148    return pl; 
    121149} 
     
    130158DADatabase::term_name_to_id(termname name) 
    131159{ 
     160    if(!opened) throw OmError("DADatabase not opened."); 
    132161    termid id; 
    133162 
     
    146175DADatabase::term_id_to_name(termid id) 
    147176{ 
     177    if(!opened) throw OmError("DADatabase not opened."); 
    148178    if (id <= 0 || id > termidvec.size()) throw RangeError("invalid termid"); 
    149179//    printf("Looking up termid %d: name = `%s'\n", id, termidvec[id - 1].c_str()); 
  • trunk/xapian-core/backends/da/da_database.h

    r57 r60  
    1313        docid  currdoc; 
    1414        doccount termfreq; 
     15        weight termweight; 
    1516 
    16         DAPostList(struct postings *pl, doccount tf); 
     17        DAPostList(struct postings *pl, doccount termf, doccount dbsize); 
    1718    public: 
    1819        ~DAPostList(); 
     
    2122 
    2223        docid  get_docid();     // Gets current docid 
    23         docid get_weight();    // Gets current weight 
     24        weight get_weight();    // Gets current weight 
    2425        void   next();          // Moves to next docid 
    2526        void   skip_to(docid);  // Moves to next docid >= specified docid 
     
    3637        struct DAfile * DA_r; 
    3738        struct DAfile * DA_t; 
     39        doccount dbsize; 
    3840 
    3941        termid max_termid; 
  • trunk/xapian-core/common/omtypes.h

    r57 r60  
    99typedef docid doccount; 
    1010 
    11 typedef unsigned int weight; 
     11typedef double weight; 
    1212 
    1313#ifdef __cplusplus 
  • trunk/xapian-core/tests/dbtest.cc

    r52 r60  
    2121            did = postlist->get_docid(); 
    2222            wt = postlist->get_weight(); 
    23             printf("TermId: %d  DocId: %d  Weight: %d\n", tid, did, wt); 
     23            printf("TermId: %d  DocId: %d  Weight: %f\n", tid, did, wt); 
    2424            if(did == 120) postlist->skip_to(144); 
    2525            else postlist->next();