Ticket #203: patch

File patch, 7.1 KB (added by Richard Boulton, 17 years ago)

Nearly-working implementation of SCALE_WEIGHT query optimisation

  • tests/api_nodb.cc

     
    416416    return true;
    417417}
    418418
     419// Test various scaleweight queries to check that they collapse correctly.
     420static bool test_scaleweight5()
     421{
     422    Xapian::Query queries1[3] = {
     423        Xapian::Query("wibble"),
     424        Xapian::Query("wobble"),
     425        Xapian::Query(Xapian::Query::OP_OR, string("jelly"), string("belly"))
     426    };
    419427
     428    double multipliers[3] = {
     429        1.5, 1, 0
     430    };
     431    for (int i = 0; i < 3; ++i) {
     432        double multiplier = multipliers[i];
     433        vector<Xapian::Query> vec1;
     434        for (int j = 0; j < 3; ++j) {
     435            vec1.push_back(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT,
     436                                         queries1[j], multiplier));
     437        }
     438
     439        Xapian::Query myquery1(Xapian::Query::OP_OR, vec1.begin(), vec1.end());
     440
     441        if (multiplier == 1) {
     442            TEST_EQUAL(myquery1.get_description(),
     443                       "Xapian::Query((wibble OR wobble OR jelly OR belly))");
     444        } else {
     445            TEST_EQUAL(myquery1.get_description(),
     446                       "Xapian::Query(((wibble OR wobble OR jelly OR belly) * "
     447                       + om_tostring(multiplier) + "))");
     448        }
     449    }
     450    return true;
     451}
     452
     453// Test that scaleweight queries combine and simplify correctly.
     454static bool test_scaleweight6()
     455{
     456    Xapian::Query foo("foo");
     457    Xapian::Query bar("bar");
     458    Xapian::Query foo_7(Xapian::Query::OP_SCALE_WEIGHT, foo, 7);
     459    Xapian::Query bar_7(Xapian::Query::OP_SCALE_WEIGHT, bar, 7);
     460
     461    // Foo * 7 * 1/7 == Foo
     462    Xapian::Query foo_7_div_7(Xapian::Query::OP_SCALE_WEIGHT, foo_7, 1.0/7);
     463    TEST_EQUAL(foo_7_div_7.get_description(), "Xapian::Query(foo)");
     464
     465    // Foo * 7 * 10 == Foo * 70
     466    Xapian::Query foo_7_10(Xapian::Query::OP_SCALE_WEIGHT, foo_7, 10);
     467    TEST_EQUAL(foo_7_10.get_description(),
     468               "Xapian::Query((foo * 70))");
     469
     470    // (Foo * 7 OR Bar * 7) == (Foo OR Bar) * 7
     471    Xapian::Query foo_7_bar_7(Xapian::Query::OP_OR, foo_7, bar_7);
     472    TEST_EQUAL(foo_7_bar_7.get_description(), "Xapian::Query(((foo OR bar) * 7))");
     473
     474    // (Foo * 7 OR Foo * 7) == (Foo OR Foo) * 7
     475    Xapian::Query foo_7_foo_7(Xapian::Query::OP_OR, foo_7, foo_7);
     476    TEST_EQUAL(foo_7_foo_7.get_description(), "Xapian::Query((foo:(wqf=2) * 7))");
     477
     478    // (Foo * 7 OR Bar * 7) * 10 == (Foo OR Bar) * 70
     479    Xapian::Query foo_7_bar_7_10(Xapian::Query::OP_SCALE_WEIGHT,
     480                                 Xapian::Query(Xapian::Query::OP_OR,
     481                                               foo_7, bar_7),
     482                                 10);
     483    TEST_EQUAL(foo_7_bar_7_10.get_description(),
     484               "Xapian::Query(((foo OR bar) * 70))");
     485
     486    // (Foo * 7 OR Foo * 7) * 10 == (Foo, wqf=2) * 70
     487    Xapian::Query foo_7_foo_7_10(Xapian::Query::OP_SCALE_WEIGHT,
     488                                 Xapian::Query(Xapian::Query::OP_OR,
     489                                               foo_7, foo_7),
     490                                 10);
     491    TEST_EQUAL(foo_7_foo_7_10.get_description(),
     492               "Xapian::Query((foo:(wqf=2) * 70))");
     493
     494
     495    // (Foo * 7 OR Bar) * 10 == (Foo * 7 OR Bar) * 10
     496    Xapian::Query foo_7_bar_10(Xapian::Query::OP_SCALE_WEIGHT,
     497                               Xapian::Query(Xapian::Query::OP_OR,
     498                                             foo_7, bar),
     499                               10);
     500    TEST_EQUAL(foo_7_bar_10.get_description(),
     501               "Xapian::Query((((foo * 7) OR bar) * 10))");
     502
     503    // (Foo * 7) * 0 == Foo * 0
     504    Xapian::Query foo_7_0(Xapian::Query::OP_SCALE_WEIGHT, foo_7, 0);
     505    TEST_EQUAL(foo_7_0.get_description(),
     506               "Xapian::Query((foo * 0))");
     507
     508    // Factor is a double which, when multiplied by its reciprocal, doesn't
     509    // give exactly 1.0
     510    double factor = 179.76931348623157e306;
     511
     512    Xapian::Query foo_factor(Xapian::Query::OP_SCALE_WEIGHT, foo, factor);
     513    Xapian::Query foo_factor_inv(Xapian::Query::OP_SCALE_WEIGHT, foo_factor, 1.0 / factor);
     514    TEST_EQUAL(foo_factor_inv.get_description(), "Xapian::Query(foo)");
     515
     516    return true;
     517}
     518
     519
    420520// #######################################################################
    421521// # End of test cases: now we list the tests to run.
    422522
     
    441541    TESTCASE(stringlistserialise1),
    442542    TESTCASE(scaleweight3),
    443543    TESTCASE(scaleweight4),
     544    TESTCASE(scaleweight5),
     545    TESTCASE(scaleweight6),
    444546    END_OF_TESTCASES
    445547};
  • include/xapian/query.h

     
    327327         */
    328328        bool simplify_matchnothing();
    329329
     330        /** Pull up any common scale weight nodes.
     331         */
     332        void pull_up_scale_weight();
     333
    330334        /** Get a string describing the given query type.
    331335         */
    332336        static std::string get_op_name(Xapian::Query::Internal::op_t op);
  • api/omqueryinternal.cc

     
    705705    return false;
    706706}
    707707
     708void
     709Xapian::Query::Internal::pull_up_scale_weight()
     710{
     711    Assert(subqs.size() >= 1);
     712
     713    // If our operator is not OP_SCALE_WEIGHT, there's no point in "pulling up"
     714    // a single OP_SCALE_WEIGHT subquery.
     715    if (subqs.size() == 1 && op != OP_SCALE_WEIGHT)
     716        return;
     717
     718    // Check if all subqueries are OP_SCALE_WEIGHT
     719    // FIXME - in future, we might want to pull up weights if a majority of the
     720    // subqueries are OP_SCALE_WEIGHT with the same multiplier, but for now we
     721    // take the simpler approach of only doing it if they all are.
     722    subquery_list::iterator sq;
     723    for (sq = subqs.begin(); sq != subqs.end(); ++sq) {
     724        if ((*sq)->op != OP_SCALE_WEIGHT)
     725            return;
     726    }
     727
     728    // All subqueries are OP_SCALE_WEIGHT - check if they have exactly the same
     729    // weight.
     730    sq = subqs.begin();
     731    double common_subweight = (*sq)->dbl_parameter;
     732    ++sq;
     733    for (; sq != subqs.end(); ++sq) {
     734        if ((*sq)->dbl_parameter != common_subweight)
     735            return;
     736    }
     737
     738    // Remove the OP_SCALE_WEIGHT nodes from the subqueries.
     739    // Do this by building a new query, so that any subqueries get distributed
     740    // (since this is done inside add_subquery()).
     741    Xapian::Query::Internal newq(*this);
     742    for (sq = newq.subqs.begin(); sq != newq.subqs.end(); sq++) {
     743        delete *sq;
     744    }
     745    newq.subqs.clear();
     746    for (sq = subqs.begin(); sq != subqs.end(); ++sq) {
     747        Assert((*sq)->op == OP_SCALE_WEIGHT);
     748        Assert((*sq)->subqs.size() == 1);
     749        newq.add_subquery((*sq)->subqs.front());
     750    }
     751    Xapian::Query::Internal * newqptr = newq.end_construction();
     752    Assert(newqptr);
     753    this->swap(*newqptr);
     754
     755    if (op == OP_SCALE_WEIGHT) {
     756        // Combine the common weight with our weight.
     757        dbl_parameter *= common_subweight;
     758    } else {
     759        // Make a new OP_SCALE_WEIGHT node with the common weight, and put it
     760        // above ourself.
     761        Xapian::Query::Internal newq2(OP_SCALE_WEIGHT, 0);
     762        newq2.dbl_parameter = common_subweight;
     763        newq2.add_subquery(this);
     764        newqptr = newq2.end_construction();
     765        Assert(newqptr);
     766        this->swap(*newqptr);
     767    }
     768}
     769
    708770Xapian::Query::Internal *
    709771Xapian::Query::Internal::simplify_query()
    710772{
     
    715777        return 0;
    716778    }
    717779
     780    if (op != OP_LEAF && op != OP_VALUE_RANGE) {
     781        // Pull any scale weight operators with the same weight.
     782        pull_up_scale_weight();
     783    }
     784
    718785    // General simplifications, dependent on operator.
    719786    switch (op) {
    720787        case OP_LEAF: