Ticket #86: bug86-fix.patch
File bug86-fix.patch, 8.1 KB (added by , 18 years ago) |
---|
-
matcher/multimatch.cc
351 351 Xapian::doccount max_msize = first + check_at_least; 352 352 items.reserve(max_msize + 1); 353 353 354 // Set the minimum item, used to compare against to see if an item 355 // should be considered for the mset. 356 Xapian::Internal::MSetItem min_item(weight_cutoff, 0); 354 // Tracks the minimum item currently eligible for the MSet - we compare 355 // candidate items against this. 356 Xapian::Internal::MSetItem min_item(0.0, 0); 357 358 // Minimum weight an item must have to be worth considering. 359 Xapian::weight min_weight = weight_cutoff; 357 360 358 361 Xapian::weight percent_factor = percent_cutoff / 100.0; 359 362 … … 381 384 382 385 while (true) { 383 386 if (recalculate_w_max) { 384 if (min_ item.wt > 0.0 && getorrecalc_maxweight(pl) < min_item.wt) {387 if (min_weight > 0.0 && getorrecalc_maxweight(pl) < min_weight) { 385 388 DEBUGLINE(MATCH, "*** TERMINATING EARLY (1)"); 386 389 break; 387 390 } 388 391 } 389 392 390 if (next_handling_prune(pl, min_ item.wt, this)) {393 if (next_handling_prune(pl, min_weight, this)) { 391 394 DEBUGLINE(MATCH, "*** REPLACING ROOT"); 392 395 393 // no need for a full recalc (unless we've got to do one because 394 // of a prune elsewhere) - we're just switching to a subtree 395 if (getorrecalc_maxweight(pl) < min_item.wt) { 396 DEBUGLINE(MATCH, "*** TERMINATING EARLY (2)"); 397 break; 396 if (min_weight > 0.0) { 397 // No need for a full recalc (unless we've got to do one 398 // because of a prune elsewhere) - we're just switching to a 399 // subtree. 400 if (getorrecalc_maxweight(pl) < min_weight) { 401 DEBUGLINE(MATCH, "*** TERMINATING EARLY (2)"); 402 break; 403 } 398 404 } 399 405 } 400 406 … … 403 409 break; 404 410 } 405 411 406 Xapian::docid did = pl->get_docid(); 412 // Only calculate the weight if we need it for mcmp, or there's a 413 // percentage or weight cutoff in effect. Otherwise we calculate it 414 // below if we haven't already rejected this candidate. 407 415 Xapian::weight wt = 0.0; 408 // Only calculate the weight if we need it for mcmp - otherwise 409 // we calculate it below only if we keep the item 410 if (min_item.wt > 0.0) wt = pl->get_weight(); 416 bool calculated_weight = false; 417 if (sort_by != VAL || min_weight > 0.0) { 418 wt = pl->get_weight(); 419 if (wt < min_weight) continue; 420 calculated_weight = true; 421 } 411 422 423 Xapian::docid did = pl->get_docid(); 412 424 DEBUGLINE(MATCH, "Candidate document id " << did << " wt " << wt); 413 425 Xapian::Internal::MSetItem new_item(wt, did); 414 426 if (sort_by != REL) { … … 418 430 Xapian::docid m = (new_item.did - 1) / multiplier + 1; // real docid in that database 419 431 Xapian::Internal::RefCntPtr<Xapian::Document::Internal> doc(db.internal[n]->open_document(m, true)); 420 432 new_item.sort_key = doc->get_value(sort_key); 421 }422 433 423 // Test if item has high enough weight (or sort key) to get into 424 // proto-mset. 425 if (sort_by != REL || min_item.wt > 0.0) 434 // We're sorting by value (in part at least), so compare the item 435 // against the lowest currently in the proto-mset. If sort_by is 436 // VAL, then new_item.wt won't yet be set, but that doesn't 437 // matter since it's not used by the sort function. 426 438 if (!mcmp(new_item, min_item)) continue; 439 } 427 440 428 441 Xapian::Internal::RefCntPtr<Xapian::Document::Internal> doc; 429 442 … … 446 459 } 447 460 } 448 461 449 if ( min_item.wt <= 0.0) {462 if (!calculated_weight) { 450 463 // we didn't calculate the weight above, but now we will need it 451 464 wt = pl->get_weight(); 452 465 new_item.wt = wt; … … 489 502 // replacement item 490 503 new_item.collapse_count = old_item.collapse_count + 1; 491 504 492 // This is best match with this key so far: 493 // remove the old one from the MSet 494 if ((sort_by != VAL && min_item.wt <= 0.0) || 495 mcmp(old_item, min_item)) { 496 // Old one hasn't fallen out of MSet yet 497 // Scan through (unsorted) MSet looking for entry 505 // There was a previous item in the collapse tab so 506 // the MSet can't be empty. 507 Assert(!items.empty()); 508 509 // This is best potential MSet entry with this key which 510 // we've seen so far. Check if the previous best entry 511 // with this key might still be in the proto-MSet. If it 512 // might be, we need to check through for it. 513 if (old_item.wt >= min_weight && mcmp(old_item, min_item)) { 514 // Scan through (unsorted) MSet looking for entry. 498 515 // FIXME: more efficient way than just scanning? 499 516 Xapian::docid olddid = old_item.did; 500 DEBUGLINE(MATCH, "collapsem: removing " << olddid <<501 ": " << new_item.collapse_key);502 517 vector<Xapian::Internal::MSetItem>::iterator i; 503 for (i = items.begin(); i->did != olddid; ++i) { 504 Assert(i != items.end()); 518 for (i = items.begin(); i != items.end(); ++i) { 519 if (i->did == olddid) { 520 DEBUGLINE(MATCH, "collapsem: removing " << 521 olddid << ": " << 522 new_item.collapse_key); 523 // We can replace an arbitrary element in 524 // O(log N) but have to do it by hand (in this 525 // case the new elt is bigger, so we just swap 526 // down the tree). 527 // FIXME: implement this, and clean up is_heap 528 // handling 529 *i = new_item; 530 pushback = false; 531 is_heap = false; 532 break; 533 } 505 534 } 506 *i = new_item;507 pushback = false;508 // We can replace an arbitrary element in O(log N)509 // but have to do it by hand (in this case the new510 // elt is bigger, so we just swap down the tree)511 // FIXME: implement this, and clean up is_heap512 // handling513 is_heap = false;514 535 } 515 536 516 537 // Keep the old weight as it is now second best so far … … 536 557 pop_heap<vector<Xapian::Internal::MSetItem>::iterator, 537 558 MSetCmp>(items.begin(), items.end(), mcmp); 538 559 items.pop_back(); 560 561 min_item = items.front(); 539 562 if (sort_by == REL || sort_by == REL_VAL) { 540 Xapian::weight tmp = min_item.wt; 541 min_item = items.front(); 542 min_item.wt = tmp; 543 } else { 544 min_item = items.front(); 563 if (min_item.wt > min_weight) min_weight = min_item.wt; 545 564 } 546 if (getorrecalc_maxweight(pl) < min_ item.wt) {565 if (getorrecalc_maxweight(pl) < min_weight) { 547 566 DEBUGLINE(MATCH, "*** TERMINATING EARLY (3)"); 548 567 break; 549 568 } … … 563 582 greatest_wt = wt; 564 583 if (percent_cutoff) { 565 584 Xapian::weight w = wt * percent_factor; 566 if (w > min_item.wt) { 567 min_item.wt = w; 568 min_item.did = 0; 585 if (w > min_weight) { 586 min_weight = w; 569 587 if (!is_heap) { 570 588 is_heap = true; 571 589 make_heap<vector<Xapian::Internal::MSetItem>::iterator, 572 590 MSetCmp>(items.begin(), items.end(), mcmp); 573 591 } 574 while (!items.empty() && items.front().wt < min_ item.wt) {592 while (!items.empty() && items.front().wt < min_weight) { 575 593 pop_heap<vector<Xapian::Internal::MSetItem>::iterator, 576 594 MSetCmp>(items.begin(), items.end(), mcmp); 577 Assert(items.back().wt < min_ item.wt);595 Assert(items.back().wt < min_weight); 578 596 items.pop_back(); 579 597 } 580 598 #ifdef XAPIAN_DEBUG_PARANOID 581 599 vector<Xapian::Internal::MSetItem>::const_iterator i; 582 600 for (i = items.begin(); i != items.end(); ++i) { 583 Assert(i->wt >= min_ item.wt);601 Assert(i->wt >= min_weight); 584 602 } 585 603 #endif 586 604 } … … 665 683 = items.size(); 666 684 } else { 667 685 if (percent_cutoff) { 668 // another approach: Xapian::doccount new_est = items.size() * (1 - percent_factor) / (1 - min_ item.wt / greatest_wt);686 // another approach: Xapian::doccount new_est = items.size() * (1 - percent_factor) / (1 - min_weight / greatest_wt); 669 687 Xapian::doccount new_est; 670 688 new_est = Xapian::doccount((1 - percent_factor) * matches_estimated); 671 689 matches_estimated = max(size_t(new_est), items.size()); 672 // and another: items.size() + (1 - greatest_wt * percent_factor / min_ item.wt) * (matches_estimated - items.size());690 // and another: items.size() + (1 - greatest_wt * percent_factor / min_weight) * (matches_estimated - items.size()); 673 691 674 692 // Very likely an underestimate, but we can't really do better without 675 693 // checking further matches... Only possibility would be to track how