Ticket #164: weak_skip_to.patch
File weak_skip_to.patch, 12.4 KB (added 17 years ago; uploader name not preserved) |
---|
-
matcher/branchpostlist.h
56 56 /** Utility method, to call recalc_maxweight() and do the pruning 57 57 * if a next() or skip_to() returns non-NULL result. 58 58 */ 59 void handle_prune(PostList *&kid, PostList *ret) { 59 bool handle_prune(PostList *&kid, PostList *ret) { 60 60 if (ret) { 61 61 delete kid; 62 62 kid = ret; 63 63 64 64 // now tell matcher that maximum weights need recalculation. 65 65 matcher->recalc_maxweight(); 66 return true; 66 67 } 68 return false; 67 69 } 68 70 69 71 public: … … 123 125 return true; 124 126 } 125 127 128 inline bool 129 weak_skip_to_handling_prune(PostList * & pl, Xapian::docid did, Xapian::weight w_min, 130 MultiMatch *matcher, bool & found) 131 { 132 PostList *p = pl->weak_skip_to(did, w_min, found); 133 if (!p) return false; 134 delete pl; 135 pl = p; 136 // now tell matcher that maximum weights need recalculation. 137 if (matcher) matcher->recalc_maxweight(); 138 return true; 139 } 140 126 141 #endif /* OM_HGUARD_BRANCHPOSTLIST_H */ -
matcher/andpostlist.h
37 37 protected: 38 38 Xapian::docid head; 39 39 Xapian::weight lmax, rmax; 40 41 bool l_weak; 42 bool r_weak; 43 40 44 private: 41 45 Xapian::doccount dbsize; 42 46 43 void process_next_or_skip_to(Xapian::weight w_min, PostList *ret); 47 void process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did); 48 void process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did); 49 50 /// If r is weak, but l isn't, swap r and l. 51 /// Return true if a swap happened. 52 bool swap_to_make_l_weak(); 44 53 public: 45 54 Xapian::doccount get_termfreq_max() const; 46 55 Xapian::doccount get_termfreq_min() const; -
matcher/valuerangepostlist.cc
100 100 ValueRangePostList::next(Xapian::weight) 101 101 { 102 102 Assert(db); 103 Xapian::docid lastdocid = db->get_lastdocid(); 103 AssertParanoid(lastdocid == db->get_lastdocid()); 104 104 while (current < lastdocid) { 105 105 try { 106 106 if (++current == 0) break; … … 125 125 return next(w_min); 126 126 } 127 127 128 PostList * 129 ValueRangePostList::weak_skip_to(Xapian::docid did, Xapian::weight, 130 bool & found) 131 { 132 Assert(db); 133 AssertParanoid(lastdocid == db->get_lastdocid()); 134 if (did <= current) { 135 found = true; 136 return NULL; 137 } 138 current = did; 139 if (current > lastdocid) { 140 db = NULL; 141 found = true; 142 return NULL; 143 } 144 145 try { 146 AutoPtr<Xapian::Document::Internal> doc(db->open_document(current, true)); 147 string v = doc->get_value(valno); 148 if (v >= begin && v <= end) { 149 found = true; 150 return NULL; 151 } 152 } catch (const Xapian::DocNotFoundError &) { 153 // Document doesn't exist. 154 } 155 // Haven't determined the position of the next match, other than knowing that 156 // it's not at document "did". 157 found = false; 158 return NULL; 159 } 160 128 161 bool 129 162 ValueRangePostList::at_end() const 130 163 { -
matcher/valuerangepostlist.h
33 33 34 34 Xapian::docid current; 35 35 36 /// Flag, true iff \a current points to a document which satisfies the restriction. 37 bool current_valid; 38 39 /// Highest docid used in the database. 40 Xapian::docid lastdocid; 41 36 42 /// Disallow copying. 37 43 ValueRangePostList(const ValueRangePostList &); 38 44 … … 43 49 ValueRangePostList(const Xapian::Database::Internal *db_, 44 50 Xapian::valueno valno_, 45 51 const std::string &begin_, const std::string &end_) 46 : db(db_), valno(valno_), begin(begin_), end(end_), current(0) { } 52 : db(db_), valno(valno_), begin(begin_), end(end_), current(0), 53 current_valid(false), lastdocid(db->get_lastdocid()) 54 { } 47 55 48 56 Xapian::doccount get_termfreq_min() const; 49 57 … … 69 77 70 78 PostList * skip_to(Xapian::docid, Xapian::weight w_min); 71 79 80 PostList * weak_skip_to(Xapian::docid did, Xapian::weight w_min, bool & found); 81 82 bool prefer_weak_skip_to() { return true; } 83 72 84 bool at_end() const; 73 85 74 86 string get_description() const; -
matcher/andpostlist.cc
27 27 #include "omdebug.h" 28 28 29 29 inline void 30 AndPostList::process_next_or_skip_to(Xapian::weight w_min, PostList *ret) 30 AndPostList::process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did) 31 31 { 32 32 DEBUGCALL(MATCH, void, "AndPostList::process_next_or_skip_to", 33 w_min << ", " << ret); 33 w_min << ", " << did); 34 34 35 head = 0; 35 handle_prune(r, ret); 36 if (did == 0) { 37 if (handle_prune(r, r->next(w_min - lmax))) 38 swap_to_make_l_weak(); 39 } else { 40 if (handle_prune(r, r->skip_to(did, w_min - lmax))) 41 swap_to_make_l_weak(); 42 } 43 36 44 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 37 45 if (r->at_end()) return; 38 46 … … 41 49 Xapian::docid rhead = r->get_docid(); 42 50 DEBUGLINE(MATCH, "rhead " << rhead); 43 51 DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax); 44 skip_to_handling_prune(l, rhead, w_min - rmax, matcher); 52 if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher)) 53 swap_to_make_l_weak(); 45 54 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 46 55 if (l->at_end()) return; 47 56 … … 52 61 if (lhead < rhead) { 53 62 // FIXME: CSE these w_min values? 54 63 // But note that lmax and rmax may change on recalc_maxweight... 
55 skip_to_handling_prune(l, rhead, w_min - rmax, matcher); 64 if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher)) 65 swap_to_make_l_weak(); 56 66 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 57 67 if (l->at_end()) { 58 68 head = 0; … … 61 71 lhead = l->get_docid(); 62 72 DEBUGLINE(MATCH, "lhead " << lhead); 63 73 } else { 64 skip_to_handling_prune(r, lhead, w_min - lmax, matcher); 74 if (skip_to_handling_prune(r, lhead, w_min - lmax, matcher)) 75 swap_to_make_l_weak(); 65 76 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 66 77 if (r->at_end()) { 67 78 head = 0; … … 76 87 return; 77 88 } 78 89 90 inline void 91 AndPostList::process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did) 92 { 93 DEBUGCALL(MATCH, void, "AndPostList::process_half_weak_next_or_skip_to", 94 w_min << ", " << did); 95 96 bool pruned = false; 97 98 head = 0; 99 if (did == 0) { 100 pruned = handle_prune(r, r->next(w_min - lmax)); 101 } else { 102 pruned = handle_prune(r, r->skip_to(did, w_min - lmax)); 103 } 104 105 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 106 if (r->at_end()) return; 107 108 // r has just been advanced by next or skip_to so must be > head 109 // (and head is the current position of l) 110 Xapian::docid rhead = r->get_docid(); 111 DEBUGLINE(MATCH, "rhead " << rhead); 112 DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax); 113 bool found = false; 114 Xapian::docid lhead = rhead; 115 116 while (true) { 117 if (pruned && swap_to_make_l_weak()) { 118 DEBUGLINE(MATCH, "swapped l and r - swapping lhead and rhead"); 119 Xapian::docid tmp = lhead; 120 lhead = rhead; 121 rhead = tmp; 122 } 123 124 if (lhead == rhead) { 125 if (found) break; 126 // Not found, therefore l needs to be advanced, therefore so does 127 // r, but it's probably more efficient to move r first. 
128 pruned = next_handling_prune(r, w_min - lmax, matcher); 129 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 130 if (r->at_end()) return; 131 rhead = r->get_docid(); 132 DEBUGLINE(MATCH, "rhead " << rhead); 133 } else if (lhead < rhead) { 134 pruned = weak_skip_to_handling_prune(l, rhead, w_min - rmax, matcher, found); 135 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 136 if (l->at_end()) return; 137 lhead = l->get_docid(); 138 DEBUGLINE(MATCH, "l found = " << found << ", lhead = " << lhead); 139 } else if (lhead > rhead) { 140 pruned = skip_to_handling_prune(r, lhead, w_min - lmax, matcher); 141 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 142 if (r->at_end()) return; 143 rhead = r->get_docid(); 144 DEBUGLINE(MATCH, "rhead " << rhead); 145 } 146 } 147 148 head = lhead; 149 return; 150 } 151 79 152 AndPostList::AndPostList(PostList *left_, PostList *right_, 80 153 MultiMatch *matcher_, 81 154 Xapian::doccount dbsize_, … … 90 163 lmax = l->get_maxweight(); 91 164 rmax = r->get_maxweight(); 92 165 } 166 167 swap_to_make_l_weak(); 93 168 } 94 169 170 bool 171 AndPostList::swap_to_make_l_weak() 172 { 173 DEBUGCALL(MATCH, bool, "AndPostList::swap_to_make_l_weak", ""); 174 l_weak = l->prefer_weak_skip_to(); 175 r_weak = r->prefer_weak_skip_to(); 176 DEBUGLINE(MATCH, "l_weak=" << l_weak << ", r_weak=" << r_weak); 177 if (l_weak != r_weak) { 178 if (r_weak) { 179 // Swap, so that l is weak if only one subtree is weak. 
180 PostList * tmp_pl = r; r = l; l = tmp_pl; 181 Xapian::weight tmp_wt = rmax; rmax = lmax; lmax = tmp_wt; 182 l_weak = true; 183 r_weak = false; 184 AssertEq(l_weak, l->prefer_weak_skip_to()); 185 AssertEq(r_weak, r->prefer_weak_skip_to()); 186 RETURN(true); 187 } 188 } 189 RETURN(false); 190 } 191 95 192 PostList * 96 193 AndPostList::next(Xapian::weight w_min) 97 194 { 98 195 DEBUGCALL(MATCH, PostList *, "AndPostList::next", w_min); 99 process_next_or_skip_to(w_min, r->next(w_min - lmax)); 196 if (l_weak && !r_weak) { 197 process_half_weak_next_or_skip_to(w_min, 0); 198 } else { 199 process_next_or_skip_to(w_min, 0); 200 } 100 201 RETURN(NULL); 101 202 } 102 203 … … 104 205 AndPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 105 206 { 106 207 DEBUGCALL(MATCH, PostList *, "AndPostList::skip_to", did << ", " << w_min); 107 if (did > head) 108 process_next_or_skip_to(w_min, r->skip_to(did, w_min - lmax)); 208 if (did > head) { 209 if (l_weak && !r_weak) { 210 process_half_weak_next_or_skip_to(w_min, did); 211 } else { 212 process_next_or_skip_to(w_min, did); 213 } 214 } 109 215 RETURN(NULL); 110 216 } 111 217 -
common/postlist.h
180 180 */ 181 181 virtual Internal *skip_to(Xapian::docid, Xapian::weight w_min) = 0; 182 182 183 /** Attempt to find the first entry in the postlist at or after a 184 * given location. 185 * 186 * This attempts to find the first entry in the postlist at or after a 187 * given location, but if it can't quickly determine which document 188 * this is, it will give up and report failure rather than spend a great 189 * deal of time looking for the document. 190 * 191 * If the first such entry is exactly at the given location, this 192 * call must find it. 193 * 194 * If the first entry is found, the \a found parameter is set to true, 195 * and the postlist is left in exactly the same state as it would be 196 * if \a skip_to() had been called. 197 * 198 * If the first entry is not found, the \a found parameter is set to 199 * false, and the postlist is left pointing to no document, but the 200 * next time that next() is called, the postlist will be moved to the 201 * first entry after the location specified in this call. (Or, if the next 202 * call is to skip_to(), the skip_to() will behave as if the postlist 203 * currently pointed to a document somewhere between the location 204 * specified in this call and the location of the actual next entry in 205 * the list.) If \a get_docid() is called immediately after this 206 * method has returned having set \a found to false, a value for the 207 * lowest possible document ID which might be the next matching entry 208 * should be returned. (This can always be calculated cheaply, 209 * because \a did is always a valid return value here.) 210 * 211 * @param found This is a flag, which will be set to true if the next 212 * entry is found, or if there is determined to be no next entry, and 213 * to false otherwise. 
214 */ 215 virtual Internal * weak_skip_to(Xapian::docid did, Xapian::weight w_min, 216 bool & found) { 217 found = true; 218 return skip_to(did, w_min); 219 } 220 221 /** Flag, true if we should try weak_skip_to operations rather than skip_to 222 * when possible when calling this postlist. 223 */ 224 virtual bool prefer_weak_skip_to() { return false; } 225 183 226 /// Returns true if we're off the end of the list 184 227 virtual bool at_end() const = 0; 185 228