Ticket #164: weak-skip_to-updated.patch
File weak-skip_to-updated.patch, 12.6 KB (added 17 years ago) |
---|
-
matcher/branchpostlist.h
57 57 /** Utility method, to call recalc_maxweight() and do the pruning 58 58 * if a next() or skip_to() returns non-NULL result. 59 59 */ 60 void handle_prune(PostList *&kid, PostList *ret) { 60 bool handle_prune(PostList *&kid, PostList *ret) { 61 61 if (ret) { 62 62 delete kid; 63 63 kid = ret; 64 64 65 65 // now tell matcher that maximum weights need recalculation. 66 66 matcher->recalc_maxweight(); 67 return true; 67 68 } 69 return false; 68 70 } 69 71 70 72 public: … … 112 114 return true; 113 115 } 114 116 117 inline bool 118 weak_skip_to_handling_prune(PostList * & pl, Xapian::docid did, Xapian::weight w_min, 119 MultiMatch *matcher, bool & found) 120 { 121 PostList *p = pl->weak_skip_to(did, w_min, found); 122 if (!p) return false; 123 delete pl; 124 pl = p; 125 // now tell matcher that maximum weights need recalculation. 126 if (matcher) matcher->recalc_maxweight(); 127 return true; 128 } 129 115 130 #endif /* OM_HGUARD_BRANCHPOSTLIST_H */ -
matcher/andpostlist.h
37 37 protected: 38 38 Xapian::docid head; 39 39 Xapian::weight lmax, rmax; 40 41 bool l_weak; 42 bool r_weak; 43 40 44 private: 41 45 Xapian::doccount dbsize; 42 46 43 void process_next_or_skip_to(Xapian::weight w_min, PostList *ret); 47 void process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did); 48 void process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did); 49 50 /// If r is weak, but l isn't, swap r and l. 51 /// Return true if a swap happened. 52 bool swap_to_make_l_weak(); 44 53 public: 45 54 Xapian::doccount get_termfreq_max() const; 46 55 Xapian::doccount get_termfreq_min() const; -
matcher/valuerangepostlist.cc
101 101 ValueRangePostList::next(Xapian::weight) 102 102 { 103 103 Assert(db); 104 Xapian::docid lastdocid = db->get_lastdocid(); 104 AssertParanoid(lastdocid == db->get_lastdocid()); 105 105 while (current < lastdocid) { 106 106 try { 107 107 if (++current == 0) break; … … 126 126 return ValueRangePostList::next(w_min); 127 127 } 128 128 129 PostList * 130 ValueRangePostList::weak_skip_to(Xapian::docid did, Xapian::weight, 131 bool & found) 132 { 133 Assert(db); 134 AssertParanoid(lastdocid == db->get_lastdocid()); 135 if (did <= current) { 136 found = true; 137 return NULL; 138 } 139 current = did; 140 if (current > lastdocid) { 141 db = NULL; 142 found = true; 143 return NULL; 144 } 145 146 try { 147 AutoPtr<Xapian::Document::Internal> doc(db->open_document(current, true)); 148 string v = doc->get_value(valno); 149 if (v >= begin && v <= end) { 150 found = true; 151 return NULL; 152 } 153 } catch (const Xapian::DocNotFoundError &) { 154 // Document doesn't exist. 155 } 156 // Haven't determined the position of the next match, other than knowing that 157 // it's not at document "did". 158 found = false; 159 return NULL; 160 } 161 129 162 bool 130 163 ValueRangePostList::at_end() const 131 164 { -
matcher/valuerangepostlist.h
33 33 34 34 Xapian::docid current; 35 35 36 /// Flag, true iff \a current points to a document which satisfies the restriction. 37 bool current_valid; 38 39 /// Highest docid used in the database. 40 Xapian::docid lastdocid; 41 36 42 /// Disallow copying. 37 43 ValueRangePostList(const ValueRangePostList &); 38 44 … … 43 49 ValueRangePostList(const Xapian::Database::Internal *db_, 44 50 Xapian::valueno valno_, 45 51 const std::string &begin_, const std::string &end_) 46 : db(db_), valno(valno_), begin(begin_), end(end_), current(0) { } 52 : db(db_), valno(valno_), begin(begin_), end(end_), current(0), 53 current_valid(false), lastdocid(db->get_lastdocid()) 54 { } 47 55 48 56 Xapian::doccount get_termfreq_min() const; 49 57 … … 69 77 70 78 PostList * skip_to(Xapian::docid, Xapian::weight w_min); 71 79 80 PostList * weak_skip_to(Xapian::docid did, Xapian::weight w_min, bool & found); 81 82 bool prefer_weak_skip_to() { return true; } 83 72 84 bool at_end() const; 73 85 74 86 string get_description() const; -
matcher/andpostlist.cc
28 28 #include "omdebug.h" 29 29 30 30 inline void 31 AndPostList::process_next_or_skip_to(Xapian::weight w_min, PostList *ret) 31 AndPostList::process_next_or_skip_to(Xapian::weight w_min, Xapian::docid did) 32 32 { 33 33 DEBUGCALL(MATCH, void, "AndPostList::process_next_or_skip_to", 34 w_min << ", " << ret); 34 w_min << ", " << did); 35 35 36 head = 0; 36 handle_prune(r, ret); 37 if (did == 0) { 38 if (handle_prune(r, r->next(w_min - lmax))) 39 swap_to_make_l_weak(); 40 } else { 41 if (handle_prune(r, r->skip_to(did, w_min - lmax))) 42 swap_to_make_l_weak(); 43 } 44 37 45 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 38 46 if (r->at_end()) return; 39 47 … … 42 50 Xapian::docid rhead = r->get_docid(); 43 51 DEBUGLINE(MATCH, "rhead " << rhead); 44 52 DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax); 45 skip_to_handling_prune(l, rhead, w_min - rmax, matcher); 53 if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher)) 54 swap_to_make_l_weak(); 46 55 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 47 56 if (l->at_end()) return; 48 57 … … 53 62 if (lhead < rhead) { 54 63 // FIXME: CSE these w_min values? 55 64 // But note that lmax and rmax may change on recalc_maxweight...
56 skip_to_handling_prune(l, rhead, w_min - rmax, matcher); 65 if (skip_to_handling_prune(l, rhead, w_min - rmax, matcher)) 66 swap_to_make_l_weak(); 57 67 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 58 68 if (l->at_end()) { 59 69 head = 0; … … 62 72 lhead = l->get_docid(); 63 73 DEBUGLINE(MATCH, "lhead " << lhead); 64 74 } else { 65 skip_to_handling_prune(r, lhead, w_min - lmax, matcher); 75 if (skip_to_handling_prune(r, lhead, w_min - lmax, matcher)) 76 swap_to_make_l_weak(); 66 77 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 67 78 if (r->at_end()) { 68 79 head = 0; … … 77 88 return; 78 89 } 79 90 91 inline void 92 AndPostList::process_half_weak_next_or_skip_to(Xapian::weight w_min, Xapian::docid did) 93 { 94 DEBUGCALL(MATCH, void, "AndPostList::process_half_weak_next_or_skip_to", 95 w_min << ", " << did); 96 97 bool pruned = false; 98 99 head = 0; 100 if (did == 0) { 101 pruned = handle_prune(r, r->next(w_min - lmax)); 102 } else { 103 pruned = handle_prune(r, r->skip_to(did, w_min - lmax)); 104 } 105 106 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 107 if (r->at_end()) return; 108 109 // r has just been advanced by next or skip_to so must be > head 110 // (and head is the current position of l) 111 Xapian::docid rhead = r->get_docid(); 112 DEBUGLINE(MATCH, "rhead " << rhead); 113 DEBUGLINE(MATCH, "w_min " << w_min << " rmax " << rmax); 114 bool found = false; 115 Xapian::docid lhead = rhead; 116 117 while (true) { 118 if (pruned && swap_to_make_l_weak()) { 119 DEBUGLINE(MATCH, "swapped l and r - swapping lhead and rhead"); 120 Xapian::docid tmp = lhead; 121 lhead = rhead; 122 rhead = tmp; 123 } 124 125 if (lhead == rhead) { 126 if (found) break; 127 // Not found, therefore l needs to be advanced, therefore so does 128 // r, but it's probably more efficient to move r first. 
129 pruned = next_handling_prune(r, w_min - lmax, matcher); 130 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 131 if (r->at_end()) return; 132 rhead = r->get_docid(); 133 DEBUGLINE(MATCH, "rhead " << rhead); 134 } else if (lhead < rhead) { 135 pruned = weak_skip_to_handling_prune(l, rhead, w_min - rmax, matcher, found); 136 DEBUGLINE(MATCH, "l at_end = " << l->at_end()); 137 if (l->at_end()) return; 138 lhead = l->get_docid(); 139 DEBUGLINE(MATCH, "l found = " << found << ", lhead = " << lhead); 140 } else if (lhead > rhead) { 141 pruned = skip_to_handling_prune(r, lhead, w_min - lmax, matcher); 142 DEBUGLINE(MATCH, "r at_end = " << r->at_end()); 143 if (r->at_end()) return; 144 rhead = r->get_docid(); 145 DEBUGLINE(MATCH, "rhead " << rhead); 146 } 147 } 148 149 head = lhead; 150 return; 151 } 152 80 153 AndPostList::AndPostList(PostList *left_, PostList *right_, 81 154 MultiMatch *matcher_, 82 155 Xapian::doccount dbsize_, … … 91 164 lmax = l->get_maxweight(); 92 165 rmax = r->get_maxweight(); 93 166 } 167 168 swap_to_make_l_weak(); 169 } 170 171 bool 172 AndPostList::swap_to_make_l_weak() 173 { 174 DEBUGCALL(MATCH, bool, "AndPostList::swap_to_make_l_weak", ""); 175 l_weak = l->prefer_weak_skip_to(); 176 r_weak = r->prefer_weak_skip_to(); 177 DEBUGLINE(MATCH, "l_weak=" << l_weak << ", r_weak=" << r_weak); 178 if (l_weak != r_weak) { 179 if (r_weak) { 180 // Swap, so that l is weak if only one subtree is weak. 
181 PostList * tmp_pl = r; r = l; l = tmp_pl; 182 Xapian::weight tmp_wt = rmax; rmax = lmax; lmax = tmp_wt; 183 l_weak = true; 184 r_weak = false; 185 AssertEq(l_weak, l->prefer_weak_skip_to()); 186 AssertEq(r_weak, r->prefer_weak_skip_to()); 187 RETURN(true); 188 } 189 } 190 RETURN(false); 94 191 } 95 192 96 193 PostList * 97 194 AndPostList::next(Xapian::weight w_min) 98 195 { 99 196 DEBUGCALL(MATCH, PostList *, "AndPostList::next", w_min); 100 process_next_or_skip_to(w_min, r->next(w_min - lmax)); 197 if (l_weak && !r_weak) { 198 process_half_weak_next_or_skip_to(w_min, 0); 199 } else { 200 process_next_or_skip_to(w_min, 0); 201 } 101 202 RETURN(NULL); 102 203 } 103 204 … … 105 206 AndPostList::skip_to(Xapian::docid did, Xapian::weight w_min) 106 207 { 107 208 DEBUGCALL(MATCH, PostList *, "AndPostList::skip_to", did << ", " << w_min); 108 if (did > head) 109 process_next_or_skip_to(w_min, r->skip_to(did, w_min - lmax)); 209 if (did > head) { 210 if (l_weak && !r_weak) { 211 process_half_weak_next_or_skip_to(w_min, did); 212 } else { 213 process_next_or_skip_to(w_min, did); 214 } 215 } 110 216 RETURN(NULL); 111 217 } 112 218 -
common/postlist.h
153 153 */ 154 154 Internal *skip_to(Xapian::docid did) { return skip_to(did, 0.0); } 155 155 156 /** Attempt to find the first entry in the postlist at or after a given 157 * location. 158 * 159 * This attempts to find the first entry in the postlist at or after a 160 * given location, but if it can't quickly determine which document 161 * this is, it will give up and report failure, rather than spend a great 162 * deal of time looking for the document. 163 * 164 * If the first entry in the postlist is at the given location, this 165 * call must find it. 166 * 167 * If the first entry is found, the \a found parameter is set to true, 168 * and the postlist is left in exactly the same state as it would be 169 * if \a skip_to() had been called. 170 * 171 * If the first entry is not found, the \a found parameter is set to 172 * false, and the postlist is left pointing to no document, but the 173 * next time that next() is called, the postlist will be moved to the 174 * first entry after that specified in this call. (Or, if the next 175 * call is to skip_to(), the skip_to() will behave as if the postlist 176 * currently pointed to a document in the range defined by that 177 * specified in this call and the location of the actual next entry in 178 * the list.) If \a get_docid() is called immediately after this 179 * method has returned having set \a found to false, a value for the 180 * lowest possible document ID which might be the next matching entry 181 * should be returned. (This can always be calculated cheaply, 182 * because \a did is always a valid return value here.) 183 * 184 * @param found This is a flag, which will be set to true if the next 185 * entry is found, or if there is determined to be no next entry, and 186 * to false otherwise.
187 */ 188 virtual Internal * weak_skip_to(Xapian::docid did, Xapian::weight w_min, 189 bool & found) { 190 found = true; 191 return skip_to(did, w_min); 192 } 193 194 /** Flag, true if we should try weak_skip_to operations rather than skip_to 195 * when possible when calling this postlist. 196 */ 197 virtual bool prefer_weak_skip_to() { return false; } 198 156 199 /// Return a string description of this object. 157 200 virtual std::string get_description() const = 0; 158 201 };