Ticket #431: valuestream_pythonic_iterators.patch

File valuestream_pythonic_iterators.patch, 7.2 KB (added by Richard Boulton, 14 years ago)

Proposed implementation

  • python/pythontest2.py

     
    633633                         'Iterator has moved, and does not support random access',
    634634                         getattr, postings[i], 'positer')
    635635
     636def test_valuestream_iter():
     637    """Test a valuestream iterator on Database.
     638
     639    """
     640    db = setup_database()
     641
     642    # Check basic iteration
     643    expect([(item.docid, item.value) for item in db.valuestream(0)],
     644           [(3, '\xa4'), (4, '\xa2'), (5, '\xa4')])
     645    expect([(item.docid, item.value) for item in db.valuestream(1)], [])
     646    expect([(item.docid, item.value) for item in db.valuestream(5)],
     647           [(5, "five")])
     648    expect([(item.docid, item.value) for item in db.valuestream(9)],
     649           [(5, "nine")])
     650
     651    # Test skip_to() on an iterator with no values, and its behaviour when
     652    # called again after StopIteration has already been raised.
     653    i = db.valuestream(1)
     654    expect_exception(StopIteration, "", i.skip_to, 1)
     655    expect_exception(StopIteration, "", i.skip_to, 1)
     656    i = db.valuestream(1)
     657    expect_exception(StopIteration, "", i.skip_to, 1)
     658    expect_exception(StopIteration, "", i.next)
     659    i = db.valuestream(1)
     660    expect_exception(StopIteration, "", i.next)
     661    expect_exception(StopIteration, "", i.skip_to, 1)
     662
     663    # Test that skipping to a value works, and that skipping doesn't have to
     664    # advance.
     665    i = db.valuestream(0)
     666    item = i.skip_to(4)
     667    expect((item.docid, item.value), (4, '\xa2'))
     668    item = i.skip_to(4)
     669    expect((item.docid, item.value), (4, '\xa2'))
     670    item = i.skip_to(1)
     671    expect((item.docid, item.value), (4, '\xa2'))
     672    item = i.skip_to(5)
     673    expect((item.docid, item.value), (5, '\xa4'))
     674    expect_exception(StopIteration, "", i.skip_to, 6)
     675
     676    # Test that alternating skip_to() and next() works.
     677    i = db.valuestream(0)
     678    item = i.next()
     679    expect((item.docid, item.value), (3, '\xa4'))
     680    item = i.skip_to(4)
     681    expect((item.docid, item.value), (4, '\xa2'))
     682    item = i.next()
     683    expect((item.docid, item.value), (5, '\xa4'))
     684    expect_exception(StopIteration, "", i.skip_to, 6)
     685
     686    # Test that next works correctly after skip_to() called with an earlier
     687    # item.
     688    i = db.valuestream(0)
     689    item = i.skip_to(4)
     690    expect((item.docid, item.value), (4, '\xa2'))
     691    item = i.skip_to(1)
     692    expect((item.docid, item.value), (4, '\xa2'))
     693    item = i.next()
     694    expect((item.docid, item.value), (5, '\xa4'))
     695
     696    # Test that next works correctly after skipping to last item
     697    i = db.valuestream(0)
     698    item = i.skip_to(5)
     699    expect((item.docid, item.value), (5, '\xa4'))
     700    expect_exception(StopIteration, "", i.next)
     701
    636702def test_position_iter():
    637703    """Test position iterator for a document in a database.
    638704
  • python/extra.i

     
    11451145    return ValueIter(self.values_begin(), self.values_end())
    11461146Document.values = _document_gen_values_iter
    11471147
     1148
     1149##########################################
     1150# Support for iteration of value streams #
     1151##########################################
     1152
     1153class ValueStreamItem(object):
     1154    """An item returned from iteration of the values in a database slot.
     1155
     1156    The item supports access to the following attributes:
     1157
     1158     - `docid`: The docid for the item.
     1159     - `value`: The contents of the value.
     1160
     1161    """
     1162
     1163    __slots__ = ('docid', 'value', )
     1164
     1165    def __init__(self, docid, value):
     1166        self.docid = docid
     1167        self.value = value
     1168
     1169class ValueStreamIter(object):
     1170    """An iterator over all the values stored in a slot in a database.
     1171
     1172    The iterator will return ValueStreamItem objects, in ascending order of document id.
     1173
     1174    """
     1175    def __init__(self, start, end):
     1176        self.iter = start
     1177        self.end = end
     1178        self.moved = True
     1179
     1180    def __iter__(self):
     1181        return self
     1182
     1183    # For Python2:
     1184    def next(self):
     1185        if not self.moved:
     1186            self.iter.next()
     1187            self.moved = True
     1188
     1189        if self.iter==self.end:
     1190            raise StopIteration
     1191        else:
     1192            self.moved = False
     1193            return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())
     1194
     1195    # For Python3:
     1196    def __next__(self):
     1197        if not self.moved:
     1198            self.iter.next()
     1199            self.moved = True
     1200
     1201        if self.iter==self.end:
     1202            raise StopIteration
     1203        else:
     1204            self.moved = False
     1205            return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())
     1206
     1207    def skip_to(self, docid):
     1208        """Skip the iterator forward.
     1209
     1210        The iterator is advanced to the first document with a document ID
     1211        which is greater than or equal to the supplied document ID.
     1212
     1213        If there are no such items, this will raise StopIteration.
     1214
     1215        This returns the item which the iterator is moved to.  The subsequent
     1216        item will be returned the next time that next() is called (unless
     1217        skip_to() is called again first).
     1218
     1219        """
     1220        if self.iter != self.end:
     1221            self.iter.skip_to(docid)
     1222        if self.iter == self.end:
     1223            self.moved = True
     1224            raise StopIteration
     1225        self.moved = False
     1226        return ValueStreamItem(self.iter.get_docid(), self.iter.get_value())
     1227
     1228# Modify Database to add a "valuestream()" method, and remove the
     1229# valuestream_begin() and valuestream_end() methods.
     1230def wrapper():
     1231    vs_begin = Database.valuestream_begin
     1232    vs_end = Database.valuestream_end
     1233    def _database_gen_valuestream_iter(self, slot):
     1234        """Get an iterator over all the values stored in a slot in the database.
     1235
     1236        The iterator will return ValueStreamItem objects, in ascending order of
     1237        document id.
     1238
     1239        """
     1240        return ValueStreamIter(vs_begin(self, slot), vs_end(self, slot))
     1241    return _database_gen_valuestream_iter
     1242Database.valuestream = wrapper()
     1243del wrapper
     1244del Database.valuestream_begin
     1245del Database.valuestream_end
     1246
    11481247# Set the list of names which should be public.
    11491248# Note that this needs to happen at the end of xapian.py.
    11501249__all__ = []
  • xapian.i

     
    183183        }
    184184    }
    185185
    186     Xapian::valueno get_valueno();
     186    Xapian::docid get_docid() const;
     187    Xapian::valueno get_valueno() const;
     188    void skip_to(Xapian::docid docid_or_slot);
     189    bool check(Xapian::docid docid);
    187190    std::string get_description() const;
    188191};
    189192
     
    519522        doccount get_value_freq(Xapian::valueno valno) const;
    520523        string get_value_lower_bound(Xapian::valueno valno) const;
    521524        string get_value_upper_bound(Xapian::valueno valno) const;
     525        Xapian::termcount get_doclength_lower_bound() const;
     526        Xapian::termcount get_doclength_upper_bound() const;
     527        Xapian::termcount get_wdf_upper_bound(const std::string & term) const;
     528        ValueIterator valuestream_begin(Xapian::valueno slot) const;
     529        ValueIteratorEnd_ valuestream_end(Xapian::valueno) const;
    522530        doclength get_doclength(docid docid) const;
    523531        void keep_alive();
    524532        Document get_document(docid did);