Ticket #342: last_mod.patch
File last_mod.patch, 2.7 KB (added 16 years ago) |
---|
-
.cc
old new 72 72 73 73 static bool skip_duplicates = false; 74 74 static bool follow_symlinks = false; 75 static bool ignore_time = false; 75 76 static string dbpath; 76 77 static string root; 77 78 static string indexroot; … … 195 196 if (urlterm.length() > MAX_SAFE_TERM_LENGTH) 196 197 urlterm = hash_long_term(urlterm, MAX_SAFE_TERM_LENGTH); 197 198 198 if (skip_duplicates && db.term_exists(urlterm)) { 199 cout << "duplicate. Ignored." << endl; 200 return; 201 } 199 { 200 // First find the docid with the urlterm. 201 Xapian::docid docid = 0; 202 Xapian::PostingIterator p = db.postlist_begin(urlterm); 203 if (p != db.postlist_end(urlterm)) { 204 docid = *p; 205 if (skip_duplicates) { 206 cout << "duplicate. Ignored." << endl; 207 return; 208 } 209 } 210 if (docid && !ignore_time) { 211 // Check the timestamp. 212 Xapian::Document doc = db.get_document(docid); 213 string value = doc.get_value(VALUE_LASTMOD); 214 time_t old_last_mod = binary_string_to_int(value); 215 if (old_last_mod >= last_mod) { 216 cout << "not newer. Ignored." 
<< endl; 217 if (docid < updated.size()) { 218 updated[docid] = true; 219 return; 220 }} 221 } 222 } 202 223 203 224 string md5; 204 225 if (mimetype == "text/html") { … … 677 698 { "depth-limit",required_argument, NULL, 'l' }, 678 699 { "follow", no_argument, NULL, 'f' }, 679 700 { "stemmer", required_argument, NULL, 's' }, 701 { "ignore-time",no_argument, NULL, 'i' }, 680 702 { 0, 0, NULL, 0 } 681 703 }; 682 704 … … 752 774 mime_map["djv"] = "image/vnd.djvu"; 753 775 mime_map["djvu"] = "image/vnd.djvu"; 754 776 755 while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M:lpf ", longopts, NULL)) != -1) {777 while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M:lpfi", longopts, NULL)) != -1) { 756 778 switch (getopt_ret) { 757 779 case 'h': { 758 780 cout << PROG_NAME" - "PROG_DESC"\n\n" … … 766 788 " -M, --mime-type additional MIME mapping ext:type\n" 767 789 " -l, --depth-limit=LIMIT set recursion limit (0 = unlimited)\n" 768 790 " -f, --follow follow symbolic links\n" 791 " -i, --ignore-time ignore timestamp comparison\n" 769 792 " --overwrite create the database anew (the default is to update\n" 770 793 " if the database already exists)" << endl; 771 794 print_stemmer_help(" "); … … 795 818 case 'p': // don't delete unupdated documents 796 819 preserve_unupdated = true; 797 820 break; 821 case 'i': // --ignore-time: on updates parse the file again 822 ignore_time = true; 823 break; 798 824 case 'l': { // Set recursion limit 799 825 int arg = atoi(optarg); 800 826 if (arg < 0) arg = 0;