Ticket #282: xapian-omega-1.0.7a-from-ticket-285-and-cleaned-up-updated-2010-10-27.patch

File xapian-omega-1.0.7a-from-ticket-285-and-cleaned-up-updated-2010-10-27.patch, 60.9 KB (added by Olly Betts, 14 years ago)

Updated version of patch

  • xapian-omega-1.0.7a/ChangeLog

    diff -u  xapian-omega-1.0.7a/ChangeLog.orig
    old new  
    15261526
    15271527        * configure.ac: Check for strftime.
    15281528
     15292006-08-22 09:30:12 Reini Urban <reinhard.urban@avl.com>
     1530
     1531        omega-0.9.6c:
     1532        * omindex.cc: Fix wrong timestamp comparison in cache logic
     1533        * scriptindex.cc: Add lastmod and size records and values.
     1534        * excel2text, outlook2text.in: New scripts
     1535
     15362006-08-18 15:13:32 Reini Urban <reinhard.urban@avl.com>
     1537
     1538        omega-0.9.6b:
     1539        * omindex.cc: Add HAVE_UNRAR, HAVE_MSGCONVERT, HAVE_READPST checks.
     1540        Add options --verbose, --silent
     1541       
     15422006-08-17 18:06:26 Reini Urban <reinhard.urban@avl.com>
     1543
     1544        omega-0.9.6a:
     1545        * omindex.cc: Added cache_dir, cached virtual directories (zip, msg, pst, ...).
     1546        New option: -c/--nocleanup.
     1547        Consistently log stderr to /var/log/omega/omindex-error.log.
     1548        * configure.ac: Add HAVE_UNRAR, HAVE_MSGCONVERT,
     1549        HAVE_READPST.
     1550        * configfile.cc: New cache_dir
     1551        * Makefile.am: Link omindex against configfile.
     1552
    15291553Sun Jul 09 01:40:09 BST 2006  Olly Betts <olly@survex.com>
    15301554
    15311555        * docs/omegascript.txt: Note that (by design) an omegascript template
  • xapian-omega-1.0.7a/Makefile.am

    diff -u  xapian-omega-1.0.7a/Makefile.am.orig
    old new  
    6161pkglibbindir = $(pkglibdir)/bin
    6262pkglibbin_PROGRAMS = omega
    6363bin_PROGRAMS = omindex scriptindex
     64dist_libexec_SCRIPTS = outlook2text excel2text mimeexplode msgconvert.pl
    6465dist_bin_SCRIPTS = dbi2omega htdig2omega mbox2omega
    6566
    6667check_PROGRAMS = htmlparsetest md5test utf8converttest
     
    9293        common/safewindows.h\
    9394        common/stringutils.h
    9495
    95 AM_LDFLAGS = $(ICONV_LDFLAGS)
     96AM_LDFLAGS = -no-undefined $(ICONV_LDFLAGS)
    9697
    9798omega_SOURCES = omega.cc query.cc cgiparam.cc utils.cc configfile.cc date.cc\
    9899 cdb_init.cc cdb_find.cc cdb_hash.cc cdb_unpack.cc loadfile.cc\
     
    102103 md5wrap.cc xmlparse.cc metaxmlparse.cc utf8convert.cc sample.cc diritor.cc\
     104 configfile.cc\
    103105 runfilter.cc freemem.cc common/msvc_dirent.cc
    104106if NEED_MKDTEMP
     
    109111scriptindex_SOURCES = scriptindex.cc myhtmlparse.cc htmlparse.cc\
    110112 common/getopt.cc commonhelp.cc utils.cc hashterm.cc loadfile.cc\
     113 configfile.cc\
    111114 common/safe.cc common/stringutils.cc utf8convert.cc utf8truncate.cc
     
    127130MAINTAINERCLEANFILES = $(dist_man_MANS)
    128131endif
    129132
     133CLEANFILES = $(dist_libexec_SCRIPTS) $(dist_bin_SCRIPTS)
     134
     135omega.conf: $(srcdir)/omega.conf.in Makefile
     136        sed "s,@localstatedir@,$(localstatedir)," $(srcdir)/omega.conf.in > $@
     137outlook2text: $(srcdir)/outlook2text.in mimeexplode Makefile
     138        sed "s,@MSGCONVERT@,$(MSGCONVERT),;s,@MIMEEXPLODE@,$(pkglibbindir)/mimeexplode," $(srcdir)/outlook2text.in > $@
     139
    130140if DOCUMENTATION_RULES
  • xapian-omega-1.0.7a/configfile.cc

    diff -u  xapian-omega-1.0.7a/configfile.cc.orig
    old new  
    4242string template_dir = "/var/lib/omega/templates/";
    4343string log_dir = "/var/log/omega/";
    4444string cdb_dir = "/var/lib/omega/cdb/";
     45string cache_dir = "/var/lib/omega/cache/";
    4546
    4647/** Return true if the file fname exists.
    4748 */
     
    6465    }
    6566
    6667    while (in) {
    67         char line[1024];
    68         in.getline(line, sizeof(line));
    69 
    70         char *p = line;
    71         while (isspace((unsigned char)*p)) ++p;
    72         if (!*p || *p == '#') continue; // Ignore blank line and comments
    73 
    74         char *q = p;
    75         while (*q && !isspace((unsigned char)*q)) ++q;
    76         string name(p, q - p);
    77 
    78         p = q;
    79         while (isspace((unsigned char)*p)) ++p;
    80         q = p;
    81         while (*q && !isspace((unsigned char)*q)) ++q;
    82         string value(p, q - p);
    83 
    84         while (*q && isspace((unsigned char)*q)) ++q;
    85         if (value.empty() || *q) {
    86             throw string("Bad line in configuration file `") + cfile + "'";
    87         }
    88 
     68        string name, value;
     69        in >> name >> value;
     70        if (value[value.length()-1] != '/') value += "/";
    8971        if (name == "database_dir") {
    90             database_dir = value + "/";
     72            database_dir = value;
    9173        } else if (name == "template_dir") {
    92             template_dir = value + "/";
     74            template_dir = value;
    9375        } else if (name == "log_dir") {
    94             log_dir = value + "/";
     76            log_dir = value;
    9577        } else if (name == "cdb_dir") {
    96             cdb_dir = value + "/";
     78            cdb_dir = value;
     79        } else if (name == "cache_dir") {
     80            cache_dir = value;
    9781        }
    9882    }
    9983
  • xapian-omega-1.0.7a/configfile.h

    diff -u  xapian-omega-1.0.7a/configfile.h.orig
    old new  
    3030extern string template_dir;
    3131extern string log_dir;
    3232extern string cdb_dir;
     33extern string cache_dir;
    3334
    3435void read_config_file();
    3536
  • xapian-omega-1.0.7a/excel2text

    diff -u  xapian-omega-1.0.7a/excel2text.orig
    old new  
     1#! /bin/sh
     2# strip numbers, to stdout
     3xls2csv -q0 "$1" | sed -re's/[0123456789.]+,//g'
  • xapian-omega-1.0.7a/mimeexplode

    diff -u  xapian-omega-1.0.7a/mimeexplode.orig
    old new  
     1#!/usr/bin/perl -w
     2
     3=head1 NAME
     4
     5mimeexplode - explode one or more MIME messages
     6
     7=head1 SYNOPSIS
     8
     9    mimeexplode [-d <dir>] <mime-msg-file> <mime-msg-file> ...
     10
     11    someprocess | mimeexplode -
     12
     13=head1 DESCRIPTION
     14
     15Takes one or more files from the command line that contain MIME
     16messages, and explodes their contents out into subdirectories
     17of the current working directory.  The subdirectories are
     18just called C<msg0>, C<msg1>, C<msg2>, etc.  Existing directories are
     19skipped over.
     20
     21The message information is output to the stdout, like this:
     22
     23    Message: msg3 (inputfile1.msg)
     24        Part: msg3/filename-1.dat (text/plain)
     25        Part: msg3/filename-2.dat (text/plain)
     26    Message: msg5 (input-file2.msg)
     27        Part: msg5/dir.gif (image/gif)
     28        Part: msg5/face.jpg (image/jpeg)
     29    Message: msg6 (infile3)
     30        Part: msg6/filename-1.dat (text/plain)
     31
     32This was written as an example of the MIME:: modules in the
     33MIME-parser package I wrote.  It may prove useful as a quick-and-dirty
     34way of splitting a MIME message if you need to decode something, and
     35you don't have a MIME mail reader on hand.
     36
     37=head1 COMMAND LINE OPTIONS
     38
     39-d outdir
     40
     41=head1 AUTHOR
     42
     43Eryq C<eryq@zeegee.com>, in a big hurry...
     44Reini Urban C<rurban@x-ray.at>: -d option to always explode into the same dir
     45
     46=cut
     47
     48#BEGIN { unshift @INC, ".." }    # to test MIME:: stuff before installing it!
     49
     50require 5.001;
     51
     52use strict;
     53use vars;
     54
     55use MIME::Parser;
     56use Getopt::Std;
     57my %opts;
     58my $outbase = '';
     59my $postfix = '';
     60
     61#------------------------------------------------------------
     62# make_msg - make and return the name of a msgXXX directory
     63#------------------------------------------------------------
     64
     65#ignored
     66#sub make_msg {
     67#    while (-d "msg$Msgno") {
     68#       ++$Msgno;
     69#       die "self-imposed limit reached" if $Msgno == 256;
     70#    }
     71#   mkdir "msg$Msgno",0755 or die "couldn't make msg$Msgno: $!";
     72#    "msg$Msgno";
     73#}
     74
     75#------------------------------------------------------------
     76# dump_entity - dump an entity's file info
     77#------------------------------------------------------------
     78sub dump_entity {
     79    my $ent = shift;
     80    my @parts = $ent->parts;
     81
     82    if (@parts) {        # multipart...
     83        map { dump_entity($_) } @parts;
     84    }
     85    else {               # single part...
     86        print "    Part: ", $ent->bodyhandle->path,
     87              " (", scalar($ent->head->mime_type), ")\n";
     88    }
     89}
     90
     91#------------------------------------------------------------
     92# main
     93#------------------------------------------------------------
     94sub main {
     95    my $file;
     96    my $entity;
     97
     98    # make sure the same message gets exploded into the same dir
     99    getopts('d:', \%opts);
     100    $outbase = $opts{d} ? $opts{d} : "msg0";
     101    my $outdir = $outbase;
     102
     103    # Go through messages:
     104    @ARGV or unshift @ARGV, "-";
     105    while (defined($file = shift @ARGV)) {
     106
     107      # Sanity:
     108      (-d $outdir) or mkdir "$outdir",0755;
     109      (-w "$outdir") or die "cwd $outdir not writable!";
     110      #my $msgdir = make_msg();
     111      #print "Message: $msgdir ($file)\n";
     112
     113      # Create a new parser object:
     114      my $parser = new MIME::Parser;
     115      ### $parser->parse_nested_messages('REPLACE');
     116
     117      # Optional: set up parameters that will affect how it extracts
     118      #   documents from the input stream:
     119      $parser->output_dir($outdir);
     120
     121      # Parse an input stream:
     122      open FILE, $file or die "couldn't open $file";
     123      $entity = $parser->read(\*FILE) or
     124        print STDERR "Couldn't parse MIME in $file; continuing...\n";
     125      close FILE;
     126
     127      # Congratulations: you now have a (possibly multipart) MIME entity!
     128      dump_entity($entity) if $entity;
     129      ### $entity->dump_skeleton if $entity;
     130
     131      $postfix++;
     132      $outdir = $outbase.$postfix;
     133    }
     134    1;
     135}
     136
     137exit (&main ? 0 : -1);
     138#------------------------------------------------------------
     1391;
     140
  • xapian-omega-1.0.7a/msgconvert.pl

    diff -u  xapian-omega-1.0.7a/msgconvert.pl.orig
    old new  
     1#!/usr/bin/perl -w
     2#
     3# msgconvert.pl:
     4#
     5# Convert .MSG files (made by Outlook (Express)) to multipart MIME messages.
     6#
     7# Copyright 2002, 2004, 2006 Matijs van Zuijlen
     8#
     9# This program is free software; you can redistribute it and/or modify it
     10# under the terms of the GNU General Public License as published by the
     11# Free Software Foundation; either version 2 of the License, or (at your
     12# option) any later version.
     13#
     14# This program is distributed in the hope that it will be useful, but
     15# WITHOUT ANY WARRANTY; without even the implied warranty of
     16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
     17# Public License for more details.
     18#
     19# CHANGES:
     20# 20020715  Recognize new items 'Cc', mime type of attachment, long
     21#           filename of attachment, and full headers. Attachments turn out
     22#           to be numbered, so a regexp is now used to recognize label of
     23#           items that are attachments.
     24# 20020831  long file name will definitely be used if present. Full headers
     25#           and mime type information are used when present. Created
     26#           generic system for specifying known items to be skipped.
     27#           Unexpected contents is never reason to bail out anymore. Added
     28#           support for usage message and option processing (--verbose).
     29# 20040104  Handle address data slightly better, make From line less fake,
     30#           make $verbose and $skippable_entries global vars, handle HTML
     31#           variant of body text if present (though not optimally).
     32# 20040214  Fix typos and incorrect comments.
     33# 20040307  - Complete rewrite: All functional parts are now in the package
     34#             MSGParser;
     35#           - Creation of MIME::Entity object is delayed until the output
     36#             routines, which means all data is known; This means I can
     37#             create a multipart/alternative body.
     38#           - Item names are parsed (thanks to bfrederi@alumni.sfu.ca for
     39#             the information).
     40# 20040514  Check if $self->{HEAD} actually exists before trying to add its
     41#           contents to the output Mime object's header data.
     42#           (Bug reported by Thomas Ng).
     43#           Don't produce multipart messages if not needed.
     44#           (Bug reported by Justin B. Scout).
     45# 20040529  Correctly format OLEDATE.
     46# 20040530  - Extract date from property 0047 (thanks, Marc Goodman).
     47#           - Use address data to make To: and Cc: lines complete
     48#           - Use the in-reply-to property
     49#           - More unknown properties named.
     50#           - Found another property containing an SMTP address.
     51#           - Put non-SMTP type addresses back in output.
     52# 20040825  Replace 'our' to declare globals with 'use vars'. This means
     53#           the globals are now properly scoped inside the package and not
     54#           the file.
     55#           This also fixes the bug that this program did not work on perl
     56#           versions below 5.6. (Bug reported by Tim Gustafson)
     57# 20060218  More sensible encoding warnings.
     58# 20060219  Move OLE parsing to main program.
     59#           Parse nested MSG files (Bug reported by Christof Lukas).
     60# 20060225  Simplify code.
     61#
     62
     63#
     64# Import modules.
     65#
     66package MSGParser;
     67use strict;
     68use OLE::Storage_Lite;
     69use MIME::Entity;
     70use MIME::Parser;
     71use Date::Format;
     72use POSIX qw(mktime);
     73use constant DIR_TYPE => 1;
     74use constant FILE_TYPE => 2;
     75
     76use vars qw($skipproperties $skipheaders);
     77#
     78# Descriptions partially based on mapitags.h
     79#
     80$skipproperties = {
     81  # Envelope properties
     82  '000B' => "Conversation key?",
     83  '001A' => "Type of message",
     84  '003B' => "Sender address variant",
     85  '003D' => "Contains 'Re: '",
     86  '003F' => "'recieved by' id",
     87  '0040' => "'recieved by' name",
     88  '0041' => "Sender variant address id",
     89  '0042' => "Sender variant name",
     90  '0043' => "'recieved representing' id",
     91  '0044' => "'recieved representing' name",
     92  '0046' => "Read receipt address id",
     93  '0051' => "'recieved by' search key",
     94  '0052' => "'recieved representing' search key",
     95  '0053' => "Read receipt search key",
     96  '0064' => "Sender variant address type",
     97  '0065' => "Sender variant address",
     98  '0070' => "Conversation topic",
     99  '0071' => "Conversation index",
     100  '0075' => "'recieved by' address type",
     101  '0076' => "'recieved by' email address",
     102  '0077' => "'recieved representing' address type",
     103  '0078' => "'recieved representing' email address",
     104  '007F' => "something like a message id",
     105  # Recipient properties
     106  '0C19' => "Reply address variant",
     107  '0C1D' => "Reply address variant",
     108  '0C1E' => "Reply address type",
     109  # Non-transmittable properties
     110  '0E02' => "?Should BCC be displayed",
     111  '0E0A' => "sent mail id",
     112  '0E1D' => "Subject w/o Re",
     113  '0E27' => "64 bytes: Unknown",
     114  '0FF6' => "Index",
     115  '0FF9' => "Index",
     116  '0FFF' => "Address variant",
     117  # Content properties
     118  '1008' => "Summary or something",
     119  '1009' => "RTF Compressed",
     120  # 'Common property'
     121  '3001' => "Display name",
     122  '3002' => "Address Type",
     123  '300B' => "'Search key'",
     124  # Attachment properties
     125  '3702' => "Attachment encoding",
     126  '3703' => "Attachment extension",
     127  '3709' => "'Attachment rendering'", # Maybe an icon or something?
     128  '3713' => "Icon URL?",
     129  # 'Mail user'
     130  '3A20' => "Address variant",
     131  # 3900 -- 39FF: 'Address book'
     132  '39FF' => "7 bit display name",
     133  # 'Display table properties'
     134  '3FF8' => "Routing data?",
     135  '3FF9' => "Routing data?",
     136  '3FFA' => "Routing data?",
     137  '3FFB' => "Routing data?",
     138  # 'Transport-defined envelope property'
     139  '4029' => "Sender variant address type",
     140  '402A' => "Sender variant address",
     141  '402B' => "Sender variant name",
     142  '5FF6' => "Recipient name",
     143  '5FF7' => "Recipient address variant",
     144  # 'Provider-defined internal non-transmittable property'
     145  '6740' => "Unknown, binary data",
     146  # User defined id's
     147  '8000' => "Content Class",
     148  '8002' => "Unknown, binary data",
     149};
     150
     151$skipheaders = {
     152  "MIME-Version" => 1,
     153  "Content-Type" => 1,
     154  "Content-Transfer-Encoding" => 1,
     155  "X-Mailer" => 1,
     156  "X-Msgconvert" => 1,
     157  "X-MS-Tnef-Correlator" => 1,
     158  "X-MS-Has-Attach" => 1,
     159};
     160
     161use constant ENCODING_UNICODE => '001F';
     162use constant KNOWN_ENCODINGS => {
     163    '000D' => 'Directory',
     164    '001F' => 'Unicode',
     165    '001E' => 'Ascii?',
     166    '0102' => 'Binary',
     167};
     168
     169use constant MAP_ATTACHMENT_FILE => {
     170  '3701' => ["DATA",        0], # Data
     171  '3704' => ["SHORTNAME",   1], # Short file name
     172  '3707' => ["LONGNAME",    1], # Long file name
     173  '370E' => ["MIMETYPE",    1], # mime type
     174  '3716' => ["DISPOSITION", 1], # disposition
     175};
     176
     177use constant MAP_SUBITEM_FILE => {
     178  '1000' => ["BODY_PLAIN",      0], # Body
     179  '1013' => ["BODY_HTML",       0], # HTML Version of body
     180  '0037' => ["SUBJECT",         1], # Subject
     181  '0047' => ["SUBMISSION_ID",   1], # Seems to contain the date
     182  '007D' => ["HEAD",            1], # Full headers
     183  '0C1A' => ["FROM",            1], # Reply-To: Name
     184  '0C1E' => ["FROM_ADDR_TYPE",  1], # From: Address type
     185  '0C1F' => ["FROM_ADDR",       1], # Reply-To: Address
     186  '0E04' => ["TO",              1], # To: Names
     187  '0E03' => ["CC",              1], # Cc: Names
     188  '1035' => ["MESSAGEID",       1], # Message-Id
     189  '1042' => ["INREPLYTO",       1], # In reply to Message-Id
     190};
     191
     192use constant MAP_ADDRESSITEM_FILE => {
     193  '3001' => ["NAME",            1], # Real name
     194  '3002' => ["TYPE",            1], # Address type
     195  '403D' => ["TYPE",            1], # Address type
     196  '3003' => ["ADDRESS",         1], # Address
     197  '403E' => ["ADDRESS",         1], # Address
     198  '39FE' => ["SMTPADDRESS",     1], # SMTP Address variant
     199};
     200
     201#
     202# Main body of module
     203#
     204
     205sub new {
     206  my $that = shift;
     207  my $class = ref $that || $that;
     208
     209  my $self = {
     210    ATTACHMENTS => [],
     211    ADDRESSES => [],
     212    VERBOSE => 0,
     213    HAS_UNICODE => 0,
     214    FROM_ADDR_TYPE => "",
     215  };
     216  bless $self, $class;
     217}
     218
     219#
     220# Main sub: parse the PPS tree, and return
     221#
     222sub parse {
     223  my $self = shift;
     224  my $PPS = shift or die "Internal error: No PPS tree";
     225  $self->_RootDir($PPS);
     226}
     227
     228sub mime_object {
     229  my $self = shift;
     230
     231  my $bodymime;
     232  my $mime;
     233
     234  if ($self->_IsMultiPart) {
     235    # Construct a multipart message object
     236
     237    $mime = MIME::Entity->build(Type => "multipart/mixed");
     238
     239    # Set the entity that we'll save the body parts to. If there's more than
     240    # one part, it's a new entity, otherwise, it's the main $mime object.
     241    if ($self->{BODY_HTML} and $self->{BODY_PLAIN}) {
     242      $bodymime = MIME::Entity->build(
     243        Type => "multipart/alternative",
     244        Encoding => "8bit",
     245      );
     246      $mime->add_part($bodymime);
     247    } else {
     248      $bodymime = $mime;
     249    }
     250    if ($self->{BODY_PLAIN}) {
     251      $self->_SaveAttachment($bodymime, {
     252        MIMETYPE => 'text/plain; charset=ISO-8859-1',
     253        ENCODING => '8bit',
     254        DATA => $self->{BODY_PLAIN},
     255        DISPOSITION => 'inline',
     256      });
     257    }
     258    if ($self->{BODY_HTML}) {
     259      $self->_SaveAttachment($bodymime, {
     260        MIMETYPE => 'text/html',
     261        ENCODING => '8bit',
     262        DATA => $self->{BODY_HTML},
     263        DISPOSITION => 'inline',
     264      });
     265    }
     266    foreach my $att (@{$self->{ATTACHMENTS}}) {
     267      $self->_SaveAttachment($mime, $att);
     268    }
     269  } elsif ($self->{BODY_PLAIN}) {
     270    # Construct a single part message object with a plain text body
     271    $mime = MIME::Entity->build(
     272      Type => "text/plain",
     273      Data => $self->{BODY_PLAIN}
     274    );
     275  } elsif ($self->{BODY_HTML}) {
     276    # Construct a single part message object with an HTML body
     277    $mime = MIME::Entity->build(
     278      Type => "text/html",
     279      Data => $self->{BODY_HTML}
     280    );
     281  }
     282
     283  $self->_CopyHeaderData($mime);
     284
     285  $self->_SetHeaderFields($mime);
     286
     287  return $mime;
     288}
     289
     290# Actually output the message in mbox format
     291sub print {
     292  my $self = shift;
     293
     294  my $mime = $self->mime_object;
     295
     296  # Construct From line from whatever we know.
     297  my $string = "";
     298  $string = (
     299    $self->{FROM_ADDR_TYPE} eq "SMTP" ?
     300    $self->{FROM_ADDR} :
     301    'someone@somewhere'
     302  );
     303  $string =~ s/\n//g;
     304
     305  # The date used here is not really important.
     306  print "From ", $string, " ", scalar localtime, "\n";
     307  $mime->print(\*STDOUT);
     308  print "\n";
     309}
     310
     311sub set_verbosity {
     312  my ($self, $verbosity) = @_;
     313  defined $verbosity or die "Internal error: no verbosity level";
     314  $self->{VERBOSE} = $verbosity;
     315}
     316
     317#
     318# Below are functions that walk the PPS tree. The *Dir functions handle
     319# processing the directory nodes of the tree (mainly, iterating over the
     320# children), whereas the *Item functions handle processing the items in the
     321# directory (if such an item is itself a directory, it will in turn be
     322# processed by the relevant *Dir function).
     323#
     324
     325#
     326# RootDir: Check Root Entry, parse sub-entries.
     327# The OLE file consists of a single entry called Root Entry, which has
     328# several children. These children are parsed in the sub SubItem.
     329#
     330sub _RootDir {
     331  my ($self, $PPS) = @_;
     332
     333  foreach my $child (@{$PPS->{Child}}) {
     334    $self->_SubItem($child);
     335  }
     336}
     337
     338sub _SubItem {
     339  my ($self, $PPS) = @_;
     340 
     341  if ($PPS->{Type} == DIR_TYPE) {
     342    $self->_SubItemDir($PPS);
     343  } elsif ($PPS->{Type} == FILE_TYPE) {
     344    $self->_SubItemFile($PPS);
     345  } else {
     346    warn "Unknown entry type: $PPS->{Type}";
     347  }
     348}
     349
     350sub _SubItemDir {
     351  my ($self, $PPS) = @_;
     352
     353  $self->_GetOLEDate($PPS);
     354
     355  my $name = $self->_GetName($PPS);
     356
     357  if ($name =~ /__recip_version1 0_ /) { # Address of one recipient
     358    $self->_AddressDir($PPS);
     359  } elsif ($name =~ '__attach_version1 0_ ') { # Attachment
     360    $self->_AttachmentDir($PPS);
     361  } else {
     362    $self->_UnknownDir($self->_GetName($PPS));
     363  }
     364}
     365
     366sub _SubItemFile {
     367  my ($self, $PPS) = @_;
     368
     369  my $name = $self->_GetName($PPS);
     370  my ($property, $encoding) = $self->_ParseItemName($name);
     371
     372  $self->_MapProperty($self, $PPS->{Data}, $property,
     373    MAP_SUBITEM_FILE) or $self->_UnknownFile($name);
     374}
     375
     376sub _AddressDir {
     377  my ($self, $PPS) = @_;
     378
     379  my $address = {
     380    NAME        => undef,
     381    ADDRESS     => undef,
     382    TYPE        => "",
     383  };
     384  foreach my $child (@{$PPS->{Child}}) {
     385    $self->_AddressItem($child, $address);
     386  }
     387  push @{$self->{ADDRESSES}}, $address;
     388}
     389
     390sub _AddressItem {
     391  my ($self, $PPS, $addr_info) = @_;
     392
     393  my $name = $self->_GetName($PPS);
     394
     395  # DIR Entries: There should be none.
     396  if ($PPS->{Type} == DIR_TYPE) {
     397    $self->_UnknownDir($name);
     398  } elsif ($PPS->{Type} == FILE_TYPE) {
     399    my ($property, $encoding) = $self->_ParseItemName($name);
     400    $self->_MapProperty($addr_info, $PPS->{Data}, $property,
     401      MAP_ADDRESSITEM_FILE) or $self->_UnknownFile($name);
     402  } else {
     403    warn "Unknown entry type: $PPS->{Type}";
     404  }
     405}
     406
     407sub _AttachmentDir {
     408  my ($self, $PPS) = @_;
     409
     410  my $attachment = {
     411    SHORTNAME   => undef,
     412    LONGNAME    => undef,
     413    MIMETYPE    => 'application/octet-stream',
     414    ENCODING    => 'base64',
     415    DISPOSITION => 'attachment',
     416    DATA        => undef
     417  };
     418  foreach my $child (@{$PPS->{Child}}) {
     419    $self->_AttachmentItem($child, $attachment);
     420  }
     421  push @{$self->{ATTACHMENTS}}, $attachment;
     422}
     423
     424sub _AttachmentItem {
     425  my ($self, $PPS, $att_info) = @_;
     426
     427  my $name = $self->_GetName($PPS);
     428
     429  my ($property, $encoding) = $self->_ParseItemName($name);
     430
     431  if ($PPS->{Type} == DIR_TYPE) {
     432
     433    if ($property eq '3701') {  # Nested MSG file
     434      my $msgp = new MSGParser();
     435      $msgp->parse($PPS);
     436      my $data = $msgp->mime_object->as_string;
     437      $att_info->{DATA} = $data;
     438      $att_info->{MIMETYPE} = 'message/rfc822';
     439      $att_info->{ENCODING} = '8bit';
     440    } else {
     441      $self->_UnknownDir($name);
     442    }
     443
     444  } elsif ($PPS->{Type} == FILE_TYPE) {
     445    $self->_MapProperty($att_info, $PPS->{Data}, $property,
     446      MAP_ATTACHMENT_FILE) or $self->_UnknownFile($name);
     447  } else {
     448    warn "Unknown entry type: $PPS->{Type}";
     449  }
     450}
     451
     452sub _MapProperty {
     453  my ($self, $hash, $data, $property, $map) = @_;
     454
     455  defined $property or return 0;
     456  my $arr = $map->{$property} or return 0;
     457
     458  $arr->[1] and $data =~ s/\000//g;
     459  $hash->{$arr->[0]} = $data;
     460
     461  return 1;
     462}
     463
     464sub _UnknownDir {
     465  my ($self, $name) = @_;
     466
     467  if ($name eq '__nameid_version1 0') {
     468    $self->{VERBOSE}
     469      and warn "Skipping DIR entry $name (Introductory stuff)\n";
     470    return;
     471  }
     472  warn "Unknown DIR entry $name\n";
     473}
     474
     475sub _UnknownFile {
     476  my ($self, $name) = @_;
     477
     478  if ($name eq '__properties_version1 0') {
     479    $self->{VERBOSE}
     480      and warn "Skipping FILE entry $name (Properties)\n";
     481    return;
     482  }
     483
     484  my ($property, $encoding) = $self->_ParseItemName($name);
     485  unless (defined $property) {
     486    warn "Unknown FILE entry $name\n";
     487    return;
     488  }
     489  if ($skipproperties->{$property}) {
     490    $self->{VERBOSE}
     491      and warn "Skipping property $property ($skipproperties->{$property})\n";
     492    return;
     493  } elsif ($property =~ /^80/) {
     494    $self->{VERBOSE}
     495      and warn "Skipping property $property (user-defined property)\n";
     496    return;
     497  } else {
     498    warn "Unknown property $property\n";
     499    return;
     500  }
     501}
     502
     503#
     504# Helper functions
     505#
     506
     507sub _GetName {
     508  my ($self, $PPS) = @_;
     509  return $self->_NormalizeWhiteSpace(OLE::Storage_Lite::Ucs2Asc($PPS->{Name}));
     510}
     511
     512sub _NormalizeWhiteSpace {
     513  my ($self, $name) = @_;
     514  $name =~ s/\W/ /g;
     515  return $name;
     516}
     517
     518sub _GetOLEDate {
     519  my ($self, $PPS) = @_;
     520  unless (defined ($self->{OLEDATE})) {
     521    # Make Date
     522    my $datearr;
     523    $datearr = $PPS->{Time2nd};
     524    $datearr = $PPS->{Time1st} unless($datearr);
     525    $self->{OLEDATE} = $self->_FormatDate($datearr) if $datearr;
     526  }
     527}
     528
     529sub _FormatDate {
     530  my ($self, $datearr) = @_;
     531
     532  # TODO: This is a little convoluted. Directly using strftime didn't seem
     533  # to work.
     534  my $datetime = mktime(@$datearr);
     535  return time2str("%a, %d %h %Y %X %z", $datetime);
     536}
     537
     538# If we didn't get the date from the original header data, we may be able
     539# to get it from the SUBMISSION_ID:
     540# It seems to have the format of a semicolon-separated list of key=value
     541# pairs. The key l has a value with the format:
     542# <SERVER>-<DATETIME>Z-<NUMBER>, where DATETIME is the date and time in
     543# the format YYMMDDHHMMSS.
     544sub _SubmissionIdDate {
     545  my $self = shift;
     546
     547  my $submission_id = $self->{SUBMISSION_ID} or return undef;
     548  $submission_id =~ m/l=.*-(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)Z-.*/
     549    or return undef;
     550  my $year = $1;
     551  $year += 100 if $year < 20;
     552  return $self->_FormatDate([$6,$5,$4,$3,$2-1,$year]);
     553}
     554
     555sub _ParseItemName {
     556  my ($self, $name) = @_;
     557
     558  if ($name =~ /^__substg1 0_(....)(....)$/) {
     559    my ($property, $encoding) = ($1, $2);
     560    if ($encoding eq ENCODING_UNICODE and not ($self->{HAS_UNICODE})) {
     561      warn "This MSG file contains Unicode fields."
     562        . " This is currently unsupported.\n";
     563      $self->{HAS_UNICODE} = 1;
     564    } elsif (not (KNOWN_ENCODINGS()->{$encoding})) {
     565      warn "Unknown encoding $encoding. Results may be strange or wrong.\n";
     566    }
     567    return ($property, $encoding);
     568  } else {
     569    return (undef, undef);
     570  }
     571}
     572
     573sub _SaveAttachment {
     574  my ($self, $mime, $att) = @_;
     575
     576  my $ent = $mime->attach(
     577    Type => $att->{MIMETYPE},
     578    Encoding => $att->{ENCODING},
     579    Data => [],
     580    Filename => ($att->{LONGNAME} ? $att->{LONGNAME} : $att->{SHORTNAME}),
     581    Disposition => $att->{DISPOSITION}
     582  );
     583
     584  my $handle;
     585  if ($handle = $ent->open("w")) {
     586    $handle->print($att->{DATA});
     587    $handle->close;
     588  } else {
     589    warn "Could not write data!";
     590  }
     591}
     592
     593sub _SetAddressPart {
     594  my ($self, $adrname, $partname, $data) = @_;
     595
     596  my $address = $self->{ADDRESSES}->{$adrname};
     597  $data =~ s/\000//g;
     598  #warn "Processing address data part $partname : $data\n";
     599  if (defined ($address->{$partname})) {
     600    if ($address->{$partname} eq $data) {
     601      warn "Skipping duplicate but identical address information for"
     602      . " $partname\n" if $self->{VERBOSE};
     603    } else {
     604      warn "Address information $partname inconsistent:\n";
     605      warn "    Original data: $address->{$partname}\n";
     606      warn "    New data: $data\n";
     607    }
     608  } else {
     609    $address->{$partname} = $data;
     610  }
     611}
     612
     613# Set header fields
     614sub _AddHeaderField {
     615  my ($self, $mime, $fieldname, $value) = @_;
     616
     617  my $oldvalue = $mime->head->get($fieldname);
     618  return if $oldvalue;
     619  $mime->head->add($fieldname, $value) if $value;
     620}
     621
     622sub _Address {
     623  my ($self, $tag) = @_;
     624  my $name = $self->{$tag} || "";
     625  my $address = $self->{$tag . "_ADDR"} || "";
     626  return "$name <$address>";
     627}
     628
     629# Find SMTP addresses for the given list of names
     630sub _ExpandAddressList {
     631  my ($self, $names) = @_;
     632
     633  my $addresspool = $self->{ADDRESSES};
     634  my @namelist = split /; */, $names;
     635  my @result;
     636  name: foreach my $name (@namelist) {
     637    foreach my $address (@$addresspool) {
     638      if ($name eq $address->{NAME}) {
     639        my $addresstext = $address->{NAME} . " <";
     640        if (defined ($address->{SMTPADDRESS})) {
     641          $addresstext .= $address->{SMTPADDRESS};
     642        } elsif ($address->{TYPE} eq "SMTP") {
     643          $addresstext .= $address->{ADDRESS};
     644        }
     645        $addresstext .= ">";
     646        push @result, $addresstext;
     647        next name;
     648      }
     649    }
     650    push @result, $name;
     651  }
     652  return join ", ", @result;
     653}
     654
     655sub _ParseHead {
     656  my ($self, $data) = @_;
     657  defined $data or return undef;
     658  # Parse full header date if we got that.
     659  my $parser = new MIME::Parser();
     660  $parser->output_to_core(1);
     661  $parser->decode_headers(1);
     662  $data =~ s/^Microsoft Mail.*$/X-MSGConvert: yes/m;
     663  my $entity = $parser->parse_data($data)
     664    or warn "Couldn't parse full headers!";
     665  my $head = $entity->head;
     666  $head->unfold;
     667  return $head;
     668}
     669
     670# Find out if we need to construct a multipart message
     671sub _IsMultiPart {
     672  my $self = shift;
     673
     674  return (
     675    ($self->{BODY_HTML} and $self->{BODY_PLAIN})
     676      or @{$self->{ATTACHMENTS}}>0
     677  );
     678}
     679
     680# Copy original header data.
     681# Note: This should contain the Date: header.
     682sub _CopyHeaderData {
     683  my ($self, $mime) = @_;
     684
     685  my $head = $self->_ParseHead($self->{HEAD}) or return;
     686
     687  foreach my $tag (grep {!$skipheaders->{$_}} $head->tags) {
     688    foreach my $value ($head->get_all($tag)) {
     689      $mime->head->add($tag, $value);
     690    }
     691  }
     692}
     693
     694# Set header fields
     695sub _SetHeaderFields {
     696  my ($self, $mime) = @_;
     697
     698  # If we didn't get the date from the original header data, we may be able
     699  # to get it from the SUBMISSION_ID:
     700  $self->_AddHeaderField($mime, 'Date', $self->_SubmissionIdDate());
     701
     702  # Third and last chance to set the Date: header; this uses the date the
     703  # MSG file was saved.
     704  $self->_AddHeaderField($mime, 'Date', $self->{OLEDATE});
     705  $self->_AddHeaderField($mime, 'Subject', $self->{SUBJECT});
     706  $self->_AddHeaderField($mime, 'From', $self->_Address("FROM"));
     707  #$self->_AddHeaderField($mime, 'Reply-To', $self->_Address("REPLYTO"));
     708  $self->_AddHeaderField($mime, 'To', $self->_ExpandAddressList($self->{TO}));
     709  $self->_AddHeaderField($mime, 'Cc', $self->_ExpandAddressList($self->{CC}));
     710  $self->_AddHeaderField($mime, 'Message-Id', $self->{MESSAGEID});
     711  $self->_AddHeaderField($mime, 'In-Reply-To', $self->{INREPLYTO});
     712}
     713
     714package main;
     715use Getopt::Long;
     716use Pod::Usage;
     717
     718# Setup command line processing.
     719my $verbose = '';
     720my $help = '';      # Print help message and exit.
     721GetOptions('verbose' => \$verbose, 'help|?' => \$help) or pod2usage(2);
     722pod2usage(1) if $help;
     723
     724# Get file name
     725my $file = $ARGV[0];
     726defined $file or pod2usage(2);
     727warn "Will parse file: $file\n" if $verbose;
     728
     729# Load and parse MSG file (is OLE)
     730my $Msg = OLE::Storage_Lite->new($file);
     731my $PPS = $Msg->getPpsTree(1);
     732$PPS or die "$file must be an OLE file";
     733
     734# parse PPS tree
     735my $parser = new MSGParser();
     736$parser->set_verbosity(1) if $verbose;
     737$parser->parse($PPS);
     738$parser->print();
     739
     740#
     741# Usage info follows.
     742#
     743__END__
     744
     745=head1 NAME
     746
     747msgconvert.pl - Convert Outlook .msg files to mbox format
     748
     749=head1 SYNOPSIS
     750
     751msgconvert.pl [options] <file.msg>
     752
     753  Options:
     754    --verbose   be verbose
     755    --help      help message
     756
     757=head1 OPTIONS
     758
     759=over 8
     760
     761=item B<--verbose>
     762
     763    Print information about skipped parts of the .msg file.
     764
     765=item B<--help>
     766
     767    Print a brief help message.
     768
     =back

     769=head1 DESCRIPTION
     770
     771This program will output the message contained in file.msg in mbox format
     772on stdout. It will complain about unrecognized OLE parts on
     773stderr.
     774
     775=head1 BUGS
     776
     777Not all data that's in the .MSG file is converted. There simply are some
     778parts whose meaning escapes me. One of these must contain the date the
     779message was sent, for example. Formatting of text messages will also be
     780lost. YMMV.
     781
     782=cut
  • xapian-omega-1.0.7a/omega.cc

    diff -u  xapian-omega-1.0.7a/omega.cc.orig
    old new  
    264264        }
    265265    }
    266266
     267    // filter by URL substring
     268    val = cgi_params.find("U");
     269    if (val != cgi_params.end()) {
     270        string url = val->second;
     271        if (!url.empty()) {
     272            filters += ("U" + url + "*");
     273            filters += filter_sep;
     274        }
     275    }
     276
    267277    // date range filters
    268278    val = cgi_params.find("START");
    269279    if (val != cgi_params.end()) date_start = val->second;
  • xapian-omega-1.0.7a/omega.conf.in

    diff -u  xapian-omega-1.0.7a/omega.conf.in.orig
    old new  
     1# Directory containing Xapian databases:
     2database_dir @localstatedir@/omega/data
     3
     4# Directory containing OmegaScript templates:
     5template_dir @localstatedir@/omega/templates
     6
     7# Directory to write Omega logs to:
     8log_dir      /var/log/omega
     9
     10# Directory containing any cdb files for the $lookup OmegaScript command:
     11cdb_dir      @localstatedir@/omega/cdb
     12
     13# Directory containing extracted archives:
     14cache_dir    @localstatedir@/omega/cache
  • xapian-omega-1.0.7a/omindex.cc

    diff -u  xapian-omega-1.0.7a/omindex.cc.orig
    old new  
    44 * Copyright 2001,2005 James Aylett
    55 * Copyright 2001,2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
     7 * Copyright 2006,2007,2008 AVL List GesmbH
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    4243#include <xapian.h>
    4344
    4445#include "commonhelp.h"
     46#include "configfile.h"
    4547#include "diritor.h"
    4648#include "hashterm.h"
    4749#include "loadfile.h"
     
    6264extern char * mkdtemp(char *);
    6365#endif
    6466
     67#ifndef LIBEXECDIR
     68// must have ending slash
     69//# define LIBEXECDIR "/usr/lib/omega/bin/"
     70# define LIBEXECDIR ""
     71#endif
     72#ifndef PKGDATADIR
     73// must have ending slash
     74# define PKGDATADIR "/usr/share/omega/"
     75#endif
     76
    6577using namespace std;
    6678
    6779#define TITLE_SIZE 128
     
    6981
    7082#define PROG_NAME "omindex"
    7183#define PROG_DESC "Index static website data via the filesystem"
     84
     85/* used in runfilter.cc */
     86bool verbose = false;
     87string error_log;
    7288
    7389static bool skip_duplicates = false;
    7490static bool follow_symlinks = false;
     91static bool nocleanup = false;
     92static bool silent = false;
    7593static string dbpath;
    7694static string root;
    7795static string indexroot;
     
    136154
     155static void
     156index_cached_directory(size_t depth_limit,
     157                       const string &file,
     158                       const string &url,
     159                       const string &ext,
     160                       const string &cmd,
     161                       map<string, string>& mime_map);
     162static
     163int mkdir_p(const string &path, mode_t mode);
     164
    137165inline static bool
    138166p_notalnum(unsigned int c)
    139167{
     
    184212
    185     cout << "Indexing \"" << url << "\" as " << mimetype << " ... " << flush;
     213    if (!silent)
     214        cout << "Indexing \"" << url.substr(1) << "\" as " << mimetype << " ... " << flush;
    186215
    187216    string urlterm("U");
    188217    urlterm += baseurl;
     
    217246            // indexing is disallowed
    218247        }
    219248        if (!p.indexing_allowed) {
    220             cout << "indexing disallowed by meta tag - skipping" << endl;
     249            if (!silent)
     250                cout << "indexing disallowed by meta tag - skipping" << endl;
    221251            return;
    222252        }
    223253        dump = p.dump;
     
    245275            return;
    246276        }
    247277        md5_string(dump, md5);
     278#if 0 // FIXME: this won't work as omindex will have the database locked...
     279    } else if (mimetype == "message/rfc822") { // // => mbox2script
     280        //for stemmer lang, parse stemmer.get_description => Xapian::Stem(bla)
     281        string cmd = LIBEXECDIR"mbox2omega " + shell_protect(file) + error_log+"| "
     282            "scriptindex " + shell_protect(dbpath) + " "PKGDATADIR"mbox2script.script";
     283        try {
     284            dump = stdout_to_string(cmd);
     285        } catch (ReadError) {
     286            cout << "\"" << cmd << "\" failed - skipping" << endl;
     287            return;
     288        }
     289#endif
    248290    } else if (mimetype == "application/pdf") {
    249291        string safefile = shell_protect(file);
     
    383425    } else if (mimetype == "text/rtf") {
    384426        // The --text option unhelpfully converts all non-ASCII characters to
    385427        // "?" so we use --html instead, which produces HTML entities.
    386         string cmd = "unrtf --nopict --html 2>/dev/null " + shell_protect(file);
     428        string cmd = "unrtf --nopict --html 2>/dev/null " + shell_protect(file) + error_log;
    387429        MyHtmlParser p;
    388430        try {
    389431            p.parse_html(stdout_to_string(cmd));
    (this hunk was shorter than expected)  
    536579            Xapian::docid did = db.replace_document(urlterm, newdocument);
    537580            if (did < updated.size()) {
    538581                updated[did] = true;
     582                if (!silent)
    539583                cout << "updated." << endl;
    540584            } else {
     585                if (!silent)
    541586                cout << "added." << endl;
    542587            }
    543588        } catch (...) {
    544589            // FIXME: is this ever actually needed?
    545590            db.add_document(newdocument);
     591            if (!silent)
    546592            cout << "added (failed re-seek for duplicate)." << endl;
    547593        }
    548594    } else {
    549595        // If this were a duplicate, we'd have skipped it above.
    550596        db.add_document(newdocument);
     597        if (!silent)
    551598        cout << "added." << endl;
    552599    }
    553600}
    554601
     602/* Note: switched to cache_dir as root for virtual directories,
     603   because /srcdir/.zip might not be creatable. */
    555604static void
    556605index_directory(size_t depth_limit, const string &dir,
    557606                map<string, string>& mime_map)
    558607{
    559608    string path = root + indexroot + dir;
    560609
    561     cout << "[Entering directory " << dir << "]" << endl;
     610    if (!silent)
     611        cout << "[Entering directory " << dir.substr(1) << "]" << endl;
    562612
    563613    DirectoryIterator d(follow_symlinks);
    564614    try {
    565         d.start(path);
    566         while (d.next()) try {
    567             string url = dir;
    568             if (!url.empty() && url[url.size() - 1] != '/') url += '/';
    569             url += d.leafname();
    570             string file = root + indexroot + url;
    571             switch (d.get_type()) {
    572                 case DirectoryIterator::DIRECTORY:
    573                     if (depth_limit == 1) continue;
    574                     try {
    575                         size_t new_limit = depth_limit;
    576                         if (new_limit) --new_limit;
    577                         index_directory(new_limit, url, mime_map);
    578                     } catch (...) {
    579                         cout << "Caught unknown exception in index_directory, rethrowing" << endl;
    580                         throw;
     615        d.start(root + indexroot + dir);
     616    } catch (const std::string & error) {
     617        cout << error << " - skipping" << endl;
     618        return;
     619    }
     620    while (d.next()) try {
     621        struct stat statbuf;
     622        string url = dir;
     623        if (!url.empty() && url[url.size() - 1] != '/') url += '/';
     624        url += d.leafname();
     625        string file = root + indexroot + url;
     626        switch (d.get_type()) {
     627            case DirectoryIterator::DIRECTORY:
     628                if (depth_limit == 1) continue;
     629                try {
     630                    size_t new_limit = depth_limit;
     631                    if (new_limit) --new_limit;
     632                    index_directory(new_limit, url, mime_map);
     633                } catch (...) {
     634                    cout << "Caught unknown exception in index_directory, rethrowing" << endl;
     635                    throw;
     636                }
     637                continue;
     638            case DirectoryIterator::REGULAR_FILE: {
     639                string ext;
     640                string::size_type dot = url.find_last_of('.');
     641                if (dot != string::npos) ext = url.substr(dot + 1);
     642                if (!ext.empty()) {
     643                    ext = string(ext); // lowercase ext
     644                    for (unsigned int i=0; i<ext.length(); i++) {
     645                        ext[i] = tolower(ext[i]);
    581646                    }
    582                 case DirectoryIterator::REGULAR_FILE: {
    583                     string ext;
    584                     string::size_type dot = url.find_last_of('.');
    585                     if (dot != string::npos) ext = url.substr(dot + 1);
    586 
    587                     map<string,string>::iterator mt = mime_map.find(ext);
    588                     if (mt == mime_map.end()) {
    589                         // If the extension isn't found, see if the lower-cased
    590                         // version (if different) is found.
    591                         bool changed = false;
    592                         string::iterator i;
    593                         for (i = ext.begin(); i != ext.end(); ++i) {
    594                             if (*i >= 'A' && *i <= 'Z') {
    595                                 *i = tolower(*i);
    596                                 changed = true;
    597                             }
     647                }
     648
     649                if (strcmp(d.leafname(), "mbox") == 0) {
     650                    // Special filename.
     651                    off_t size = d.get_size();
     652                    time_t mtime = d.get_mtime();
     653                    index_file(indexroot + url, "message/rfc822", mtime, size);
     654                    continue;
     655                }
     656
     657                map<string,string>::iterator mt = mime_map.find(ext);
     658                if (mt == mime_map.end()) {
     659                    // If the extension isn't found, see if the lower-cased
     660                    // version (if different) is found.
     661                    bool changed = false;
     662                    string::iterator i;
     663                    for (i = ext.begin(); i != ext.end(); ++i) {
     664                        if (*i >= 'A' && *i <= 'Z') {
     665                            *i = tolower(*i);
     666                            changed = true;
     667                        }
     668                    }
     669                    if (changed) mt = mime_map.find(ext);
     670                }
     671                if (mt != mime_map.end()) {
     672                    string oldroot = root;
     673                    // Only check the file size if we recognise the
     674                    // extension to avoid a call to stat()/lstat() for
     675                    // files we can't handle when readdir() tells us the
     676                    // file type.
     677                    off_t size = d.get_size();
     678                    if (size == 0) {
     679                        cout << "Skipping empty file: \"" << file << "\""
     680                             << endl;
     681                        continue;
     682                    }
     683
     684#ifndef _MSC_VER
     685                    // NOTE: unpacking does not work on MSWin32 this way!
     686                    // we'd really have to pull in utils.cc:rmdir from xapian-core
     687                    if (ext == "zip") {
     688                        if (depth_limit == 1) {
     689                            cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl;
     690                            continue;
     691                        }
     692                        // overwrite
     693                        string cmd = "unzip -u -P. -o " +shell_protect(file) + " -d " +shell_protect(cache_dir+"/.zip"+indexroot+url+"/");
     694                        try {
     695                            size_t new_limit = depth_limit;
     696                            if (new_limit) --new_limit;
     697                            index_cached_directory(new_limit, file, url, ext, cmd, mime_map);
     698                        } catch (ReadError) {
     699                            cout << "failed " << cmd << " << in index_cached_directory" << endl;
     700                            root = oldroot;
     701                        } catch (...) {
     702                            cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl;
     703                            root = oldroot;
     704                            throw;
    598705                        }
    599                         if (changed) mt = mime_map.find(ext);
     706                        continue;
    600707                    }
    601                     if (mt != mime_map.end()) {
    602                         // Only check the file size if we recognise the
    603                         // extension to avoid a call to stat()/lstat() for
    604                         // files we can't handle when readdir() tells us the
    605                         // file type.
    606                         off_t size = d.get_size();
    607                         if (size == 0) {
    608                             cout << "Skipping empty file: \"" << file << "\""
    609                                  << endl;
     708                    else if (ext == "rar") {
     709                        if (depth_limit == 1) {
     710                            cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl;
    610711                            continue;
    611712                        }
    612 
    613                         // It's in our MIME map so we know how to index it.
    614                         const string & mimetype = mt->second;
     713                        // overwrite
     714                        string cmd = "unrar x -o+ " +shell_protect(file) + " "
     715                            + shell_protect(cache_dir+"/.rar"+indexroot+url+"/");
     716                        try {
     717                            size_t new_limit = depth_limit;
     718                            if (new_limit) --new_limit;
     719                            index_cached_directory(new_limit, file, url, ext, cmd, mime_map);
     720                        } catch (ReadError) {
     721                            cout << "failed " << cmd << " << in index_cached_directory" << endl;
     722                            root = oldroot;
     723                        } catch (...) {
     724                            cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl;
     725                            root = oldroot;
     726                            throw;
     727                        }
     728                        continue;
     729                    }
     730#ifdef HAVE_MSGCONVERT
     731                    else if (ext == "msg") {
     732                        struct stat statcache;
     733                        char olddir[256];
     734                       
     735                        if (depth_limit == 1) {
     736                            cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl;
     737                            continue;
     738                        }
     739                        string cmd = LIBEXECDIR"outlook2text "+shell_protect(file);
     740                        // unpack multiparts and attachments. so we have to chdir first
     741                        string fulldir = cache_dir+"/.msg"+indexroot+url;
     742                        getcwd(olddir,256);
     743#ifdef HAVE_LSTAT
     744                        lstat(fulldir.c_str(), &statcache);
     745#else
     746                        stat(fulldir.c_str(), &statcache);
     747#endif
     748                        if (!S_ISDIR(statcache.st_mode)) {
     749                            mkdir_p(fulldir, 0755);
     750                        }
    615751                        try {
    616                             time_t mtime = d.get_mtime();
    617                             index_file(indexroot + url, mimetype, mtime, size);
    618                         } catch (NoSuchFilter) {
    619                             // FIXME: we ought to ignore by mime-type not
    620                             // extension.
    621                             cout << "Filter for \"" << mimetype
    622                                  << "\" not installed - ignoring extension \""
    623                                  << ext << "\"" << endl;
    624                             mime_map.erase(mt);
    625                         }
    626                     } else {
    627                         cout << "Unknown extension: \"" << file
    628                              << "\" - skipping" << endl;
     752                            chdir (fulldir.c_str());
     753                            size_t new_limit = depth_limit;
     754                            if (new_limit) --new_limit;
     755                            index_cached_directory(new_limit, file, url, ext, cmd, mime_map);
     756                            chdir (olddir);
     757                        } catch (ReadError) {
     758                            cout << "failed " << cmd << " << in index_cached_directory" << endl;
     759                            chdir (olddir);
     760                            root = oldroot;
     761                        } catch (...) {
     762                            cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl;
     763                            chdir (olddir);
     764                            root = oldroot;
     765                            throw;
     766                        }
     767                        continue;
    629768                    }
    630                     continue;
    631                 }
    632                 default:
    633                     cout << "Not a regular file \"" << file
     769#endif
     770#ifdef HAVE_READPST
     771                    else if (ext == "pst") {
     772                        if (depth_limit == 1) {
     773                            cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl;
     774                            continue;
     775                        }
     776                        // unpack attachments also, together with mbox files
     777                        string cmd = "readpst -r -cv -w -o "
     778                            + shell_protect(cache_dir+"/.pst"+indexroot+url+"/")+" "+shell_protect(file);
     779                        try {
     780                            size_t new_limit = depth_limit;
     781                            if (new_limit) --new_limit;
     782                            index_cached_directory(new_limit, file, url, ext, cmd, mime_map);
     783                        } catch (ReadError) {
     784                            root = oldroot;
     785                            cout << "failed " << cmd << " << in index_cached_directory" << endl;
     786                        } catch (...) {
     787                            root = oldroot;
     788                            cout << "Caught unknown exception in index_cached_directory, rethrowing" << endl;
     789                            throw;
     790                        }
     791                        continue;
     792                    }
     793#endif
     794#endif
     795                    // It's in our MIME map so we know how to index it.
     796                    const string & mimetype = mt->second;
     797                    try {
     798                        time_t mtime = d.get_mtime();
     799                        index_file(indexroot + url, mimetype, mtime, size);
     800                    } catch (NoSuchFilter) {
     801                        // FIXME: we ought to ignore by mime-type not
     802                        // extension.
     803                        cout << "Filter for \"" << mimetype
     804                             << "\" not installed - ignoring extension \""
     805                             << ext << "\"" << endl;
     806                        mime_map.erase(mt);
     807                    }
     808                } else {
     809                    cout << "Unknown extension: \"" << file
    634810                         << "\" - skipping" << endl;
     811                }
     812                continue;
    635813            }
    636         } catch (const std::string & error) {
    637             cout << error << " - skipping" << endl;
    638             continue;
     814            default:
     815                cout << "Not a regular file \"" << file
     816                     << "\" - skipping" << endl;
    639817        }
    640818    } catch (const std::string & error) {
    641         cout << error << " - skipping directory" << endl;
    642         return;
     819        cout << error << " - skipping" << endl;
     820        continue;
     821    }
     822}
     823
     824static
     825int mkdir_p(const string &path, mode_t mode) {
     826#ifdef __WIN32__
     827    stdout_to_string("mkdir \""+shell_protect(path)+"\"");
     828#else
     829    stdout_to_string("mkdir -p "+shell_protect(path));
     830#endif
     831    return 0;
     832}
     833
     834/*
     835 * unpack .msg/.pst/.rar/.zip into local cache dir and recurse there
     836 */
     837static void
     838index_cached_directory(size_t depth_limit,
     839                       const string &file,
     840                       const string &url,
     841                       const string &ext,
     842                       const string &cmd,
     843                       map<string, string>& mime_map)
     844{
     845    string oldroot = root;
     846    root = cache_dir;
     847    string cache = root+"/."+ext+indexroot;
     848    string cachedir = cache+url;
     849    struct stat statfile, statcache;
     850    bool extract_cache;
     851#ifdef HAVE_LSTAT
     852    lstat(file.c_str(), &statfile);
     853    lstat(cachedir.c_str(), &statcache);
     854#else
     855    stat(file.c_str(), &statfile);
     856    stat(cachedir.c_str(), &statcache);
     857#endif
     858    extract_cache = true;
     859    // if cachedir exists and if file is older as cachedir and if cachedir existed 5 secs ago,
     860    // then it was already extracted.
     861    if (S_ISDIR(statcache.st_mode)
     862        && S_ISREG(statfile.st_mode)
     863        && (statfile.st_mtime < statcache.st_mtime)
     864        && (statcache.st_mtime < (time_t)(time(NULL)-500))) // not created by nested mkdir call
     865    {
     866        // but is it in the database also? prevent from deleting skipped files
     867        if (!silent)
     868            cout << "Unchanged cache \"" << cachedir << "\" - \"" << file << "\" - skip extraction "
     869                 // << statfile.st_mtime << " < " << statcache.st_mtime
     870                 << endl;
     871        extract_cache = false;
     872    }
     873    if (S_ISDIR(statcache.st_mode) && S_ISREG(statfile.st_mode) )
     874    {
     875        // check database timestamp for cached container, esp. for cleaned up caches.
     876        // if already in db we need not to extract again
     877        string urlterm("U");
     878        urlterm += baseurl;
     879        urlterm += "/."+ext+indexroot+url;
     880        if (urlterm.length() > MAX_SAFE_TERM_LENGTH)
     881            urlterm = hash_long_term(urlterm, MAX_SAFE_TERM_LENGTH);
     882       
     883        {
     884            // at first find the docid with the beginning urlterm and check its timestamp
     885            Xapian::docid docid = 0;
     886            Xapian::PostingIterator p = db.postlist_begin(urlterm);
     887            if (p != db.postlist_end(urlterm)) {
     888                docid = *p;
     889            }
     890            if (docid && !ignore_time) {
     891                // new: first search value (1)
     892                Xapian::Document doc = db.get_document(docid);
     893                string lastmod;
     894                if (doc.values_count())
     895                    lastmod = doc.get_value(VALUE_LASTMOD);
     896                if (!lastmod.empty()) {
     897                    if (string_to_int(lastmod) >= statfile.st_mtime) {
     898                        if (!silent)
     899                            cout << "Cache "<< "."+ext+indexroot+url << " not newer. Ignored." << endl;
     900                        if (docid < updated.size()) {
     901                            updated[docid] = true;
     902                            root = oldroot;
     903                            return;
     904                        }
     905                    }
     906                }
     907            }
     908        }
     909    }
     910
     911    if (extract_cache) {
     912        if (!silent)
     913            cout << "[EXTRACT into cache " << cachedir << "]" << endl;
     914        if (verbose && S_ISDIR(statcache.st_mode) && S_ISREG(statfile.st_mode))
     915            cout << " ...changed cache \"" << cachedir << "\" - \"" << file << "\" "
     916                 << statfile.st_mtime << " < " << statcache.st_mtime << " time: " << time(NULL)
     917                 << endl;
     918        if (!S_ISDIR(statcache.st_mode))
     919            mkdir_p(cachedir, 0755);
     920        stdout_to_string(cmd);
     921#ifndef __WIN32__
     922        stdout_to_string("chmod -R u+rwx " + shell_protect(cachedir));
     923#endif
     924#ifdef HAVE_LSTAT
     925        lstat(cachedir.c_str(), &statcache);
     926#else
     927        stat(cachedir.c_str(), &statcache);
     928#endif
     929    }
     930
     931    if (S_ISDIR(statcache.st_mode)) {
     932        if (depth_limit == 1) {
     933            cout << "Recursion limit reached for \""<< url << "\" - skipping " << endl;
     934        } else {
     935            // max loop 5, magic start: /.ext+file
     936            index_directory(depth_limit+5, "/."+ext+url, mime_map);
     937            if (!nocleanup) {
     938                if (!silent)
     939                    cout << "[CLEANUP " << "rm -rf " << shell_protect(cachedir) << "]" << endl;
     940                rm_rf(cachedir);
     941            }
     942        }
     943    }
     944    else { // no -p would be fatal here
     945        cout << "cachedir " << shell_protect(cachedir) << " does not exist - skipped" << endl;
    643946    }
     947    root = oldroot;
    644948}
    645949
    646950int
     
    653958    static const struct option longopts[] = {
    654959        { "help",       no_argument,            NULL, 'h' },
    655960        { "version",    no_argument,            NULL, 'v' },
     961        { "verbose",    no_argument,            NULL, 'V' },
     962        { "silent",     no_argument,            NULL, 'S' },
    656963        { "overwrite",  no_argument,            NULL, 'o' },
    657964        { "duplicates", required_argument,      NULL, 'd' },
    658965        { "preserve-nonduplicates",     no_argument,    NULL, 'p' },
     
    667974        { "depth-limit",required_argument,      NULL, 'l' },
    668975        { "follow",     no_argument,            NULL, 'f' },
    669976        { "stemmer",    required_argument,      NULL, 's' },
     977        { "nocleanup",  no_argument,            NULL, 'c' },
     978        { "cachedir",   required_argument,      NULL, 'C' },
    670979        { 0, 0, NULL, 0 }
    671980    };
    672981
     
    7171026    mime_map["xlt"] = "application/vnd.ms-excel"; // Excel template
    7181027    mime_map["ppt"] = "application/vnd.ms-powerpoint";
    7191028    mime_map["pps"] = "application/vnd.ms-powerpoint"; // Powerpoint slideshow
     1029#ifdef HAVE_READPST
     1030    //  Outlook messager folder
     1031    mime_map["pst"] = "application/vnd.ms-outlook-pst"; // readpst | uudeview (libpst)
     1032#endif
     1033#ifdef HAVE_MSGCONVERT
     1034    mime_map["msg"] = "application/vnd.ms-outlook";     // outlook2text via msgconvert.pl
     1035#endif
     1036    mime_map["mbox"] = "message/rfc822";                // => mbox2omega
    7201037    // Perl:
    7211038    mime_map["pl"] = "text/x-perl";
    7221039    mime_map["pm"] = "text/x-perl";
     
    7271044    // DjVu:
    7281045    mime_map["djv"] = "image/vnd.djvu";
    7291046    mime_map["djvu"] = "image/vnd.djvu";
     1047#ifndef _MSC_VER
     1048    mime_map["zip"] = "application/x-zip"; // recursive scanning
     1049#  ifdef HAVE_UNRAR
     1050    mime_map["rar"] = "application/x-rar"; // recursive scanning
     1051#  endif
     1052#endif
     1053
     1054    read_config_file();
    7301055 
    731     while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M:lpf", longopts, NULL)) != -1) {
     1056    while ((getopt_ret = gnu_getopt_long(argc, argv, "hvd:D:U:M:C:lpfc", longopts, NULL))!=EOF) {
    7321057        switch (getopt_ret) {
    7331058        case 'h': {
    7341059            cout << PROG_NAME" - "PROG_DESC"\n\n"
     
    7531078"                                duplicate replace mode\n"
    7541079"  -D, --db                 path to database to use\n"
    7551080"  -U, --url                base url DIRECTORY represents (default: /)\n"
     1081"  -C, --cachedir           path to local cache to use (default from omega.conf)\n"
    7561082"  -M, --mime-type          additional MIME mapping ext:type\n"
    7571083"  -l, --depth-limit=LIMIT  set recursion limit (0 = unlimited)\n"
    7581084"  -f, --follow             follow symbolic links\n"
     1085"  -c, --nocleanup          keep cache, don't delete temporary .zip,.rar,.pst,.msg cache folders\n"
    7591086"      --overwrite          create the database anew (the default is to update\n"
    760 "                           if the database already exists)" << endl;
     1087"                           if the database already exists)"
     1088"      --verbose            Print commands also\n"
     1089"      --silent             Print only errors\n";
    7611090            print_stemmer_help("     ");
    7621091            print_help_and_version_help("     ");
    7631092            return 0;
     
    7851114        case 'p': // don't delete unupdated documents
    7861115            preserve_unupdated = true;
    7871116            break;
     1117        case 'V':
     1118            verbose = true;
     1119            break;
     1120        case 'c':
     1121            nocleanup = true;
     1122            break;
    7881123        case 'l': { // Set recursion limit
    7891124            int arg = atoi(optarg);
    7901125            if (arg < 0) arg = 0;
     
    8171152        case 'U':
    8181153            baseurl = optarg;
    8191154            break;
     1155        case 'C':
     1156            cache_dir = optarg;
     1157            break;
    8201158        case 'o': // --overwrite
    8211159            overwrite = true;
    8221160            break;
     
    8441182    if (baseurl.empty()) {
    8451183        cerr << PROG_NAME": --url not specified, assuming `/'.\n";
    8461184    }
     1185    error_log = " 2>>"+log_dir+"omindex-error.log";
    8471186    // baseurl mustn't end '/' or you end up with the wrong URL
    8481187    // (//thing is different to /thing). We could probably make this
    8491188    // safe a different way, by ensuring that we don't put a leading '/'
     
    8691208    } else {
    8701209        indexroot = ""; // index the whole of root
    8711210    }
     1211    // add the db basename to cache_dir
     1212    {
     1213        const char *p = strrchr(dbpath.c_str(), '/');
     1214        // on windows only
     1215        if (!p) p = strrchr(dbpath.c_str(), '\\');
     1216        if (p) { p++; } else { p = dbpath.c_str(); }
     1217        cache_dir += p;
     1218    }
    8721219
    8731220    int exitcode = 1;
    8741221    try {
  • xapian-omega-1.0.7a/outlook2text.in

    diff -u  xapian-omega-1.0.7a/outlook2text.in.orig
    old new  
     1#! /bin/sh
     2# converts msg to mbox and extract attachments
     3# either be in the cache dir, or accept it as 2nd arg
     4if [ -n $2 ]; then
     5  @MSGCONVERT@ "$1" | @MIMEEXPLODE@ -d "$2"
     6else
     7  # already is in the cache dir
     8  base=`basename "$1" .msg`
     9  @MSGCONVERT@ "$1" | @MIMEEXPLODE@ -d "${base}"
     10fi
  • xapian-omega-1.0.7a/query.cc

    diff -u  xapian-omega-1.0.7a/query.cc.orig
    old new  
    141141        switch (t[0]) {
    142142            case 'a':
    143143                return (t == "a" || t == "about" || t == "an" || t == "and" ||
    144                         t == "are" || t == "as" || t == "at");
     144                    t == "are" || t == "as" || t == "at" || t == "according" ||
     145                    t == "again"  || t == "against"  || t == "ah"  || t == "all" ||
     146                    t == "although"  || t == "always" || t == "anyone" || t == "after" ||
     147                    t == "also"  || t == "any");
    145148            case 'b':
    146149                return (t == "be" || t == "by");
    147150            case 'e':
  • xapian-omega-1.0.7a/runfilter.cc

    diff -u  xapian-omega-1.0.7a/runfilter.cc.orig
    old new  
    6060
    6161using namespace std;
    6262
     63extern string error_log;
     64extern bool verbose;
     65
    6366string
    6467stdout_to_string(const string &cmd)
    6568{
     
    97100            setrlimit(RLIMIT_AS, &ram_limit);
    98101        }
    99102
    100         execl("/bin/sh", "/bin/sh", "-c", cmd.c_str(), (void*)NULL);
     103        string tmp;
     104        tmp = cmd + error_log;
     105        if (verbose) {
     106            cout << " Executing '" << tmp << "'..." << endl;
     107        }
     108
     109        execl("/bin/sh", "/bin/sh", "-c", tmp.c_str(), (void*)NULL);
    101110        _exit(-1);
    102111    }
    103112
     
    134143        throw ReadError();
    135144    }
    136145#else
    137     FILE * fh = popen(cmd.c_str(), "r");
     146    string tmp;
     147    tmp = cmd + error_log;
     148    if (verbose) {
     149        cout << " Executing '" << tmp << "'..." << endl;
     150    }
     151    FILE * fh = popen(tmp.c_str(), "r");
    138152    if (fh == NULL) throw ReadError();
    139153    while (!feof(fh)) {
    140154        char buf[4096];
  • xapian-omega-1.0.7a/scriptindex.cc

    diff -u  xapian-omega-1.0.7a/scriptindex.cc.orig
    old new  
    44 * Copyright 2001 Sam Liddicott
    55 * Copyright 2001,2002 Ananova Ltd
    66 * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
     7 * Copyright 2006,2007 AVL List GesmbH
    78 *
    89 * This program is free software; you can redistribute it and/or
    910 * modify it under the terms of the GNU General Public License as
     
    3839#include <stdio.h>
    3940#include <time.h>
    4041#include "safeunistd.h"
     42#include <sys/stat.h>
    4143
    4244#include "commonhelp.h"
     45#include "configfile.h"
    4346#include "hashterm.h"
    4447#include "loadfile.h"
    4548#include "myhtmlparse.h"
    4649#include "stringutils.h"
    4750#include "utf8truncate.h"
    4851#include "utils.h"
     52#include "values.h"
    4953
    5054#include "gnu_getopt.h"
     
    422426{
    423427    string line;
    424428    size_t line_no = 0;
     429    time_t last_mod = 0;
     430    long   file_size = 0;
     431
     432    if (strcmp(fname,"<stdin>") != 0) {
     433        struct stat statbuf;
     434        stat(fname, &statbuf);
     435        if (! statbuf.st_size) {
     436            cout << "Empty \"" << fname << "\" - skipping\n";
     437            return false;
     438        }
     439        file_size = statbuf.st_size;
     440        last_mod = statbuf.st_mtime;
     441    }
    425442    while (!stream.eof() && getline(stream, line)) {
    426443        ++line_no;
    427444        Xapian::Document doc;
     
    638655            for (i = fields.begin(); i != fields.end(); ++i) {
    639656                list<string>::const_iterator j;
    640657                for (j = i->second.begin(); j != i->second.end(); j++) {
     658                    if (i->first == "lastmod")  last_mod = 0;
     659                    if (i->first == "size")     file_size = 0;
    641660                    data += i->first;
    642661                    data += '=';
    643662                    data += *j;
    644663                    data += '\n';
    645664                }
    646665            }
     666            // provide some extra fields if not already provided by the script
     667            if (last_mod) {        // if indexed per filename
     668                data += "lastmod="+int_to_string(last_mod)+'\n';
     669                doc.add_value(VALUE_LASTMOD, int_to_string(last_mod));
     670            }
     671            if (file_size) {        // if indexed per filename
     672                data += "size="+int_to_string(file_size)+'\n';
     673                doc.add_value(VALUE_FILESIZE, int_to_string(file_size));
     674            }
    647675
    648676            // Put the data in the document
    649677            doc.set_data(data);
  • xapian-omega-1.0.7a/utils.cc

    diff -u  xapian-omega-1.0.7a/utils.cc.orig
    old new  
    3030
    3131using namespace std;
    3232
     33#ifdef __WIN32__
     34#include "safewindows.h"
     35#endif
     36
    3337// This ought to be enough for any of the conversions below.
    3438#define BUFSIZE 100
    3539
     40/// Allow system to work directly on C++ strings.
     41inline int system(const string &command) { return system(command.c_str()); }
     42
     43/// Remove a directory and contents.
     44void
     45rm_rf(const string &filename)
     46{
     47    // Check filename exists and is actually a directory
     48    struct stat sb;
     49    if (stat(filename, &sb) != 0 || !S_ISDIR(sb.st_mode)) return;
     50
     51    string safefile = shell_protect(filename);
     52#ifdef __WIN32__
     53# if 1
     54    static int win95 = -1;
     55    if (win95 == -1) {
     56        OSVERSIONINFO info;
     57        memset(&info, 0, sizeof(OSVERSIONINFO));
     58        info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
     59        if (GetVersionEx(&info)) {
     60            win95 = (info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS);
     61        }
     62    }
     63
     64    if (win95) {
     65        // for 95 like systems:
     66        system("deltree /y \"" + safefile + "\"");
     67    } else {
     68        // for NT like systems:
     69        system("rd /s /q \"" + safefile + "\"");
     70    }
     71# else
     72    safefile.append("\0", 2);
     73    SHFILEOPSTRUCT shfo;
     74    memset((void*)&shfo, 0, sizeof(shfo));
     75    shfo.hwnd = 0;
     76    shfo.wFunc = FO_DELETE;
     77    shfo.pFrom = safefile.data();
     78    shfo.fFlags = FOF_NOCONFIRMATION|FOF_NOERRORUI|FOF_SILENT;
     79    (void)SHFileOperation(&shfo);
     80# endif
     81#else
     82    system("rm -rf " + safefile);
     83#endif
     84}
  • xapian-omega-1.0.7a/utils.h

    diff -u  xapian-omega-1.0.7a/utils.h.orig
    old new  
    2222
    2323#include <string>
    2424
     25#include <stdlib.h>
     26#include <sys/stat.h>
     27#include <sys/types.h>
     28#ifdef _MSC_VER
     29# include <direct.h>
     30# include <io.h>
     31#else
     32# include <unistd.h>
     33#endif
     34#include <ctype.h>
     35#include <fcntl.h>
     36
    2537/** Converts year, month, day into an 8 character string like: "20061031". */
    2638std::string date_to_string(int year, int month, int day);
    2739
     
    3749/** Converts a string to an int. */
    3850int string_to_int(const std::string & s);
    3951
     52void rm_rf(const std::string &filename);
     53
    4054#endif
  • xapian-omega-1.0.7a/xapian-omega.spec.in

    diff -u  xapian-omega-1.0.7a/xapian-omega.spec.in.orig
    old new  
    4545# Create /var directories
    4646mkdir -p %{buildroot}%{contentdir}/omega/data
    4747mkdir -p %{buildroot}%{contentdir}/omega/cdb
     48mkdir -p %{buildroot}%{contentdir}/omega/cache
    4849mkdir -p %{buildroot}%{logdir}/omega
    4950# Default templates
    5051mkdir -p %{buildroot}%{contentdir}/omega/templates
     
    7778/var/www/icons/omega
    7879%{_datadir}/%{name}
    7980%config(noreplace) /etc/omega.conf
    80 %doc %{_datadir}/doc/%{name}-%{version}
     81%docdir /usr/share/doc/%{name}-%{version}
     82%doc AUTHORS ChangeLog COPYING NEWS README TODO
    8183# man pages may be gzipped, hence the trailing wildcard.
    8284%{_mandir}/man1/omindex.1*
    8385%{_mandir}/man1/scriptindex.1*