Ticket #290: omindex.diff

File omindex.diff, 2.3 KB (added by Frank J Bruzzaniti, 16 years ago)
  • xapian-applications/omega/omindex.cc

     
    178178    } catch (ReadError) {
    179179        // It's probably best to index the document even if pdfinfo fails.
    180180    }
     181
     182    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.wordprocessingml."))
     183    {
     184    // Inspired by http://mjr.towers.org.uk/comp/sxw2text
     185    string safefile = shell_protect(file);
     186    string cmd = "unzip -p " + safefile + " word/document.xml";
     187    try {
     188        XmlParser xmlparser;
     189        xmlparser.parse_html(stdout_to_string(cmd));
     190        dump = xmlparser.dump;
     191    } catch (ReadError) {
     192        cout << "\"" << cmd << "\" failed - skipping\n";
     193        return;
     194    }
     195
     196    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.spreadsheetml."))
     197    {
     198    // Inspired by http://mjr.towers.org.uk/comp/sxw2text
     199    string safefile = shell_protect(file);
     200    string cmd = "unzip -p " + safefile + " xl/sharedStrings.xml";
     201    try {
     202        XmlParser xmlparser;
     203        xmlparser.parse_html(stdout_to_string(cmd));
     204        dump = xmlparser.dump;
     205    } catch (ReadError) {
     206        cout << "\"" << cmd << "\" failed - skipping\n";
     207        return;
     208    }
     209
    181210}
    182211
    183212static void
     
    718747    mime_map["sxg"] = "application/vnd.sun.xml.writer.global";
    719748    mime_map["stw"] = "application/vnd.sun.xml.writer.template";
    720749    // Some other word processor formats:
     750    mime_map["docx"] = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; // Word 2007
    721751    mime_map["doc"] = "application/msword";
    722752    mime_map["dot"] = "application/msword"; // Word template
    723753    mime_map["wpd"] = "application/vnd.wordperfect";
     
    727757    mime_map["zabw"] = "application/x-abiword-compressed"; // AbiWord compressed
    728758    mime_map["rtf"] = "text/rtf";
    729759    // Other MS formats:
     760    mime_map["xlsx"] = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; // Excel 2007
    730761    mime_map["xls"] = "application/vnd.ms-excel";
    731762    mime_map["xlb"] = "application/vnd.ms-excel";
    732763    mime_map["xlt"] = "application/vnd.ms-excel"; // Excel template