Ticket #290: omindex.diff

File omindex.diff, 2.3 kB (added by frankjb, 4 months ago)
  • xapian-applications/omega/omindex.cc

     
    178 178    } catch (ReadError) { 
    179 179        // It's probably best to index the document even if pdfinfo fails. 
    180 180    } 
     181 
     182    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.wordprocessingml.")) 
     183    { 
     184    // Inspired by http://mjr.towers.org.uk/comp/sxw2text 
     185    string safefile = shell_protect(file); 
     186    string cmd = "unzip -p " + safefile + " word/document.xml"; 
     187    try { 
     188        XmlParser xmlparser; 
     189        xmlparser.parse_html(stdout_to_string(cmd)); 
     190        dump = xmlparser.dump; 
     191    } catch (ReadError) { 
     192        cout << "\"" << cmd << "\" failed - skipping\n"; 
     193        return; 
     194    } 
     195 
     196    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.spreadsheetml.")) 
     197    { 
     198    // Inspired by http://mjr.towers.org.uk/comp/sxw2text 
     199    string safefile = shell_protect(file); 
     200    string cmd = "unzip -p " + safefile + " xl/sharedStrings.xml"; 
     201    try { 
     202        XmlParser xmlparser; 
     203        xmlparser.parse_html(stdout_to_string(cmd)); 
     204        dump = xmlparser.dump; 
     205    } catch (ReadError) { 
     206        cout << "\"" << cmd << "\" failed - skipping\n"; 
     207        return; 
     208    } 
     209 
    181 210} 
    182 211 
    183 212static void 
     
    718 747    mime_map["sxg"] = "application/vnd.sun.xml.writer.global"; 
    719 748    mime_map["stw"] = "application/vnd.sun.xml.writer.template"; 
    720 749    // Some other word processor formats: 
     750    mime_map["docx"] = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; //Word 2007 
    721 751    mime_map["doc"] = "application/msword"; 
    722 752    mime_map["dot"] = "application/msword"; // Word template 
    723 753    mime_map["wpd"] = "application/vnd.wordperfect"; 
     
    727 757    mime_map["zabw"] = "application/x-abiword-compressed"; // AbiWord compressed 
    728 758    mime_map["rtf"] = "text/rtf"; 
    729 759    // Other MS formats: 
     760    mime_map["xlsx"] = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; //Excel 2007 
    730 761    mime_map["xls"] = "application/vnd.ms-excel"; 
    731 762    mime_map["xlb"] = "application/vnd.ms-excel"; 
    732 763    mime_map["xlt"] = "application/vnd.ms-excel"; // Excel template