Ticket #290: ms2007.patch

File ms2007.patch, 3.0 KB (added by Frank J Bruzzaniti, 15 years ago)

Here's a new patch that includes support for PowerPoint (.pptx).

  • .cc

    old new  
    351351            cout << "\"" << cmd << "\" failed - skipping\n";
    352352            return;
    353353        }
     354    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.wordprocessingml."))
     355    {
     356    // Inspired by http://mjr.towers.org.uk/comp/sxw2text
     357    string safefile = shell_protect(file);
     358    string cmd = "unzip -p " + safefile + " word/document.xml";
     359    try {
     360        XmlParser xmlparser;
     361        xmlparser.parse_html(stdout_to_string(cmd));
     362        dump = xmlparser.dump;
     363    } catch (ReadError) {
     364        cout << "\"" << cmd << "\" failed - skipping\n";
     365        return;
     366    }
     367    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.spreadsheetml."))
     368    {
     369    // Inspired by http://mjr.towers.org.uk/comp/sxw2text
     370    string safefile = shell_protect(file);
     371    string cmd = "unzip -p " + safefile + " xl/sharedStrings.xml";
     372    try {
     373        XmlParser xmlparser;
     374        xmlparser.parse_html(stdout_to_string(cmd));
     375        dump = xmlparser.dump;
     376    } catch (ReadError) {
     377        cout << "\"" << cmd << "\" failed - skipping\n";
     378        return;
     379    }
     380    } else if (startswith(mimetype, "application/vnd.openxmlformats-officedocument.presentationml."))
     381    {
     382    // Inspired by http://mjr.towers.org.uk/comp/sxw2text
     383    string safefile = shell_protect(file);
     384    string cmd = "unzip -p " + safefile + " ppt/slides/slide*.xml";
     385    try {
     386        XmlParser xmlparser;
     387        xmlparser.parse_html(stdout_to_string(cmd));
     388        dump = xmlparser.dump;
     389    } catch (ReadError) {
     390        cout << "\"" << cmd << "\" failed - skipping\n";
     391        return;
     392    }
    354393    } else if (mimetype == "application/vnd.wordperfect") {
    355394        // Looking at the source of wpd2html and wpd2text I think both output
    356395        // utf-8, but it's hard to be sure without sample Unicode .wpd files
     
    730769    // Some other word processor formats:
    731770    mime_map["doc"] = "application/msword";
    732771    mime_map["dot"] = "application/msword"; // Word template
     772    mime_map["docx"] = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; //Word 2007
    733773    mime_map["wpd"] = "application/vnd.wordperfect";
    734774    mime_map["wps"] = "application/vnd.ms-works";
    735775    mime_map["wpt"] = "application/vnd.ms-works"; // Works template
     
    740780    mime_map["xls"] = "application/vnd.ms-excel";
    741781    mime_map["xlb"] = "application/vnd.ms-excel";
    742782    mime_map["xlt"] = "application/vnd.ms-excel"; // Excel template
     783    mime_map["xlsx"] = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; //Excel 2007
    743784    mime_map["ppt"] = "application/vnd.ms-powerpoint";
    744785    mime_map["pps"] = "application/vnd.ms-powerpoint"; // Powerpoint slideshow
     786    mime_map["pptx"] = "application/vnd.openxmlformats-officedocument.presentationml.presentation"; //PowerPoint 2007
    745787    // Perl:
    746788    mime_map["pl"] = "text/x-perl";
    747789    mime_map["pm"] = "text/x-perl";