如何在Java中读取Doc或Docx文件?

我想用java读一个文件

import org.apache.poi.poifs.filesystem.*; import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hwpf.*; import org.apache.poi.hwpf.extractor.*; import org.apache.poi.hwpf.usermodel.HeaderStories; import java.io.*; public class ReadDocFileFromJava { public static void main(String[] args) { /**This is the document that you want to read using Java.**/ String fileName = "C:\\Path to file\\Test.doc"; /**Method call to read the document (demonstrate some useage of POI)**/ readMyDocument(fileName); } public static void readMyDocument(String fileName){ POIFSFileSystem fs = null; try { fs = new POIFSFileSystem(new FileInputStream(fileName)); HWPFDocument doc = new HWPFDocument(fs); /** Read the content **/ readParagraphs(doc); int pageNumber=1; /** We will try reading the header for page 1**/ readHeader(doc, pageNumber); /** Let's try reading the footer for page 1**/ readFooter(doc, pageNumber); /** Read the document summary**/ readDocumentSummary(doc); } catch (Exception e) { e.printStackTrace(); } } public static void readParagraphs(HWPFDocument doc) throws Exception{ WordExtractor we = new WordExtractor(doc); /**Get the total number of paragraphs**/ String[] paragraphs = we.getParagraphText(); System.out.println("Total Paragraphs: "+paragraphs.length); for (int i = 0; i < paragraphs.length; i++) { System.out.println("Length of paragraph "+(i +1)+": "+ paragraphs[i].length()); System.out.println(paragraphs[i].toString()); } } public static void readHeader(HWPFDocument doc, int pageNumber){ HeaderStories headerStore = new HeaderStories( doc); String header = headerStore.getHeader(pageNumber); System.out.println("Header Is: "+header); } public static void readFooter(HWPFDocument doc, int pageNumber){ HeaderStories headerStore = new HeaderStories( doc); String footer = headerStore.getFooter(pageNumber); System.out.println("Footer Is: "+footer); } public static void readDocumentSummary(HWPFDocument doc) { DocumentSummaryInformation 
summaryInfo=doc.getDocumentSummaryInformation(); String category = summaryInfo.getCategory(); String company = summaryInfo.getCompany(); int lineCount=summaryInfo.getLineCount(); int sectionCount=summaryInfo.getSectionCount(); int slideCount=summaryInfo.getSlideCount(); enter code here System.out.println("---------------------------"); System.out.println("Category: "+category); System.out.println("Company: "+company); System.out.println("Line Count: "+lineCount); System.out.println("Section Count: "+sectionCount); System.out.println("Slide Count: "+slideCount); } } 

http://sanjaal.com/java/tag/java-and-docx-format/

我想用Java读取doc或docx文件

这里是 ReadDocFile.java 的代码：它会读取一个 doc 文件并将其内容打印到控制台（注意：下面的代码使用的是 HWPF，只适用于 .doc 格式；读取 .docx 需要改用 POI 的 XWPF 相关类）。你可以按自己的需要进行修改。

 import java.io.*;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.extractor.WordExtractor;

 /**
  * Reads a Word {@code .doc} file with Apache POI's HWPF classes and prints
  * every non-null paragraph to standard output.
  */
 public class ReadDocFile {

     /** Path used when no command-line argument is supplied. */
     private static final String DEFAULT_FILE_NAME = "c:\\New.doc";

     /**
      * Entry point. Failures are reported by printing the stack trace.
      *
      * @param args optional; {@code args[0]} is the path of the document to
      *             read, otherwise the built-in default path is used
      */
     public static void main(String[] args) {
         String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;

         // try-with-resources releases the file handle on both the success and
         // the failure path; the original never closed the stream.
         try (FileInputStream fis = new FileInputStream(fileName)) {
             HWPFDocument document = new HWPFDocument(fis);
             WordExtractor extractor = new WordExtractor(document);

             String[] fileData = extractor.getParagraphText();
             for (String paragraph : fileData) {
                 if (paragraph != null) {
                     System.out.println(paragraph);
                 }
             }
         } catch (Exception exep) {
             exep.printStackTrace();
         }
     }
 }