import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Command-line utility that extracts the text of a Word (.docx) document
 * with Apache POI and writes it to a text file.
 */
public class WordToTextConverter {

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the source Word file and
     *             {@code args[1]} is the path of the destination text file
     */
    public static void main(String[] args) {
        // Check the argument count explicitly instead of relying on an
        // ArrayIndexOutOfBoundsException for control flow.
        if (args.length < 2) {
            System.out.println("Usage:java WordToTextConverter <word_file> <text_file>");
            return;
        }
        convertWordToText(args[0], args[1]);
    }

    /**
     * Reads the Word document at {@code src}, extracts its text and writes
     * that text to the file at {@code desc}.
     *
     * <p>An {@link IOException} raised while reading or writing is not
     * propagated; its stack trace is printed instead.
     *
     * @param src  path of the Word document to read
     * @param desc path of the text file to write
     */
    public static void convertWordToText(String src, String desc) {
        // try-with-resources closes every resource (in reverse order) even when
        // parsing or writing fails. The writer is declared last so the output
        // file is only opened once the document has been parsed successfully.
        // NOTE(review): FileWriter(String) encodes with the JVM default charset;
        // confirm whether an explicit charset such as UTF-8 is wanted here.
        try (FileInputStream fs = new FileInputStream(src);
             XWPFDocument docx = new XWPFDocument(fs);
             XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
             FileWriter fw = new FileWriter(desc)) {
            // Closing the writer flushes it, so no explicit flush() is needed.
            fw.write(extractor.getText());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
In the program above, XWPFDocument is used to construct a Microsoft Word document object from the FileInputStream object. The FileInputStream object supplies all the data of the original Microsoft Word file. To extract all the text from the document, you use the XWPFWordExtractor class: when you create an XWPFWordExtractor object, you pass the document object to its constructor. You can then get the text content of the document by calling the extractor's getText method. Once you have the text, the FileWriter class can be used to write it to the destination file.