The main APIs used in this program are Apache POI and iText. Apache POI is used to extract information from a Microsoft Word file, while iText is used to create the PDF file.
This is original Microsoft word file:
This is Generated pdf file from the original microsoft word file:
WordToPdfConverter source code:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;
import javax.swing.JFileChooser;
import javax.swing.filechooser.FileNameExtensionFilter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.PageSize;
public class WordToPdfConverter{
public static void main(String[] args){
selectFiles();
}
public static void selectFiles(){
JFileChooser chooser = new JFileChooser();
FileNameExtensionFilter filter = new FileNameExtensionFilter("Microsoft Word 2007+", "docx");
chooser.setFileFilter(filter);
chooser.setMultiSelectionEnabled(true);
int returnVal = chooser.showOpenDialog(null);
if(returnVal == JFileChooser.APPROVE_OPTION) {
File[] Files=chooser.getSelectedFiles();
System.out.println("Please wait...");
for( int i=0;i<Files.length;i++){
String wordfile=Files[i].toString();
convertWordToPdf(wordfile,wordfile.substring(0,wordfile.indexOf('.'))+".pdf");
}
System.out.println("Conversion complete");
}
}
public static void convertWordToPdf(String src, String desc){
try{
//create file inputstream object to read data from file
FileInputStream fs=new FileInputStream(src);
//create document object to wrap the file inputstream object
XWPFDocument doc=new XWPFDocument(fs);
//72 units=1 inch
Document pdfdoc=new Document(PageSize.A4,72,72,72,72);
//create a pdf writer object to write text to mypdf.pdf file
PdfWriter pwriter=PdfWriter.getInstance(pdfdoc, new FileOutputStream(desc));
//specify the vertical space between the lines of text
pwriter.setInitialLeading(20);
//get all paragraphs from word docx
List<XWPFParagraph> plist=doc.getParagraphs();
//open pdf document for writing
pdfdoc.open();
for (int i = 0; i < plist.size(); i++) {
//read through the list of paragraphs
XWPFParagraph pa = plist.get(i);
//get all run objects from each paragraph
List<XWPFRun> runs = pa.getRuns();
//read through the run objects
for (int j = 0; j < runs.size(); j++) {
XWPFRun run=runs.get(j);
//get pictures from the run and add them to the pdf document
List<XWPFPicture> piclist=run.getEmbeddedPictures();
//traverse through the list and write each image to a file
Iterator<XWPFPicture> iterator=piclist.iterator();
while(iterator.hasNext()){
XWPFPicture pic=iterator.next();
XWPFPictureData picdata=pic.getPictureData();
byte[] bytepic=picdata.getData();
Image imag=Image.getInstance(bytepic);
pdfdoc.add(imag);
}
//get color code
int color=getCode(run.getColor());
//construct font object
Font f=null;
if(run.isBold() && run.isItalic())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.BOLDITALIC, new BaseColor(color));
else if(run.isBold())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.BOLD, new BaseColor(color));
else if(run.isItalic())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.ITALIC, new BaseColor(color));
else if(run.isStrike())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.STRIKETHRU, new BaseColor(color));
else
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.NORMAL, new BaseColor(color));
//construct unicode string
String text=run.getText(-1);
byte[] bs;
if (text!=null){
bs=text.getBytes();
String str=new String(bs,"UTF-8");
//add string to the pdf document
Chunk chObj1=new Chunk(str,f);
pdfdoc.add(chObj1);
}
}
//output new line
pdfdoc.add(new Chunk(Chunk.NEWLINE));
}
//close pdf document
pdfdoc.close();
}catch(Exception e){e.printStackTrace();}
}
public static int getCode(String code){
int colorCode;
if(code!=null)
colorCode=Long.decode("0x"+code).intValue();
else
colorCode=Long.decode("0x000000").intValue();
return colorCode;
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;
import javax.swing.JFileChooser;
import javax.swing.filechooser.FileNameExtensionFilter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
public class WordToPdfConverter{
public static void main(String[] args){
selectFiles();
}
public static void selectFiles(){
JFileChooser chooser = new JFileChooser();
FileNameExtensionFilter filter = new FileNameExtensionFilter("Microsoft Word 2007+", "docx");
chooser.setFileFilter(filter);
chooser.setMultiSelectionEnabled(true);
int returnVal = chooser.showOpenDialog(null);
if(returnVal == JFileChooser.APPROVE_OPTION) {
File[] Files=chooser.getSelectedFiles();
System.out.println("Please wait...");
for( int i=0;i<Files.length;i++){
String wordfile=Files[i].toString();
convertWordToPdf(wordfile,wordfile.substring(0,wordfile.indexOf('.'))+".pdf");
}
System.out.println("Conversion complete");
}
}
public static void convertWordToPdf(String src, String desc){
try{
//create file inputstream object to read data from file
FileInputStream fs=new FileInputStream(src);
//create document object to wrap the file inputstream object
XWPFDocument doc=new XWPFDocument(fs);
//72 units=1 inch
Document pdfdoc=new Document(PageSize.A4,72,72,72,72);
//create a pdf writer object to write text to mypdf.pdf file
PdfWriter pwriter=PdfWriter.getInstance(pdfdoc, new FileOutputStream(desc));
//specify the vertical space between the lines of text
pwriter.setInitialLeading(20);
//get all paragraphs from word docx
List<XWPFParagraph> plist=doc.getParagraphs();
//open pdf document for writing
pdfdoc.open();
for (int i = 0; i < plist.size(); i++) {
//read through the list of paragraphs
XWPFParagraph pa = plist.get(i);
//get all run objects from each paragraph
List<XWPFRun> runs = pa.getRuns();
//read through the run objects
for (int j = 0; j < runs.size(); j++) {
XWPFRun run=runs.get(j);
//get pictures from the run and add them to the pdf document
List<XWPFPicture> piclist=run.getEmbeddedPictures();
//traverse through the list and write each image to a file
Iterator<XWPFPicture> iterator=piclist.iterator();
while(iterator.hasNext()){
XWPFPicture pic=iterator.next();
XWPFPictureData picdata=pic.getPictureData();
byte[] bytepic=picdata.getData();
Image imag=Image.getInstance(bytepic);
pdfdoc.add(imag);
}
//get color code
int color=getCode(run.getColor());
//construct font object
Font f=null;
if(run.isBold() && run.isItalic())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.BOLDITALIC, new BaseColor(color));
else if(run.isBold())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.BOLD, new BaseColor(color));
else if(run.isItalic())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.ITALIC, new BaseColor(color));
else if(run.isStrike())
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.STRIKETHRU, new BaseColor(color));
else
f=FontFactory.getFont(FontFactory.TIMES_ROMAN,run.getFontSize(),Font.NORMAL, new BaseColor(color));
//construct unicode string
String text=run.getText(-1);
byte[] bs;
if (text!=null){
bs=text.getBytes();
String str=new String(bs,"UTF-8");
//add string to the pdf document
Chunk chObj1=new Chunk(str,f);
pdfdoc.add(chObj1);
}
}
//output new line
pdfdoc.add(new Chunk(Chunk.NEWLINE));
}
//close pdf document
pdfdoc.close();
}catch(Exception e){e.printStackTrace();}
}
public static int getCode(String code){
int colorCode;
if(code!=null)
colorCode=Long.decode("0x"+code).intValue();
else
colorCode=Long.decode("0x000000").intValue();
return colorCode;
}
}
In the code above, the XWPFDocument class (from the POI library) is used to load a Microsoft Word file. Its constructor accepts a FileInputStream, which is used to read the Microsoft Word file. Once you have the document object, which contains all the data of the original Microsoft Word file, you can get the paragraph objects inside the document by using the getParagraphs() method. This method returns all paragraphs found in the original Microsoft Word file. Each paragraph object contains smaller items called run objects. From each run object you can extract the text, images, and formatting styles that were applied to the text when the Microsoft Word file was written.
Once you have the text, images, and formatting styles data, you can write them to the destination pdf file by using classes and methods from the iText library.
Hi Dara,
ReplyDeleteI have copied this source and trying to run. The input file is a docx file (2003 doc file is saved as 2007 docx by ms office). This docx contains 2 images. But after generating the pdf file it comes as blank. Could you please guide me for this issue. Thanks
Samir
I converted a 2003 doc file that contains images to 2007 docx file and ran the program to convert the docx file to pdf file. It worked good. All images and font styles are preserved. Please make sure that the images in the doc file are pictures (Insert->Picture). They should not shapes, ClipArt, or SmartArt.
Deletehttp://www.zeonpad.com/
Deletezeonpadpdf is java library for converting Microsoft Word, Excel, PowerPoint, Publisher, Visio, Outlook, images,text document to PDF. It works for both Desktop and Server application. It generates the high quality PDF file that are adobe Acrobat compatible. this library does not support multi-threaded conversion. this library is currently based on the Java COM bridge called JACOB, a bridge is required between COM and the Java Virtual Machine. its 100% free. Supports both 32bit and 64 bit windows os.
Hello Tarun,
DeleteI tried zeonpad, it worked good on my local system. but it is raising an exception when used ton server.
Can you help out?
Thanks
Do you try to convert ms word document with khmer unicode to pdf? does it work?
ReplyDeleteToday there is no free api in Java that is able to do conversion from khmer unicode word to pdf. I will try to do it later.
ReplyDeletezeonpad provide free java api to convert ms office documents to pdf.
DeleteI use Aspose.Words for Java for converting my word file to many formats including pdf and also to create word files. I am very satisfied with this API they keep updating this Library and provide new codes for developers which they can use in their APIs. You should also try it:
ReplyDeletehttp://www.aspose.com/java/word-component.aspx
Hi tried this word to pdf convertion.i got pdf.but the problem is tables which are in document are not came to generating pdf.some images also showing different sizes plz help for this ..........and give the solution ASAP
ReplyDeleteYea, i too observed that tables in doc are missing in PDF
DeleteHey Buddy! can you please share some simple way of converting JPG to PDF . I want to share this JPG to PDF converter with you. You must try this once. It is free and easy to use.
ReplyDeleteThank you for sharing this. I know Microsoft Office has its own plug-in for saving Word and Excel as PDF files. But if anyone need to convert Word to PDF on other applications, a third-party converter is necessary. I use a converter, which support Word, Excel and Tiff to PDF, tiff, word and convert Word to PDF, I think it's also a good tool.
ReplyDeleteI'm not a developer, i always use this free online pdf to word converter to convert pdf to word online.
ReplyDeleteI have tables in my word doc. But this code is not writing the tables into pdf. How do I achieve this?
ReplyDelete-
DeleteI read your post and need to thank you for sharing such pleasant lines. Buzz Application
ReplyDeleteWhat all jar files do we need to add in order to Run the program??
ReplyDeleteAdd the jar and dependencies jar names. Not able to work with this example because of dependencies
ReplyDeleteGood blog. After searching lot I got which jars to be used.
Deletelist of jars,
commons-codec-1.10
commons-imaging-1.0
commons-io-2.4
dom4j-1.6.1
fr.opensagres.xdocreport.itext.extension-2.0.0
itext-2.1.7
itext-pdfa-5.4.5
itext-xtra-5.5.11
itext-pdf-5.4.5
oomxl-schema-1.3
org.apache.poi.xwpf.converter.core-1.0.5
org.apache.poi.xwpf.converter.pdf-1.0.5
poi-3.16
poi-ooxml-3.14
stax-api-1.0.1
xml-apis-1.0.b2
xmlbeans-2.6.0
poi-3.15-beta1
how to extract tables, headers, footers, hyperlink, shapes ,charts from docx file and write them in into PDF files. Please post codes for these also if possible
ReplyDeleteHere, it has been described with complete clarity which displays how simple it is to have the conversion of word to pdf. Indeed you have tried your best to explain the whole method thus barring a few points; it can safely be considered a good attempt. You may visit: Word to pdf server.
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteThis online pdf merger or pdf combiner or pdf joiner will easily merge pdf files together with use of any software or any registration. pdf merger
ReplyDeleteGive the record a name and ensure that PDF is chosen in the "Spare As Type" dropdown, and after that tap the Publish button. That is it! You presently have a PDF document that you can appropriate. https://ds11.pdffiller.com/en/industry
ReplyDeletec# word to pdf converter
ReplyDeleteThey ought to depict how new innovations regularly expand the ebb and flow dimensions of logical comprehension and present new zones of research. diebesten vpn
ReplyDeleteGreat blog. All posts have something to learn. Your work is very good and I appreciate you and hopping for some more informative posts. Convert PDF to Word Free Online
ReplyDeleteThanks to sharing.. very nice post on DOCX to PDF Converter
ReplyDelete