I have a problem reading the content (text and images) of a PDF file and writing that content to a Word document. The Word document ends up containing junk characters instead of the original data. I am using itext-1.4.8.jar and the itextpdf-5.0 jar. Any help is appreciated.
Here is my code:
import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.parser.ContentByteUtils; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; import com.lowagie.text.Paragraph; import com.lowagie.text.rtf.RtfWriter2; public class Check1 { public static void main(String[] args) throws FileNotFoundException, IOException, DocumentException { PdfReader reader = new PdfReader( "/home/mujafar/Desktop/NPTEL Transcription Guidelines.pdf"); int n = reader.getNumberOfPages(); System.out.println("total no of pages:::" + n); Document document = new Document(); RtfWriter2.getInstance(document, new FileOutputStream( "/home/mujafar/Desktop/file.docx")); System.out.println("file created"); document.open(); byte[] bytes; for (int i = 1; i <= n; i++) { bytes = ContentByteUtils.getContentBytesForPage(reader, i); String s = new String(bytes); document.add(new Paragraph(s)); document.newPage(); } document.close(); } }