Good afternoon,
First of all I would like to thank the administrators for accepting my registration to the forum.
My problem is that I'm running the code below to generate a new file from extracted content, and for some reason I'm getting a FileNotFoundException.
The strange thing is that if I manually replace this part (where the error is being generated):
finalDoc.save(finalPath);
with this part:
finalDoc.save("C:\\Users\\MyName\\Desktop\\test.pdf");
the file is created successfully.
This is strange, since when I print the finalPath variable its value appears to be the same as "C:\\Users\\MyName\\Desktop\\test.pdf", and it is also of type String.
This is the API of the package:
Overview (Apache PDFBox 1.2.1 API)
Thanks for any help!
import org.apache.pdfbox.exceptions.InvalidPasswordException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.util.PDFTextStripperByArea; import java.awt.Rectangle; import java.util.List; /** * This is an example on how to extract text from a specific area on the PDF document. * * Usage: java org.apache.pdfbox.examples.util.ExtractTextByArea <input-pdf> * * @author <a href="#">Ben Litchfield</a> * @version $Revision: 1.2 $ */ public class ExtractTextByArea { private ExtractTextByArea() { //utility class and should not be constructed. } /** * This will print the documents text in a certain area. * * @param args The command line arguments. * * @throws Exception If there is an error parsing the document. */ public static void main( String[] args ) throws Exception { if( args.length != 1 ) { usage(); } else { PDDocument document = null; try { document = PDDocument.load( args[0] ); if( document.isEncrypted() ) { try { document.decrypt( "" ); } catch( InvalidPasswordException e ) { System.err.println( "Error: Document is encrypted with a password." ); System.exit( 1 ); } } PDFTextStripperByArea stripper = new PDFTextStripperByArea(); stripper.setSortByPosition( true ); Rectangle rect = new Rectangle( 335, 90, 30, 20 ); stripper.addRegion( "class1", rect ); List allPages = document.getDocumentCatalog().getAllPages(); PDPage firstPage = (PDPage)allPages.get( 0 ); stripper.extractRegions( firstPage ); PDDocument finalDoc = new PDDocument(); finalDoc.addPage(firstPage); String path = "C:\\Users\\MyName\\Desktop\\"; String extension = (stripper.getTextForRegion("class1" ) + ".pdf"); String finalPath = path.concat(extension); finalDoc.save(finalPath); } finally { if( document != null ) { document.close(); } } } } /** * This will print the usage for this document. */ private static void usage() { System.err.println( "Usage: java org.apache.pdfbox.examples.util.ExtractTextByArea <input-pdf>" ); } }