PDFReader.java

/*
 * GovWay - A customizable API Gateway 
 * https://govway.org
 * 
 * Copyright (c) 2005-2024 Link.it srl (https://link.it). 
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3, as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package org.openspcoop2.utils.pdf;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDSignatureField;
import org.apache.pdfbox.pdmodel.interactive.form.PDXFAResource;
import org.openspcoop2.utils.UtilsException;

/**
* PDFReader
*
* @author Andrea Poli (apoli@link.it)
* @author $Author$
* @version $Rev$, $Date$
*/
public class PDFReader extends AbstractPDFCore {
	
	public PDFReader(PDDocument doc) throws UtilsException {
		super(doc);
	}
	
	public PDFReader(byte[] content) throws UtilsException {
		super(content, false);
	}
	public PDFReader(File doc) throws UtilsException {
		super(doc, false);
	}
	public PDFReader(InputStream is) throws UtilsException {
		super(is, false);
	}
	
	public PDFReader(byte[] content, boolean analyzeSignature) throws UtilsException {
		super(content, analyzeSignature);
		if(analyzeSignature) {
			this.initSignature();
		}
	}
	public PDFReader(File doc, boolean analyzeSignature) throws UtilsException {
		super(doc, analyzeSignature);
		if(analyzeSignature) {
			this.initSignature();
		}
	}
	public PDFReader(InputStream is, boolean analyzeSignature) throws UtilsException {
		super(is, analyzeSignature);
		if(analyzeSignature) {
			this.initSignature();
		}
	}
		
	public Map<String, EmbeddedFile> getEmbeddedFiles(boolean analyzeKids) throws UtilsException{
		Map<String, EmbeddedFile> map = new HashMap<>();
		checkDocumentCatalog();
		try {
			PDDocumentNameDictionary namesDictionary = new PDDocumentNameDictionary( this.document.getDocumentCatalog());
			PDEmbeddedFilesNameTreeNode efTree = namesDictionary.getEmbeddedFiles();
			if (efTree != null) {
				Map<String, PDComplexFileSpecification> names = efTree.getNames();
				if (names != null && !names.isEmpty()) {
					readEmbeddedFiles(names, map);
				} 
				
				if(analyzeKids) {
					readEmbeddedFilesFromKids(efTree, map);
				}
	         }
		}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
		return map;
	}
	private void readEmbeddedFilesFromKids(PDEmbeddedFilesNameTreeNode efTree, Map<String, EmbeddedFile> map) throws UtilsException{
		try {
			List<PDNameTreeNode<PDComplexFileSpecification>> kids = efTree.getKids();
			if(kids!=null && !kids.isEmpty()) {
				for (PDNameTreeNode<PDComplexFileSpecification> node : kids) {
					Map<String, PDComplexFileSpecification> names = node.getNames();
					readEmbeddedFiles(names, map);
				}
			}
		}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
	}
	private void readEmbeddedFiles(Map<String, PDComplexFileSpecification> names, Map<String, EmbeddedFile> map) throws UtilsException {
        for (Map.Entry<String, PDComplexFileSpecification> entry : names.entrySet()) {
        	PDComplexFileSpecification fileSpec = entry.getValue();
        	EmbeddedFile internalDoc = createPDFInternalDocument(fileSpec);
            if(internalDoc!=null) {
            	String fileName = internalDoc.getFilename()!=null ? internalDoc.getFilename() : "file-"+map.size()+1;
            	map.put(fileName, internalDoc);
            }
        }
    }
	private EmbeddedFile createPDFInternalDocument(PDComplexFileSpecification fileSpec) throws UtilsException {
		try {
			EmbeddedFile internalDoc = null;
			String filename = getFileName(fileSpec);
	        PDEmbeddedFile embeddedFile = getEmbeddedFile(fileSpec);
	        if(embeddedFile!=null) {
	        	internalDoc = new EmbeddedFile();
	        	internalDoc.setFileSpec(fileSpec);
	        	internalDoc.setEmbeddedFile(embeddedFile);
	        	internalDoc.setFilename(filename);
	        	internalDoc.setContent(embeddedFile.toByteArray());
	        	internalDoc.setMediaType(embeddedFile.getSubtype());
	        }
	        return internalDoc;
		}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
	}
	
    private String getFileName(PDComplexFileSpecification fileSpec) {
    	String filename = null;
        if (fileSpec != null) {
        	filename = fileSpec.getFileUnicode();
            if (filename == null) {
            	filename = fileSpec.getFileDos();
            }
            if (filename == null) {
            	filename = fileSpec.getFileMac();
            }
            if (filename == null) {
            	filename = fileSpec.getFileUnix();
            }
            if (filename == null) {
            	filename = fileSpec.getFile();
            }
        }
        return filename;
    }
    private PDEmbeddedFile getEmbeddedFile(PDComplexFileSpecification fileSpec) {
        PDEmbeddedFile embeddedFile = null;
        if (fileSpec != null) {
            embeddedFile = fileSpec.getEmbeddedFileUnicode();
            if (embeddedFile == null) {
                embeddedFile = fileSpec.getEmbeddedFileDos();
            }
            if (embeddedFile == null) {
                embeddedFile = fileSpec.getEmbeddedFileMac();
            }
            if (embeddedFile == null) {
                embeddedFile = fileSpec.getEmbeddedFileUnix();
            }
            if (embeddedFile == null) {
                embeddedFile = fileSpec.getEmbeddedFile();
            }
        }
        return embeddedFile;
    }
    
    
    
    public Map<String, EmbeddedFile> getAnnotationFiles() throws UtilsException{
		Map<String, EmbeddedFile> map = new HashMap<>();
		try {
			if(this.document.getPages()!=null) {
				for (PDPage page : this.document.getPages()) {
					readAnnotationFiles(page, map);
	            }
			}
		}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
		return map;
	}
    private void readAnnotationFiles(PDPage page, Map<String, EmbeddedFile> map) throws UtilsException {
    	try {
    		if(page!=null && page.getAnnotations()!=null) {
                for (PDAnnotation annotation : page.getAnnotations()) {
                    if (annotation instanceof PDAnnotationFileAttachment) {
                        PDAnnotationFileAttachment annotationFileAttachment = (PDAnnotationFileAttachment) annotation;
                        readAnnotationFile(annotationFileAttachment, map);
                    }
                }
			}
    	}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
    }
    private void readAnnotationFile(PDAnnotationFileAttachment annotationFileAttachment, Map<String, EmbeddedFile> map) throws UtilsException {
    	try {
	    	PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) annotationFileAttachment.getFile();
	        EmbeddedFile internalDoc = createPDFInternalDocument(fileSpec);
	        if(internalDoc!=null) {
	        	String fileName = internalDoc.getFilename()!=null ? internalDoc.getFilename() : "file-"+map.size()+1;
	        	map.put(fileName, internalDoc);
	        }
    	}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
    }
    
    
    
    public XFADocument getXFAFile() throws UtilsException{
    	checkDocumentCatalog();
    	try {
    		XFADocument doc = null;
    		PDAcroForm pdAcroForm = this.document.getDocumentCatalog().getAcroForm();
			if(pdAcroForm!=null && pdAcroForm.getXFA()!=null) {
				PDXFAResource xfa = pdAcroForm.getXFA();
				doc = new XFADocument();
				doc.setXfa(xfa);
				doc.setDocument(xfa.getDocument());
				doc.setContent(xfa.getBytes());
			}
			return doc;
		}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
    }
    
    
    public boolean isSignaturesExist() {
    	return this.document.getDocumentCatalog()!=null && this.document.getDocumentCatalog().getAcroForm()!=null && this.document.getDocumentCatalog().getAcroForm().isSignaturesExist();
    }

    private List<PDFSignature> pdfSignatures = null;
    private synchronized void initSignature() throws UtilsException {
    	if(this.pdfSignatures==null) {
    		
    		checkDocumentCatalog();
    		
    		if(this.rawDocument==null) {
    			throw new UtilsException("Initialize PDFReader with boolean parameter 'analyzeSignature' enabled");
    		}
    		
    		try {
        		PDAcroForm pdAcroForm = this.document.getDocumentCatalog().getAcroForm();
    			if(pdAcroForm!=null) {
		    		Map<String, PDSignatureField> mapPDSignatureField = readMapSignatureField(pdAcroForm);
		            
		    		List<PDSignature> signatures = this.document.getSignatureDictionaries();
		            this.pdfSignatures = readSignatures(signatures, mapPDSignatureField);		    		
    			}
    			else {
    				this.pdfSignatures=new ArrayList<>(); // per evitare nuovamente una inizializzazione
    			}
    		}catch(Exception e) {
    			throw new UtilsException(e.getMessage(),e);
    		}
    	}
    }
    private Map<String, PDSignatureField> readMapSignatureField(PDAcroForm pdAcroForm) throws UtilsException{
    	try {
    		Map<String, PDSignatureField> mapPDSignatureField = new HashMap<>();
            for (PDField f : pdAcroForm.getFields()) {
            	if(f instanceof PDSignatureField) {
            		PDSignatureField signatureField = (PDSignatureField) f;
            		if(signatureField.getSignature()!=null) {
            			mapPDSignatureField.put(signatureField.getSignature().getName(), signatureField);
 	                }
            	}
			}
            return mapPDSignatureField;
    	}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
    }
    private List<PDFSignature> readSignatures(List<PDSignature> signatures, Map<String, PDSignatureField> mapPDSignatureField) throws UtilsException{
    	try {
    		List<PDFSignature> pdfSignaturesList = new ArrayList<>();
    		if(signatures!=null) {
    			for (PDSignature signature : signatures) {
	        		PDSignatureField signatureField = mapPDSignatureField.get(signature.getName());
	        		if(signatureField==null) {
	        			throw new UtilsException("SignatureField '"+signature.getName()+"' not found");
	        		}
	        		PDFSignature pdfSignature = new PDFSignature(this.rawDocument, signatureField, signature);
	        		pdfSignaturesList.add(pdfSignature);
	        	}
    		}
    		return pdfSignaturesList;
    	}catch(Exception e) {
			throw new UtilsException(e.getMessage(),e);
		}
    }
    public List<PDFSignature> getSignature() throws UtilsException {
    	if(this.pdfSignatures==null) {
    		initSignature();
    	}
    	return this.pdfSignatures;
    }
}