Sha256: 85ae6b77e7a0181cce1ed14cf462a8bd563742344666c4c4fca7ec0812e985a7

Contents?: true

Size: 1.91 KB

Versions: 5

Compression:

Stored size: 1.91 KB

Contents

package org.documentcloud;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.List;
import java.util.Locale;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;

// Extracts metadata from a PDF file.
public class ExtractInfo extends Extractor {

  private PDDocument doc;
  private PDDocumentInformation info;
  private String key;

  // The list of metadata keys we know how to extract.
  private enum Keys {
    AUTHOR, DATE, CREATOR, KEYWORDS, PRODUCER, SUBJECT, TITLE, LENGTH
  }

  // The mainline.
  public static void main(String[] args) {
    (new ExtractInfo()).run(args);
  }

  // The first argument is always the name of the metadata key.
  protected void parseArguments(List<String> args) {
    super.parseArguments(args);
    key = args.remove(0).toUpperCase();
  }

  // Extract the configured bit of metadata from a PDF, decrypting if necessary.
  public void extract(String pdfPath) {
    try {
      doc = PDDocument.load(pdfPath, false);
      decrypt(doc);
      info = doc.getDocumentInformation();
      String val = extractInfo();
      if (val != null) System.out.println(val);
      doc.close();
    } catch(IOException e) {
      System.out.println(e.getMessage());
      System.exit(1);
    }
  }

  // Use the PDDocumentInformation object to fetch metadata values as strings.
  public String extractInfo() throws IOException {
    switch(Keys.valueOf(key)) {
      case AUTHOR:    return info.getAuthor();
      case DATE:      return new SimpleDateFormat("yyyy-MM-dd").format(info.getCreationDate().getTime());
      case CREATOR:   return info.getCreator();
      case KEYWORDS:  return info.getKeywords();
      case PRODUCER:  return info.getProducer();
      case SUBJECT:   return info.getSubject();
      case TITLE:     return info.getTitle();
      case LENGTH:    return String.valueOf(doc.getNumberOfPages());
      default:        return null;
    }
  }

}

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
docsplit-0.2.0 lib/docsplit/ExtractInfo.java
docsplit-0.1.3 lib/docsplit/ExtractInfo.java
docsplit-0.1.2 lib/docsplit/ExtractInfo.java
docsplit-0.1.1 lib/docsplit/ExtractInfo.java
docsplit-0.1.0 lib/docsplit/ExtractInfo.java