README.md in ocr-file-0.0.8 vs README.md in ocr-file-0.0.10
- old
+ new
@@ -43,10 +43,11 @@
ocr_engine: 'tesseract', # 'cloud-vision'
# Image Pre-Processing
image_preprocess: true,
effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
automatic_reprocess: true, # May run double (or more) the operations, but can produce better results automatically
+ dimensions: [width, height], # Can be nil to keep the original dimensions; otherwise the images are locked to this size
# PDF to Image Processing
optimise_pdf: true,
extract_pdf_images: true, # if false will screenshot each PDF page
temp_filename_prefix: 'image',
spelling_correction: true, # Will attempt to fix text at the end (not used for searchable pdf output)
@@ -79,10 +80,11 @@
# How to merge files into a single PDF:
# The files can be images or other PDFs
file_paths = []
merged_document = ::HexaPDF::Document.new
- documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path) }
+ dimensions = [width, height] # or nil to maintain dimensions
+ documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path, dimensions: dimensions) }
OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
```
### Notes / Tips
Set `extract_pdf_images` to `false` for higher quality OCR. However, this will consume more temporary space per PDF page and will also be considerably slower.