Sha256: 0f1933d071c9589c4f66b146c444eff9ff561a63875d604f3c9b5c2c95d2821e

Contents?: true

Size: 471 Bytes

Versions: 23

Compression:

Stored size: 471 Bytes

Contents

#!/usr/bin/env ruby
# coding: utf-8

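# Extract an (imperfect) array of paragraphs from a PDF. Paragraph
# boundaries are guessed, somewhat arbitrarily, from line length: a line
# of 55 characters or fewer is treated as the last line of a paragraph.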

require 'pdf/reader'

reader = PDF::Reader.new('somefile.pdf')

# `paragraph` buffers the text of the paragraph currently being assembled;
# `paragraphs` collects every completed paragraph in document order.
paragraph = ""
paragraphs = []
reader.pages.each do |page|
  # /^.+/ yields one match per non-empty line, so blank lines are skipped.
  page.text.scan(/^.+/).each do |line|
    # Every line joins the current paragraph, preceded by a single space.
    paragraph << " " << line

    # A line longer than 55 characters is taken to continue onto the next
    # line; a shorter one is taken to be the final line of its paragraph.
    next if line.length > 55

    paragraphs << paragraph
    paragraph = ""
  end
end

# The buffer is not reset between pages, so a paragraph may span a page
# break. If the document ends on a long line the buffer still holds text
# at this point; flush it so the final paragraph is not silently dropped.
paragraphs << paragraph unless paragraph.empty?

Version data entries

23 entries across 22 versions & 2 rubygems

Version Path
pdf-reader-2.13.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.12.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.11.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.10.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.9.2 examples/fuzzy_paragraphs.rb
pdf-reader-2.9.1 examples/fuzzy_paragraphs.rb
pdf-reader-2.9.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.8.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.7.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.6.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.5.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.4.2 examples/fuzzy_paragraphs.rb
pdf-reader-2.4.1 examples/fuzzy_paragraphs.rb
pdf-reader-2.4.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.3.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.2.1 examples/fuzzy_paragraphs.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.4.0/gems/pdf-reader-2.2.0/examples/fuzzy_paragraphs.rb
embulk-input-druginfo_interview_form-0.1.0 vendor/bundle/ruby/2.5.0/gems/pdf-reader-2.2.0/examples/fuzzy_paragraphs.rb
pdf-reader-2.2.0 examples/fuzzy_paragraphs.rb
pdf-reader-2.1.0 examples/fuzzy_paragraphs.rb