Sha256: 0f1933d071c9589c4f66b146c444eff9ff561a63875d604f3c9b5c2c95d2821e
Contents?: true
Size: 471 Bytes
Versions: 23
Compression:
Stored size: 471 Bytes
Contents
#!/usr/bin/env ruby
# coding: utf-8

# Extract an (imperfect) array of paragraphs divided somewhat
# arbitrarily on line length.
#
# Heuristic: a line longer than LONG_LINE_LENGTH characters is treated as
# running on into the next line of the same paragraph; a shorter line is
# treated as the last line of its paragraph.

require 'pdf/reader'

# Lines with more characters than this are considered paragraph continuations.
LONG_LINE_LENGTH = 55

reader = PDF::Reader.new('somefile.pdf')

paragraph = ""
paragraphs = []

reader.pages.each do |page|
  # /^.+/ matches each non-empty line of the page text; blank lines are skipped.
  page.text.scan(/^.+/).each do |line|
    # Every line is appended with a single leading space, so each finished
    # paragraph also starts with one space.
    paragraph << " #{line}"

    # A long line means the paragraph continues, possibly onto the next page.
    next if line.length > LONG_LINE_LENGTH

    paragraphs << paragraph
    paragraph = ""
  end
end

# Flush text still pending when the document ends on a long line; without
# this the final paragraph would be dropped.
paragraphs << paragraph unless paragraph.empty?
Version data entries
23 entries across 22 versions & 2 rubygems