Sha256: 9e63d2c3cf4552b8588d0f979ed1a1adab0bc9da0b0f54097aeda3341773ac4e

Contents?: true

Size: 1.65 KB

Versions: 2

Compression:

Stored size: 1.65 KB

Contents

require 'json'
require 'spec_helper'

# Exercises IiifPrint::TextExtraction::AltoReader against two ALTO XML
# fixtures: a minimal document and a larger NDNP sample.
RSpec.describe IiifPrint::TextExtraction::AltoReader do
  # Fixture directory shipped inside the gem.
  let(:fixture_path) { File.join(IiifPrint::GEM_PATH, *%w[spec fixtures files]) }

  let(:minimal_path) { File.join(fixture_path, 'minimal-alto.xml') }
  let(:ndnp_alto_path) { File.join(fixture_path, 'ndnp-alto-sample.xml') }

  # Content of the minimal fixture, read from disk up front.
  let(:minimal_xml) { File.read(minimal_path) }

  # The reader is constructed both from file content and from file paths.
  let(:reader_from_xml) { described_class.new(minimal_xml) }
  let(:reader_from_path) { described_class.new(minimal_path) }
  let(:ndnp_reader) { described_class.new(ndnp_alto_path) }

  describe "reads alto" do
    it "loads ALTO source" do
      # Content-built and path-built readers must expose the same source.
      expect(reader_from_path.source).to eq(reader_from_xml.source)
      expect(reader_from_path.source.size).to eq(1_383)
      expect(ndnp_reader.source.size).to eq(1_050_876)
    end

    it "loads document stream" do
      expect(reader_from_path.doc_stream).to be_a(Nokogiri::XML::SAX::Document)
      %i[text words].each do |message|
        expect(reader_from_path.doc_stream).to respond_to(message)
      end
    end
  end

  describe "outputs text derivative formats" do
    it "outputs plain text" do
      # Simple flat text input first.
      expect(reader_from_xml.text).to eq("This is only a test.")
      expect(reader_from_xml.text).to eq(reader_from_xml.doc_stream.text)
      # Then the more complex NDNP input.
      expect(ndnp_reader.text.size).to eq(30_519)
    end

    it "passes args to WordCoordsBuilder and receives output" do
      minimal_payload = JSON.parse(reader_from_xml.json)
      expect(minimal_payload['coords'].length).to be > 1

      ndnp_payload = JSON.parse(ndnp_reader.json)
      expect(ndnp_payload['coords'].size).to eq(2_125)
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
iiif_print-1.1.0 spec/iiif_print/text_extraction/alto_reader_spec.rb
iiif_print-1.0.0 spec/iiif_print/text_extraction/alto_reader_spec.rb