require File.dirname(__FILE__) + '/../spec_helper'

def revert_changes!(file,content)
  File.open(file,'w'){|might_have_been_modified|
    might_have_been_modified.write content
  }
end


def matching_document_for(query)
  # Returns matching document for any given query only if
  # exactly one document is found.
  # Specs don't pass otherwise.
  matching_documents=Finder.new(query).matching_documents
  matching_documents.size.should == 1
  matching_documents.first
end


describe Finder do
  before(:all) do
    Globalite.language = :en
    # SVN doesn't like non-ascii filenames.
    revert_changes!('spec/test_dirs/indexed/others/bäñüßé.txt',"just to know if files are indexed with utf8 filenames")

    # To be sure this file has the right content
    revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")

    once_upon_a_time=Time.local(1982,2,16,20,42)
    a_bit_later=Time.local(1983,12,9,9)
    nineties=Time.local(1990)
    # Used for modification date search.
    File.utime(0, once_upon_a_time, 'spec/test_dirs/indexed/basic/basic.pdf')
    File.utime(0, a_bit_later, 'spec/test_dirs/indexed/yet_another_dir/office2003-word-template.dot')
    File.utime(0, nineties, 'spec/test_dirs/indexed/others/placeholder.txt')
    Indexer.index_every_directory(remove_first=true)
  end

  it "should find documents according to their basename when specified with basename:query" do
    matching_documents_filename=Finder.new("basename:crossed").matching_documents.collect{|d| d.filename}
    matching_documents_filename.should include("crossed.txt")
    matching_documents_filename.should include("crossed.text")
  end

  it "should find documents according to their filename when specified with file:query or filename:query" do
    Finder.new("filename:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
    Finder.new("file:crossed.txt").matching_documents.collect{|d| d.content}.should include("text inside!")
  end

  it "should find documents according to their extension when specified with filetype:query" do
    Finder.new("filetype:odt").matching_documents.should_not be_empty
    Finder.new("filetype:pdf").matching_documents.should_not be_empty
  end

  it "should find documents according to their filename/basename/filetype even when unspecified" do
    Finder.new("crossed.text").matching_documents.should_not be_empty
    Finder.new("html").matching_documents.collect{|d| d.filename}.should include("zafh.net.html")
    Finder.new("crossed").total_hits.should >= 2
  end

  it "should give a boost to basename, filename and filetype in index" do
    index=Indexer.index
    index.field_infos[:basename].boost.should > 1.0
    index.field_infos[:filename].boost.should > 1.0
    index.field_infos[:filetype].boost.should > 1.0
  end

  it "should also index unreadable files with known mimetypes" do
    Finder.new("unreadable.pdf").matching_documents.should_not be_empty
    Finder.new("too_small.doc").matching_documents.should_not be_empty
  end

  it "should also index files with unknown mimetypes" do
    matching_document_for("filetype:xyz").basename.should == "ghjopdfg"
    matching_document_for("filetype:abc").filename.should == "asfg.abc"
  end

  it "should also index files with upper/mixed case extension" do
    Finder.new("filetype:pdf").matching_documents.entries.find{|doc| doc.filename=="other_basic.PDF"}.should_not be_nil
    Finder.new("filetype:doc").matching_documents.entries.find{|doc| doc.filename=="other_too_small.dOc"}.should_not be_nil
  end

  it "should also index content of files with upper/mixed case extension" do
    Finder.new("'just another content test\nin a pdf file'").matching_documents.entries.find{|doc| doc.filename=="other_basic.PDF"}.should_not be_nil
  end

  it "should also accept utf8 queries" do
    lambda{Finder.new("Éric Mößer")}.should_not raise_error
  end

  it "should find documents according to their utf8 content" do
    matching_document_for("Éric Mößer ext:pdf").basename.should == "utf8"
    matching_document_for("no me hace daño").filename.should == "utf8.txt"
    matching_document_for("Éric Mößer filetype:pdf").filename.should == "utf8.pdf"
  end

  it "should find documents according to their utf8 filenames" do
    matching_document_for("bäñüßé").content.should == "just to know if files are indexed with utf8 filenames"
  end

  it "should find documents according to their modification date" do
    Finder.new("date:<1982").matching_documents.should be_empty
    matching_document_for("19831209*").basename.should == "office2003-word-template"
    matching_document_for("date:<1983").filename.should == "basic.pdf"
    matching_document_for("date:>=1989 AND date:<=1992").filename.should == "placeholder.txt"
  end

  it "should not concatenate cells from xls file" do
    Finder.new("content:ABC").matching_documents.select{|doc| doc.extname==".xls"}.should be_empty
  end

  it "should not raise if an indexed document has been moved/deleted, but just ignore it" do
    @basic_dir='spec/test_dirs/indexed/basic/'
    @from=File.join(@basic_dir,'another_plain.text')
    @to=File.join(@basic_dir,'another_plain.text.bak')
    File.rename(@to,@from) if File.exists?(@to)
    begin
      lambda {
        File.rename(@from,@to)
      }.should change{Finder.new('filetype:text').matching_documents.size}.by(-1)
    ensure
      File.rename(@to,@from) if File.exists?(@to)
    end
  end

  it "should not index content of binary files"

  # Ferret sometimes SEGFAULT crashed with '*.pdf' queries
  it "should not crash while looking for *.pdf" do
    @finder=Finder.new("some query")
    lambda{@finder=Finder.new("*.pdf")}.should_not raise_error
    @finder.matching_documents.should_not be_empty
  end

  it "should use ? as placeholder" do
    matching_document_for("A?sorption machines").matching_content.should include("<<Absorption>> and <<Adsorption>> cooling <<machines>>!!!")
  end

  it "should use * as placeholder" do
    results=matching_document_for("A*ption machines").matching_content.should include("<<Absorption>> and <<Adsorption>> cooling <<machines>>!!!")
  end

  it "should not index those stupid Thumbs.db files" do
    Finder.new("Thumbs.db").matching_documents.should be_empty
    Finder.new("filetype:db").matching_documents.should_not be_empty
  end

  it "should keep content cached" do
    filename = "spec/test_dirs/indexed/others/placeholder.txt"
    content_before = "Absorption and Adsorption cooling machines!!!"
    some_doc=Document.new(filename)
    some_doc.content.should == content_before
    File.open(filename,'a'){|doc|
      doc.write("This line should not be indexed. It shouldn't be found in cache")
      }
    some_doc.content.should_not == content_before
    some_doc.cached.should == content_before
  end

  after(:all) do
    revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")
  end

#  Not sure about this spec!
#  English, or German?
#
#  TODO: Report!
#  Using custom Analyzer with StemFilter prevents * and ? to be used as placeholders
#  Better placeholders than stem!!!
#
#  it "should stem english words" do
#    complete_query="Beginning fished cats debates"
#    stem_queries=%w{beginning begin fished fish cats cat debate debater debaters fishing}
#    wrong_stem_queries=%w{beginni catty catties}
#    stem_en_file=Finder.new(complete_query).matching_document.filename
#    stem_queries.each{|q|
#      stem_results=Finder.new(q).matching_documents
#      stem_results.any?{|r| r.filename == stem_en_file}.should be_true
#    }
#    wrong_stem_queries.each{|q|
#      Finder.new(q).matching_documents.should be_empty
#    }
#  end
#
#  it "should stem german words" do
#    complete_query="Beginning fished cats debates"
#    stem_queries=%w{beginning begin fished fish cats cat debate}
#    wrong_stem_query="beginni fishe cats"
#    stem_en_file=Finder.new(complete_query).matching_document.filename
#    stem_queries.each{|q|
#      stem_results=Finder.new(q).matching_documents
#      puts q
#      stem_results.any?{|r| r.filename == stem_en_file}.should be_true
#    }
#  end
end