# An uploaded file attached to another record (polymorphic +attachable+).
#
# The database row stores the metadata (filename, content type, size,
# version count); the file contents are written to the global Gollum wiki
# (+$wiki+) under a per-attachment directory. The record is indexed through
# Elasticsearch::Model, with the indexed +body+ taken from the text
# extracted from the stored file.
class Attachment < ActiveRecord::Base
  include Elasticsearch::Model
  include Elasticsearch::Model::Callbacks

  # after_save :save_to_git
  belongs_to :attachable, :polymorphic => true
  validates_presence_of :attachable
  after_update :update_version_count

  settings analysis: {
    filter: {
      nGram_filter: Searcher::Filters.ngram
    },
    analyzer: {
      email_tokenizer: {
        tokenizer: "uax_url_email",
        filter: "lowercase"
      },
      nGram_analyzer: Searcher::Analyzers.ngram,
      email_ngram_analyzer: Searcher::Analyzers.email_ngram,
      whitespace_analyzer: Searcher::Analyzers.whitespace
    },
  } do
    mapping dynamic: 'false', index_analyzer: "nGram_analyzer", search_analyzer: "whitespace_analyzer" do
      indexes :id, :index => :not_analyzed
      indexes :type, :as => :type, :boost => 100
      indexes :filename, :analyzer => 'stop', :boost => 100
      indexes :content_type, :analyzer => 'snowball'
      indexes :body, :analyzer => 'snowball'
      indexes :created_at, :type => 'date', :include_in_all => false
      indexes :updated_at, :type => 'date', :include_in_all => false
    end
  end

  # Creates an attachment record from an uploaded file and, when the record
  # has no validation errors, stores the file contents in the wiki and
  # indexes the document.
  #
  # @param file    uploaded file; must respond to +original_filename+,
  #                +content_type+ and +size+
  # @param user    committer; must respond to +name+ and +email+
  # @param options [Hash] extra attributes merged into the new record
  #                (filename, content_type and size take precedence)
  # @return [Attachment] the created record (check +errors+ for failures)
  def self.create_from_uploaded_file(file, user, options = {})
    filename = file.original_filename
    attachment = Attachment.create(
      options.merge(
        filename: filename,
        content_type: file.content_type,
        size: file.size,
      ))

    if attachment.errors.none?
      attachment.save_to_gollum(
        file,
        user,
        "uploading file: #{filename}"
      )
      # Index explicitly, after the contents have been written to the wiki.
      attachment.__elasticsearch__.index_document
    end

    attachment
  end

  # Replaces the attachment's metadata with that of a newly uploaded file
  # and commits the new contents to the wiki.
  #
  # NOTE(review): the after_update callback (update_version_count) fires
  # inside update_attributes, i.e. before save_to_gollum commits the new
  # revision, so the stored version_count may lag by one - confirm whether
  # that is intended.
  #
  # @param file uploaded file (see create_from_uploaded_file)
  # @param user committer; must respond to +name+ and +email+
  # @return the result of save_to_gollum, or nil when the attribute update
  #         fails
  def update(file, user)
    filename = file.original_filename
    if update_attributes(
      filename: filename,
      content_type: file.content_type,
      size: file.size)
      save_to_gollum(
        file,
        user,
        "uploading file: #{filename}"
      )
    end
  end

  # @return the wiki file object looked up by this attachment's page path
  def wiki_file
    $wiki.file(wiki_page.path)
  end

  # @return the raw data of the stored wiki file
  def file_data
    wiki_file.raw_data
  end

  # @return the wiki page holding this attachment's data, as returned by
  #         +$wiki.page+ for this attachment's page name and directory
  def wiki_page
    $wiki.page(wiki_attachment_data, nil, wiki_dir)
  end

  # @return the versions of the wiki page
  def versions
    wiki_page.versions
  end

  # Writes the file contents to the wiki: updates the existing page when
  # one is present, otherwise writes a new page.
  #
  # @param file    uploaded file (its +tempfile+ is used when available) or
  #                the data itself
  # @param user    committer; must respond to +name+ and +email+
  # @param message [String] commit message
  # @return the result of the wiki update/write call
  def save_to_gollum(file, user, message)
    commit = { message: message, name: user.name, email: user.email }
    data_file = file.try(:tempfile) || file

    # Look the page up once instead of once per use.
    page = wiki_page
    if page.present?
      $wiki.update_page(page, page.name, page.format, data_file, commit)
    else
      $wiki.write_page(wiki_attachment_data, :textile, data_file, commit, wiki_dir)
    end
  end

  # Builds the document sent to Elasticsearch: the record's JSON plus a
  # +type+ (underscored class name) and a +body+ (extracted text).
  #
  # @param options [Hash] accepted for interface compatibility; unused
  # @return [Hash]
  def as_indexed_json(options = {})
    json = self.as_json
    json[:type] = self.class.to_s.underscore
    json[:body] = self.extracted_text
    json
  end

  # Extracts the text of the stored file with Docsplit, using a scratch
  # directory under Rails.root/tmp that is removed before returning.
  #
  # @return [String, nil] the extracted text transcoded to ASCII (invalid
  #         and non-ASCII characters dropped), or nil when extraction
  #         fails for any reason
  def extracted_text
    tmp_dir = File.join Rails.root, "tmp", "#{id}_#{Time.now.to_i}"
    text_dir = File.join tmp_dir, "text_dump"
    # The filename comes from the upload; keep only its base name so it
    # cannot point outside the scratch directory.
    tmp_filename = File.join tmp_dir, File.basename(filename)

    FileUtils.mkdir_p tmp_dir
    File.open tmp_filename, "wb" do |file|
      file.write self.file_data
    end

    docs = Dir[File.join(tmp_dir, "*")]
    Docsplit.extract_text(docs, output: text_dir)

    text = Dir[File.join(text_dir, "*")].map { |path| File.read(path) }.join("\n")

    encoding_options = {
      :invalid           => :replace,  # Replace invalid byte sequences
      :undef             => :replace,  # Replace anything not defined in ASCII
      :replace           => '',        # Use a blank for those replacements
      :universal_newline => true       # Always break lines with \n
    }
    # Pass the options as keywords rather than as a positional hash.
    text.encode(Encoding.find('ASCII'), **encoding_options)
  rescue
    Rails.logger.info "document not extractable."
    # Return nil explicitly so the logger's return value is not indexed
    # as the document body.
    nil
  ensure
    # Always remove the scratch copy of the file and its text dump.
    FileUtils.rm_rf(tmp_dir) if tmp_dir
  end

  # Delegates to DeletedItem.save_and_destroy for this record.
  #
  # @param deleted_by the actor performing the deletion
  def save_and_destroy(deleted_by)
    DeletedItem.save_and_destroy(self, deleted_by)
  end

  protected

  # @return [String] name of the wiki page that stores this attachment
  def wiki_attachment_data
    "#{id}-data"
  end

  # @return [String] wiki directory dedicated to this attachment
  def wiki_dir
    "/attachments/#{id}"
  end

  # Stores the current number of wiki page versions in +version_count+,
  # using update_column.
  def update_version_count
    update_column(:version_count, versions.count)
  end
end