Sha256: 95e5e23c6032ea6da823f79d0fe27bf2ef949d559cc2ce5c5605b68026aaccb8
Contents?: true
Size: 1.02 KB
Versions: 1
Compression:
Stored size: 1.02 KB
Contents
require 'fileutils'
require 'sequel'

# Top-level namespace of the `la` tool.
module La
  # Directory holding the tool's data; the SQLite database file is created here.
  ROOT_DIRECTORY = "#{ENV['HOME']}/.la"

  # Importers that load sentence files into the local SQLite database.
  module Import
    # Dispatches an import to the importer module named by +origin+.
    #
    # @param origin [String, Symbol] importer name (e.g. "tatoeba"); it is
    #   capitalized and resolved as a constant nested directly in Import
    # @param file [String] path of the file handed to the importer
    # @return [Object] whatever the importer's +import_sentences_from+ returns
    # @raise [NameError] if no importer constant with that name exists
    def self.[](origin, file)
      # inherit = false keeps the lookup inside Import, so a name such as
      # "object" cannot resolve to an unrelated top-level constant.
      const_get(origin.capitalize, false).import_sentences_from(file)
    end

    # Importer for tab-separated files with one "id<TAB>language<TAB>sentence"
    # record per line.
    module Tatoeba
      # Number of input lines parsed and inserted per database round trip.
      BATCH_SIZE = 100_000

      # Column order used for every bulk insert into the sentences table.
      COLUMNS = %i[sentence language tatoeba_id].freeze

      # Imports the English ('eng') sentences of +file+ into the +sentences+
      # FTS5 table of "#{ROOT_DIRECTORY}/db.sqlite", creating the directory
      # and the table when they do not exist yet. Rows are always appended,
      # so importing the same file twice stores its sentences twice.
      #
      # @param file [String] path of the tab-separated sentence file
      # @return [String] confirmation message naming the imported file
      def self.import_sentences_from(file)
        FileUtils.mkdir_p ROOT_DIRECTORY
        db = Sequel.sqlite "#{ROOT_DIRECTORY}/db.sqlite"
        # IF NOT EXISTS lets a second import reuse an existing database
        # instead of failing with "table sentences already exists".
        db.run 'CREATE VIRTUAL TABLE IF NOT EXISTS sentences USING fts5(sentence, language, tatoeba_id);'

        # The block form closes the file even when an insert raises.
        File.open(file) do |io|
          io.each_line.each_slice(BATCH_SIZE) do |batch|
            rows = english_rows(batch)
            # Progress output: first and last raw line of the current batch.
            p batch.first
            db[:sentences].import COLUMNS, rows
            p batch.last
          end
        end

        "#{file} was imported."
      ensure
        # Release the SQLite connection whether or not the import succeeded.
        db&.disconnect
      end

      # Parses raw lines and returns [sentence, language, tatoeba_id] rows
      # for the lines whose language column is 'eng'.
      def self.english_rows(lines)
        lines.each_with_object([]) do |line, rows|
          # chomp drops the line terminator that would otherwise be stored as
          # part of the sentence; the limit of 3 keeps a sentence containing
          # tabs in one piece.
          tatoeba_id, language, sentence = line.chomp.split("\t", 3)
          rows << [sentence, language, tatoeba_id] if language == 'eng'
        end
      end
      private_class_method :english_rows
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
la-0.0.0 | cli/import.rb |