Sha256: 36a3da588b6a72df7ec17d1f71af8f497c0671f7972f34613b4ee66e6479576f
Contents?: true
Size: 804 Bytes
Versions: 9
Compression:
Stored size: 804 Bytes
Contents
require 'open-uri'
require 'cgi'
require 'json'
require 'yaml'

# Downloads page titles from Simple English Wiktionary through the MediaWiki
# `list=allpages` query, one batch of `per_page` titles at a time, and writes
# the de-duplicated titles to `filename` as a YAML array.
#
# Raises KeyError when a response lacks the "query"/"allpages" keys; network
# and JSON parsing errors propagate unchanged.

per_page = 500
base_url = "https://simple.wiktionary.org/w/api.php?action=query&aplimit=#{per_page}&list=allpages&format=json"
filename = "evaluation/dictionary.yml"

apfrom = ""
num = 0
titles = []

loop do
  url = base_url + "&apfrom=#{apfrom}"

  # The URL is passed as a format ARGUMENT, never spliced into the format
  # string: a CGI-escaped title contains sequences such as "%27s" or "%E2"
  # that would otherwise be interpreted as format directives.
  puts format("downloading page %2d: %s", num, url)

  # URI#read is provided by open-uri. Kernel#open no longer opens URLs on
  # Ruby 3.0+, so the bare `open(url)` form must not be used here.
  body = URI.parse(url).read

  # JSON.parse (not JSON.load) because the payload comes from the network.
  # `fetch` turns an unexpected payload into a KeyError naming the missing key.
  pages = JSON.parse(body).fetch("query").fetch("allpages")
  count = pages.size

  # The next request starts at the last title received, so that title is
  # fetched again on the following page; duplicates are removed before saving.
  apfrom = CGI.escape(pages.last["title"]) if count > 0
  titles.concat(pages.map { |page| page["title"] })
  num += 1

  # A batch that is not exactly `per_page` long is treated as the final one.
  break if count != per_page
end

unique_titles = titles.uniq
File.write(filename, unique_titles.to_yaml)

puts " Number of titles: #{unique_titles.size} Dictionary saved: #{filename} "
Version data entries
9 entries across 9 versions & 2 rubygems