Sha256: 4af6524543ff07e99dfdb868b3f5011d1b52f1adb5dbb3670a28a4df6d96f6c8
Contents?: true
Size: 1.72 KB
Versions: 2
Compression:
Stored size: 1.72 KB
Contents
#!/usr/bin/env ruby
#
# Fetches every Wikidata item selected by the SPARQL query below (per the
# original author's note: "all items of subclass <units of measurement>") and
# stores the raw entity data of each one as script/out/units/<id>.yml.
# Entities that already have a YAML file in that directory are skipped, so the
# script can be re-run to pick up only what is missing.

require 'bundler/setup'
# Standard-library pieces used directly below; required explicitly rather than
# relying on `reality` happening to load them.
require 'fileutils'
require 'json'
require 'set'
require 'yaml'
require 'reality'
require 'pp'

# Directory that receives one "<entity id>.yml" file per unit.
OUT_DIR = 'script/out/units'
# Endpoint the SPARQL query is sent to.
SPARQL_ENDPOINT = 'https://query.wikidata.org/sparql'
# Number of entity ids requested per wbgetentities call; also drives the
# progress counter printed for each batch.
BATCH_SIZE = 50

# 1. sparql: "select all items of subclass <units of measurement>"

query = <<~'SPARQL'
  PREFIX wikibase: <http://wikiba.se/ontology#>
  PREFIX wd: <http://www.wikidata.org/entity/>
  PREFIX wdt: <http://www.wikidata.org/prop/direct/>
  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
  PREFIX p: <http://www.wikidata.org/prop/>
  PREFIX v: <http://www.wikidata.org/prop/statement/>
  PREFIX schema: <http://schema.org/>

  SELECT ?item ?itemLabel WHERE {
    ?item wdt:P31/wdt:P279* wd:Q47574.
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  }
SPARQL

# Each binding becomes an [id, label] pair; the id is the "Q<digits>" part of
# the item URI.
items = Faraday
  .get(SPARQL_ENDPOINT, query: query, format: :json).body
  .yield_self(&JSON.method(:parse)).dig('results', 'bindings')
  .map { |bind| [bind.dig('item', 'value').match(/(Q\d+)/)[1], bind.dig('itemLabel', 'value')] }
  # Drop items whose label is just their own id (presumably items without an
  # English label -- verify against the label service's fallback behaviour).
  .reject { |id, name| id == name }

# File.write 'script/out/units.txt', res.map(&:last).sort.join("\n")

# TODO: In fact, this entire script should be "self-bootstrapped" by reality: that's what it is for,
# all in all. It is a real goal, though not achieved yet.

# Make sure the target directory exists before globbing it and writing into it;
# File.write does not create missing parent directories.
FileUtils.mkdir_p(OUT_DIR)

# Ids that already have a YAML file on disk. Kept in a Set so the membership
# test in reject! is a hash lookup instead of a scan of the whole list.
existing = Dir[File.join(OUT_DIR, '*.yml')].map { |path| File.basename(path, '.yml') }.to_set
items.reject! { |id, _| existing.include?(id) }

api = Reality::Describers::Wikidata::Impl::Api.new(user_agent: Reality::USER_AGENT)
cache = Reality::Describers::Wikidata::LabelsCache.new(api)

items.each_slice(BATCH_SIZE).each_with_index do |chunk, index|
  # Progress: how many items were handled before this batch, out of the total.
  puts "#{index * BATCH_SIZE} of #{items.count}"

  entities = api.wbgetentities.ids(*chunk.map(&:first))
                .props(:info, :sitelinks, :claims, :labels, :aliases).languages(:en)
                .response['entities']

  entities.each do |id, data|
    cache.update_from(data)
    File.write File.join(OUT_DIR, "#{id}.yml"), data.to_yaml
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
reality-0.1.0.alpha2 | script/extract_wikidata_units.rb |
reality-0.1.0.alpha | script/extract_wikidata_units.rb |