=begin
---> The Streets Example does not work as the structure of external data used changed!
Streets Example
We load german cities from wikipedia and parse the document for cities and countries(states).
Further we load a collection of popular streetnames from a web-side called »strassen-in-deutschland«
We define three vertices, State, City and Street.
These are filled with the data from the web-sides
Then we connect the cities through streets just by creating edges.
At last we print the connected cities.
=end
module DataImport
def read_german_street_names
doc = Nokogiri::HTML(open('http://www.strassen-in-deutschland.de/die-haeufigsten-strassennamen-in-deutschland.html'))
strassen = doc.css("td[data-header='Straßenname: '] a") # identified via css-inspector in browser
# search for the css and include only links, then display the text-part
strassen.children.map( &:to_s )[3..-1] # omit the first three (strassen in deutschland, straßenverzeichnis, straßen)
end
def read_german_cities_from_wikipedia
# we extract
-elements and use the text until "("
#doc.xpath("//li").at(80)
# => #, #] children=[#]>, #]>
doc = Nokogiri::HTML(open('https://en.wikipedia.org/wiki/List_of_cities_and_towns_in_Germany'))
print doc
doc.xpath("//li").map{|x| x.text[0 .. x.text.index('(')-2] if x.text.index('(').present? }.compact
end
def read_german_cities_and_states_from_wikipedia
doc =
Nokogiri::HTML(open('https://en.wikipedia.org/wiki/List_of_cities_and_towns_in_Germany'))
doc.xpath("//li").map do |x|
if x.text.index('(').present?
[ x.text[0 .. x.text.index('(')-2] , x.text[ x.text.index('(')+1 .. x.text.index(')')-1] ]
end
end.compact
end
end # module
class StreetExample
include DataImport
def initialize db, rebuild: true
if rebuild
db.delete_class :State
db.delete_class :City
db.delete_class :Street
db.delete_class :CONNECTS
db.create_vertex_class :state, :city, :street
db.create_edge_class :CONNECTS
State.create_property( :name, type: :string, index: :unique )
City.create_properties( { name: { type: :string },
state: { type: :link, :linked_class => 'State' } }
) do
{ citi_idx: :unique }
end
Street.create_property :name , type: :string, index: :notunique
CONNECTS.create_property :distance, type: :integer, index: :notunique
logger.progname = "StreetsExample#Initialize"
logger.info { "Vertex- and Edge-Classes rebuilded" }
end
end
def read_from_web
read_german_cities_and_states_from_wikipedia.each do |city,state|
state = State.update_or_create( where: { name: state }).first
City.create name: city, state: "##{state.rid}"
end
logger.progname = "StreetsExample#ReadFromWeb"
logger.info { "#{City.count} Cities imported from Wikipedia " }
cities_rids = City.all.map &:rid
read_german_street_names.each_with_index do |street, i|
street_record = Street.create name: street
count = i
cities = Array.new
while count < cities_rids.size && cities.size < 5 do
cities << cities_rids[count]
count = count + i
end
CONNECTS.create_edge :from => street_record, :to => cities
end
logger.progname = "StreetsExample#ReadFromWeb"
logger.info { "#{CONNECTS.count} Edges between Streets and Cities created " }
end
def display_streets_per_state
State.all.each do |state|
streets= Street.all.map do |street |
if street.connects.in.detect{|x| x.state == state }
street.name + " connects " + street.connects.in.map( &:name ).join('; ')
end
end.compact
unless streets.empty?
puts "..................................."
puts state.name
puts "..................................."
puts streets.join("\n")
end
end
end
end
if $0 == __FILE__
require '../config/boot'
require 'open-uri'
require 'nokogiri'
ActiveOrient::OrientDB.default_server= { user: 'root', password: 'tretretre' }
r = ActiveOrient::OrientDB.new database: 'StreetTest'
ActiveOrient::OrientDB.logger.level = Logger::INFO
s= StreetExample.new r, rebuild: true
def to_orient
#puts "here hash"
substitute_hash = Hash.new
keys.each{|k| substitute_hash[k] = self[k].to_orient}
substitute_hash
end
s.read_from_web if City.count.zero?
s.display_streets_per_state
end