# -*- coding: utf-8 -*-
require 'rubygems'
require 'vortex_client'
require 'nokogiri'
require 'open-uri'
require 'uri'
require 'pry'
require 'net/http'
require 'pathname'
require 'json'
require 'pp'

# String#cleanpath was reported missing in the original author's Ruby
# version, so a no-op version is defined here. relative_url below wraps its
# arguments in Pathname and does not rely on this patch; it is kept so that
# any other code calling String#cleanpath keeps working.
class String
  def cleanpath
    self
  end
end

# Relativize URLs.
#
# @param from [String, Pathname] location of the document containing the link
# @param to [String, Pathname] location the link should point to
# @return [String] path of +to+ relative to +from+, with one leading "../"
#   removed (presumably because +from+ names a document rather than a
#   directory -- confirm)
def relative_url(from, to)
  target = Pathname.new(to)
  base = Pathname.new(from)
  target.relative_path_from(base).to_s.sub(%r{\A\.\./}, '')
end

# Read the link database from file and return it as a hash.
#
# Each line is expected to look like "old_url;new_url".
#
# @param path [String] file to read; defaults to the log file used by the
#   original script
# @return [Hash{String => String}] old URL => new URL
def read_linkbase_from_file(path = 'scrape_holocaust.log')
  old_pages = {}
  # File.foreach closes the file when done; chomp strips only the line
  # terminator, so a final line without a newline is not truncated.
  File.foreach(path) do |line|
    old_url, new_url = line.chomp.split(/;/)
    next if old_url.nil? || old_url.empty?

    if new_url.nil? || new_url.empty?
      # An entry without a target cannot be migrated; skip it instead of
      # failing later when the missing URL is parsed.
      warn "Skipping line without new url: #{line.chomp}"
      next
    end
    old_pages[old_url] = new_url
  end
  old_pages
end

# Generate html content for the right column box in vortex.
#
# Fetches +old_url+ and collects the text of every ".related .title" element.
#
# @param old_url [String] URL of the page to scrape
# @param new_url [String] unused; kept so existing callers keep working
# @return [String] the generated content
def generate_related_content(old_url, new_url)
  # URI#open (from open-uri) with a block closes the stream afterwards and,
  # unlike Kernel#open, can never be tricked into spawning a subprocess.
  doc = URI.parse(old_url).open { |io| Nokogiri::HTML.parse(io) }
  doc.encoding = 'utf-8'

  related_content_html = String.new
  doc.css(".related .title").each do |related_title|
    # NOTE(review): only newlines surround the title here; HTML markup may
    # have been lost from these literals -- confirm against the original file.
    related_content_html << "\n\n#{related_title.text}\n\n\n"
    # puts " Title: '" + related_title.text + "'"
    # # puts "Old url :'" + old_url + "'"
    # # puts " url :'" + new_url + "'"
    # NOTE(review): the original looked up related_title.next_element here
    # but never used the result; possibly its markup was meant to be
    # appended -- confirm.
    related_content_html << "\n\n"
  end
  related_content_html
end

# Rewrite links in +html+ that point to migrated pages.
#
# Every <a href> whose value is a key in @old_pages is replaced by the
# relative path from +new_url+ to the corresponding new page.
#
# @param html [String, nil] body html of the document
# @param new_url [String] URL of the document that contains +html+
# @return [String, nil] the html with known links rewritten; nil or empty
#   input is returned unchanged
def update_links_in_tekst(html, new_url)
  return html if html.nil? || html.empty?

  doc = Nokogiri::HTML.parse(html)
  doc.css("a").each do |link|
    href = link["href"]
    next unless href

    target = @old_pages[href]
    next unless target

    link_to = relative_url(new_url, target)
    puts " Replace link in body:#{link_to}"
    # Block form: the replacement is inserted verbatim, so a backslash in
    # link_to is never interpreted as a back-reference.
    # NOTE(review): this still replaces every occurrence of href in the
    # html, including inside longer URLs that merely start with it.
    html = html.gsub(href) { link_to }
  end
  html
end

# # Debug code
# @vortex = Vortex::Connection.new("https://nyweb4-dav.uio.no", :use_osx_keychain => true)
# @old_pages = read_linkbase_from_file
# old_url = "http://www.hlsenteret.no/kunnskapsbasen/tradisjoner/buddhisme/1049"
# new_url = "https://nyweb4-dav.uio.no/konv/kunnskapsbasen/tradisjoner/buddhisme/hellige-skrifter-i-buddhismen.html"
# # old_url = "http://www.hlsenteret.no/kunnskapsbasen/tema/religionsfrihet"
# # new_url = "https://nyweb4-dav.uio.no/konv/kunnskapsbasen/tema/religionsfrihet/religions-og-livssynsfrihet.html"
# src = @vortex.get(URI.parse(new_url).path)
# data = JSON.parse(src)
# data['properties']['hideAdditionalContent'] = "false"
# # data['properties']['related-content'] = related_content_html
# content = data['properties']['content']
# content = update_links_in_tekst(content,new_url)
# puts content
# exit
# update_links_in_tekst("http://www.hlsenteret.no/kunnskapsbasen/Holocaust_og_andre_folkemord",nil)
# exit

@vortex = Vortex::Connection.new("https://nyweb4-dav.uio.no", :use_osx_keychain => true)
@old_pages = read_linkbase_from_file

# For every migrated page: scrape the related-content box from the old site,
# rewrite the links in the body and store the document back in vortex.
@old_pages.each.with_index(1) do |(old_url, new_url), count|
  puts count
  puts old_url
  puts new_url
  new_path = URI.parse(new_url).path
  puts "Url: '#{new_path}'"

  related_content_html = generate_related_content(old_url, new_url)

  data = JSON.parse(@vortex.get(new_path))
  properties = data['properties']
  properties['hideAdditionalContent'] = "false"
  properties['related-content'] = related_content_html
  properties['content'] = update_links_in_tekst(properties['content'], new_url)

  @vortex.put_string(new_path, data.to_json)
  puts "-------"
  # exit if count > 10
end