require 'pathname'
lib = Pathname.new(__FILE__).dirname.join.expand_path.to_s
$:.unshift lib

require 'uri'
require 'time'

# Heuristic parser and formatter for free-text bibliographic references.
#
# Citation.parse recognises MLA-, APA- and ISO 690-shaped references and falls
# back to a plain sentence split for anything else.
module Citation
  # Years at or below this value are ignored by the year heuristic, so that
  # page ranges and identifier fragments such as "1566" are not taken for a
  # publication year.
  MIN_YEAR = 1970

  # A ". " sentence break that is NOT preceded by a single capital initial
  # (" Y. Teh" must stay in one piece).
  SENTENCE_BREAK = /(?<! [A-Z])\. /

  # Plain value holder for the fields extracted from one reference.
  class Base
    attr_accessor :year, :sq, :desc, :url, :author, :title, :type

    def initialize
      @year = ''
      @sq = ''
      @url = ''
      @desc = ''
      @author = ''
      @title = ''
      @type = ''
    end

    # Returns the parsed fields as a Hash (url is not included).
    # NOTE: this shadows Kernel#puts when called on an instance.
    #
    # @return [Hash{Symbol => Object}]
    def puts
      { year: @year, sq: @sq, desc: @desc, author: @author, title: @title, type: @type }
    end

    # Renders the citation in the requested style.
    #
    # @param format [String, Symbol] 'MLA', 'APA', 'ISO' or 'bibtex',
    #   matched case-insensitively
    # @return [String] the formatted reference, or '' for an unknown format
    def out(format)
      case format.to_s.downcase
      when 'mla' then "#{@author} \"#{@title}\" #{@desc} (#{@year})"
      when 'apa' then "#{@author} (#{@year}) #{@title} #{@desc}"
      when 'iso' then "#{@author} #{@title} #{@desc}, #{@year}."
      when 'bibtex' then bibtex_entry
      else ''
      end
    end

    private

    # Builds the BibTeX @article entry. The key is the first six characters of
    # the author (spaces, dots and commas removed), the year, and the first
    # title word longer than three characters (omitted when there is none).
    def bibtex_entry
      author = @author.to_s
      key_author = author.delete(' .,').downcase[0..5]
      key_word = @title.to_s.split.find { |word| word.length > 3 }.to_s.downcase
      "@article{#{key_author}#{@year}#{key_word}, title={#{@title}}, " \
        "author={#{author.gsub(', and ', ', ')}}, year={#{@year}}, journal={#{@desc}} }"
    end
  end

  # Parses a free-text reference into a Citation::Base.
  #
  # Extraction order: http(s) URLs, a square-bracket label, the publication
  # year, then the style-specific author/title/description split.
  #
  # @param str [String] the raw reference text (not modified)
  # @param recursive [Boolean] accepted for compatibility; not used
  # @return [Citation::Base] url is an Array of extracted URLs; type is
  #   :MLA, :APA, :'ISO 690', or '' when no style was recognised
  def self.parse(str, recursive = false)
    base = Base.new
    string = str.to_s.dup

    urls = URI.extract(string, %w[http https])
    urls.each { |url| string.slice!(url) }
    base.url = urls

    # fetch square bracket label, e.g. "[20]" or "[Bat09]"
    base.sq = string[/\[[\w ][^\]]*\]/].to_s
    string.slice!(base.sq)

    # fetch year: first 4-digit run in (MIN_YEAR, current year]
    this_year = Time.now.year
    year = string.scan(/\d{4}/).map(&:to_i).find { |y| y > MIN_YEAR && y <= this_year }
    base.year = year.to_s
    string.slice!(base.year)

    parse_mla(base, string)
    parse_apa(base, string) if base.type.empty?
    parse_iso(base, string) if base.type.empty?
    parse_other(base, string) if base.type.empty?
    base
  end

  # MLA: the title is the double-quoted span; author precedes it and the
  # description follows it.
  def self.parse_mla(base, string)
    quoted = string[/".+"/]
    return unless quoted

    before, after = string.split(quoted)
    base.title = quoted.delete('"')
    base.author = before.to_s
    base.desc = after.to_s
    base.type = :MLA
  end

  # APA: once the year is removed, "(). " separates the author from the title.
  def self.parse_apa(base, string)
    matched = string[/\(\)\. .+\. /]
    return unless matched

    base.title = matched.gsub(/"|\(\)\. /, '')
    string.slice!(base.title)
    parts = string.split('(). ')
    base.author = parts.shift.to_s
    base.desc = parts.shift.to_s
    base.type = :APA
  end

  # ISO 690: once the year is removed, the reference ends in ", .".
  def self.parse_iso(base, string)
    return unless string.match?(/, \.$/)

    fields = string.split(SENTENCE_BREAK)
    base.author = fields.shift.to_s
    base.title = fields.shift.to_s if base.title == ''
    base.desc = fields.shift.to_s
    base.type = :'ISO 690'
  end

  # Fallback: first sentence is the author, second the title, the remainder
  # (re-joined) the description. type is left untouched.
  def self.parse_other(base, string)
    fields = string.split(SENTENCE_BREAK)
    base.author = fields.shift.to_s
    base.title = fields.shift.to_s if base.title == ''
    base.desc = fields.join('. ')
  end

  private_class_method :parse_mla, :parse_apa, :parse_iso, :parse_other
end

if __FILE__ == $0
  test = '[B ́at09] Norbert B ́atfai. On the Running Time of the Shortest Programs. CoRR, abs/0908.1159, 2009. http://arxiv.org/abs/0908.1159.'
  puts Citation.parse(test, false).puts

  # MLA
  test = 'Bátfai, Norbert. "A disembodied developmental robotic agent called Samu B\'atfai." arXiv preprint arXiv:1511.02889 (2015).'
  puts Citation.parse(test, false).puts

  # APA
  test = 'Bátfai, N. (2015). A disembodied developmental robotic agent called Samu B\'atfai. arXiv preprint arXiv:1511.02889.'
  puts Citation.parse(test, false).puts

  # ISO 690
  test = 'BÁTFAI, Norbert. A disembodied developmental robotic agent called Samu B\'atfai. arXiv preprint arXiv:1511.02889, 2015.'
  puts Citation.parse(test, false).puts

  test = 'MacKay, D. J. C., & Peto, L. C. B. (1995). A hierarchical Dirichlet language model. Natural Language Engineer- ing, 1, 289–307.'
  puts Citation.parse(test, false).puts

  test = '[20] Y. Teh, M. Jordan, M. Beal, and D. Blei. Hierarchical Dirichlet processes. Journal of the American Statistical Association, 101(476):1566–1581, 2007.'
  puts Citation.parse(test, false).out('bibtex')
end