# TODO:
# - DOI fetching?
# - Protect math expressions with braces

require "bibtex"

# Clean-up passes for BibTeX data.
#
# This module is a mixin: every method iterates with +each_entry+, which is
# not defined here and must be supplied by the including object (presumably a
# BibTeX::Bibliography -- confirm against the caller). Field values only need
# to respond to the String-style methods used below (+gsub!+, +chomp!+,
# +split+, +to_s+).
#
# All bang methods modify the entries in place and return +self+ so that they
# can be chained.
module BibtexMunge
  # How to handle values of the form {PART 1: PART 2} in the fields where that
  # is a frequent issue. Each row is [field to split, key for the first part,
  # key for the second part].
  @@split_fields = [
    # If the title contains a separator, the second part becomes a subtitle.
    [:title, :title, :subtitle],
    # If the booktitle contains a separator, the second part becomes a
    # booksubtitle.
    [:booktitle, :booktitle, :booksubtitle],
    # If the publisher contains a separator, the first part becomes an
    # address.
    [:publisher, :address, :publisher]
  ]

  # Fields that must not have trailing periods removed (because they might be
  # initials).
  @@name_fields = [:author, :editor]

  # Common nuisance fields to delete.
  @@prune_fields = [:isbn, :issn, :abstract]

  # Run a block on every field value in every entry.
  #
  # options[:except] may list field keys whose values are skipped.
  # Does nothing (and returns nil) when no block is given.
  def each_field(options = {})
    return unless block_given?

    skipped = options[:except] || []
    each_entry do |entry|
      entry.fields.each do |key, value|
        yield value unless skipped.include?(key)
      end
    end
  end

  # Replace the first name of every author/editor with the result of the name
  # object's own +normalize_initials+ (not defined in this file). Names
  # without a first name are left alone.
  def normalize_initials!
    each_entry do |entry|
      @@name_fields.each do |key|
        names = entry[key]
        next unless names

        names.each do |name|
          name.first = name.normalize_initials if name.first
        end
      end
    end
    self
  end

  # Drop one trailing period from every field except the name fields.
  def depunctuate!
    each_field(:except => @@name_fields) do |value|
      value.chomp!(".")
    end
    self
  end

  # Widen a single braced character that starts a word so that the braces
  # cover the rest of the word: " {B}lackwell" becomes " {Blackwell}".
  def extend_braces!
    each_field do |value|
      # The leading space keeps the pattern from mangling braces that follow
      # an accent command. This is *probably* redundant now that everything
      # is converted to Unicode, but better safe than sorry.
      value.gsub!(/ \{([^ ])\}([^ ]+)/, ' {\1\2}')
    end
    self
  end

  # Remove redundant brace levels from every field.
  def remove_double_braces!
    each_field do |value|
      # Eliminate overt double braces. The match is lazy so that each
      # {{...}} group is unwrapped on its own; a greedy match would turn
      # "{{A}} and {{B}}" into "{A}} and {{B}".
      value.gsub!(/\{\{(.*?)\}\}/, '{\1}')

      # Eliminate whole-entry braces (which would have been double braces in
      # the unparsed BibTeX file, since
      #   publisher = {{Blackwell}}
      # parses to
      #   @bib[:key].publisher = "{Blackwell}").
      # Only strip when the first brace is closed by the last character;
      # otherwise "{A} and {B}" would be left unbalanced as "A} and {B".
      value.gsub!(/\A\{(.*)\}\z/m, '\1') if single_brace_group?(value.to_s)
    end
    self
  end

  # Split e[oldkey] at the first ".", ":" or "?" that is followed by a space.
  # The text before the separator is stored under newsuperkey and the text
  # after it under newsubkey. Entries without the field, or without a
  # separator, are left untouched.
  def split_field!(e, oldkey, newsuperkey, newsubkey)
    return unless e[oldkey]

    # The capture group makes split keep the separator: [head, sep, tail].
    parts = e[oldkey].split(/([.:?]) /, 2)
    return unless parts.length == 3

    head, separator, tail = parts
    e[newsubkey] = tail
    # Keep the separator as part of the first field if it is a "?", to handle
    # titles of the form {Question? An Answer}.
    e[newsuperkey] = separator == "?" ? head + separator : head
  end

  # Apply every rule in @@split_fields to every entry.
  def split_fields!
    each_entry do |entry|
      @@split_fields.each do |oldkey, newsuperkey, newsubkey|
        split_field!(entry, oldkey, newsuperkey, newsubkey)
      end
    end
    self
  end

  # Delete the fields listed in @@prune_fields from every entry.
  def prune_fields!
    each_entry do |entry|
      @@prune_fields.each { |key| entry.delete(key) }
    end
    self
  end

  # Rewrite simple numeric page ranges (digits, a hyphen or dash of any
  # length, digits) as "N -- M".
  def normalize_page_ranges!
    each_entry do |entry|
      pages = entry.pages
      next unless pages

      pages.gsub!(/^(\d+)\s*(–|—|-|--|---)\s*(\d+)$/, '\1 -- \3')
    end
    self
  end

  # Run all clean-up passes in a fixed order and return self.
  def fix_everything!
    remove_double_braces!
    depunctuate!
    extend_braces!
    split_fields!
    prune_fields!
    normalize_initials!
    normalize_page_ranges!
  end

  private

  # True when +text+ is one brace group from start to end: it opens with "{"
  # and that brace is closed by the final character, not earlier.
  # Backslash-escaped braces are not counted.
  def single_brace_group?(text)
    return false unless text.start_with?("{") && text.end_with?("}")

    depth = 0
    escaped = false
    text.each_char.with_index do |char, index|
      if escaped
        escaped = false
      elsif char == "\\"
        escaped = true
      elsif char == "{"
        depth += 1
      elsif char == "}"
        depth -= 1
        # The opening brace has just been closed; it wraps the whole text
        # only if this is the last character.
        return index == text.length - 1 if depth.zero?
      end
    end
    false
  end
end