lib/parser.rb in nexus_parser-1.0.0 vs lib/parser.rb in nexus_parser-1.1.0
- old
+ new
@@ -1,158 +1,158 @@
-class NexusFile::Parser
+class NexusParser::Parser
def initialize(lexer, builder)
@lexer = lexer
@builder = builder
end
def parse_file
- # nf = @builder.new_nexus_file # create new local NexusFile instance, nf
+ # nf = @builder.new_nexus_file # create new local NexusParser instance, nf
blks = []
- @lexer.pop(NexusFile::Tokens::NexusStart)
+ @lexer.pop(NexusParser::Tokens::NexusStart)
- while @lexer.peek(NexusFile::Tokens::BeginBlk)
+ while @lexer.peek(NexusParser::Tokens::BeginBlk)
- @lexer.pop(NexusFile::Tokens::BeginBlk) # pop it
+ @lexer.pop(NexusParser::Tokens::BeginBlk) # pop it
- if @lexer.peek(NexusFile::Tokens::AuthorsBlk)
+ if @lexer.peek(NexusParser::Tokens::AuthorsBlk)
parse_authors_blk
# we parse these below
- elsif @lexer.peek(NexusFile::Tokens::TaxaBlk)
+ elsif @lexer.peek(NexusParser::Tokens::TaxaBlk)
- @lexer.pop(NexusFile::Tokens::TaxaBlk )
+ @lexer.pop(NexusParser::Tokens::TaxaBlk )
parse_taxa_blk
- elsif @lexer.peek(NexusFile::Tokens::ChrsBlk)
- @lexer.pop(NexusFile::Tokens::ChrsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::ChrsBlk)
+ @lexer.pop(NexusParser::Tokens::ChrsBlk)
parse_characters_blk
- elsif @lexer.peek(NexusFile::Tokens::NotesBlk)
- @lexer.pop(NexusFile::Tokens::NotesBlk)
+ elsif @lexer.peek(NexusParser::Tokens::NotesBlk)
+ @lexer.pop(NexusParser::Tokens::NotesBlk)
parse_notes_blk
# we should parse this
- elsif @lexer.peek(NexusFile::Tokens::SetsBlk)
- @lexer.pop(NexusFile::Tokens::SetsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::SetsBlk)
+ @lexer.pop(NexusParser::Tokens::SetsBlk)
# we don't parse these
- elsif @lexer.peek(NexusFile::Tokens::TreesBlk)
- @foo = @lexer.pop(NexusFile::Tokens::TreesBlk).value
+ elsif @lexer.peek(NexusParser::Tokens::TreesBlk)
+ @foo = @lexer.pop(NexusParser::Tokens::TreesBlk).value
- elsif @lexer.peek(NexusFile::Tokens::LabelsBlk)
- @lexer.pop(NexusFile::Tokens::LabelsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::LabelsBlk)
+ @lexer.pop(NexusParser::Tokens::LabelsBlk)
- elsif @lexer.peek(NexusFile::Tokens::MqCharModelsBlk)
- @lexer.pop(NexusFile::Tokens::MqCharModelsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::MqCharModelsBlk)
+ @lexer.pop(NexusParser::Tokens::MqCharModelsBlk)
- elsif @lexer.peek(NexusFile::Tokens::AssumptionsBlk)
- @lexer.pop(NexusFile::Tokens::AssumptionsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::AssumptionsBlk)
+ @lexer.pop(NexusParser::Tokens::AssumptionsBlk)
- elsif @lexer.peek(NexusFile::Tokens::CodonsBlk)
- @lexer.pop(NexusFile::Tokens::CodonsBlk)
+ elsif @lexer.peek(NexusParser::Tokens::CodonsBlk)
+ @lexer.pop(NexusParser::Tokens::CodonsBlk)
end
end
end
# just removes it for the time being
def parse_authors_blk
# thing has non single word key/value pairs, like "AUTHOR NAME", SIGH
# for now just slurp it all up.
- @lexer.pop(NexusFile::Tokens::AuthorsBlk )
+ @lexer.pop(NexusParser::Tokens::AuthorsBlk )
#while true
- # if @lexer.peek(NexusFile::Tokens::EndBlk)
- # @lexer.pop(NexusFile::Tokens::EndBlk)
+ # if @lexer.peek(NexusParser::Tokens::EndBlk)
+ # @lexer.pop(NexusParser::Tokens::EndBlk)
# break
# else
- # while @lexer.peek(NexusFile::Tokens::ValuePair)
+ # while @lexer.peek(NexusParser::Tokens::ValuePair)
# # IMPORTANT, these are going to a general hash, there may ultimately be overlap of keys used in different blocks, this is ignored at present
- # @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ # @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
# end
- #@lexer.pop(NexusFile::Tokens::ID) if @lexer.peek(NexusFile::Tokens::ID)
+ #@lexer.pop(NexusParser::Tokens::ID) if @lexer.peek(NexusParser::Tokens::ID)
# end
#end
end
def parse_taxa_blk
- @lexer.pop(NexusFile::Tokens::Title) if @lexer.peek(NexusFile::Tokens::Title)
+ @lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title)
# need to not ignore to test against
- parse_dimensions if @lexer.peek(NexusFile::Tokens::Dimensions)
+ parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)
while true
- if @lexer.peek(NexusFile::Tokens::EndBlk)
- @lexer.pop(NexusFile::Tokens::EndBlk)
+ if @lexer.peek(NexusParser::Tokens::EndBlk)
+ @lexer.pop(NexusParser::Tokens::EndBlk)
break
else
- if @lexer.peek(NexusFile::Tokens::Taxlabels)
- @lexer.pop(NexusFile::Tokens::Taxlabels) if @lexer.peek(NexusFile::Tokens::Taxlabels)
+ if @lexer.peek(NexusParser::Tokens::Taxlabels)
+ @lexer.pop(NexusParser::Tokens::Taxlabels) if @lexer.peek(NexusParser::Tokens::Taxlabels)
i = 0
- while @lexer.peek(NexusFile::Tokens::Label)
- @builder.update_taxon(:index => i, :name => @lexer.pop(NexusFile::Tokens::Label).value)
+ while @lexer.peek(NexusParser::Tokens::Label)
+ @builder.update_taxon(:index => i, :name => @lexer.pop(NexusParser::Tokens::Label).value)
i += 1
end
- @lexer.pop(NexusFile::Tokens::SemiColon) if @lexer.peek(NexusFile::Tokens::SemiColon) # close of tax labels, placement of this seems dubious... but tests are working
+ @lexer.pop(NexusParser::Tokens::SemiColon) if @lexer.peek(NexusParser::Tokens::SemiColon) # close of tax labels, placement of this seems dubious... but tests are working
- elsif @lexer.peek(NexusFile::Tokens::MesquiteIDs)
+ elsif @lexer.peek(NexusParser::Tokens::MesquiteIDs)
- @lexer.pop(NexusFile::Tokens::MesquiteIDs) # trashing these for now
- elsif @lexer.peek(NexusFile::Tokens::MesquiteBlockID)
- @lexer.pop(NexusFile::Tokens::MesquiteBlockID)
+ @lexer.pop(NexusParser::Tokens::MesquiteIDs) # trashing these for now
+ elsif @lexer.peek(NexusParser::Tokens::MesquiteBlockID)
+ @lexer.pop(NexusParser::Tokens::MesquiteBlockID)
end
end
end
end
def parse_characters_blk
while true
- if @lexer.peek(NexusFile::Tokens::EndBlk) # we're at the end of the block, exit after geting rid of the semi-colon
+ if @lexer.peek(NexusParser::Tokens::EndBlk) # we're at the end of the block, exit after geting rid of the semi-colon
break
else
- @lexer.pop(NexusFile::Tokens::Title) if @lexer.peek(NexusFile::Tokens::Title) # not used at present
+ @lexer.pop(NexusParser::Tokens::Title) if @lexer.peek(NexusParser::Tokens::Title) # not used at present
- parse_dimensions if @lexer.peek(NexusFile::Tokens::Dimensions)
- parse_format if @lexer.peek(NexusFile::Tokens::Format)
+ parse_dimensions if @lexer.peek(NexusParser::Tokens::Dimensions)
+ parse_format if @lexer.peek(NexusParser::Tokens::Format)
- parse_chr_state_labels if @lexer.peek(NexusFile::Tokens::CharStateLabels)
+ parse_chr_state_labels if @lexer.peek(NexusParser::Tokens::CharStateLabels)
- parse_matrix if @lexer.peek(NexusFile::Tokens::Matrix)
+ parse_matrix if @lexer.peek(NexusParser::Tokens::Matrix)
# handle "\s*OPTIONS MSTAXA = UNCERTAIN;\s\n" within a characters block (sticks in an infinite loop right now)
- @lexer.pop(NexusFile::Tokens::MesquiteIDs) if @lexer.peek(NexusFile::Tokens::MesquiteIDs) # trashing these for now
- @lexer.pop(NexusFile::Tokens::MesquiteBlockID) if @lexer.peek(NexusFile::Tokens::MesquiteBlockID) # trashing these for now
+ @lexer.pop(NexusParser::Tokens::MesquiteIDs) if @lexer.peek(NexusParser::Tokens::MesquiteIDs) # trashing these for now
+ @lexer.pop(NexusParser::Tokens::MesquiteBlockID) if @lexer.peek(NexusParser::Tokens::MesquiteBlockID) # trashing these for now
false
end
end
- @lexer.pop(NexusFile::Tokens::EndBlk)
+ @lexer.pop(NexusParser::Tokens::EndBlk)
end
# prolly pop header then fuse with parse_dimensions
def parse_format
- @lexer.pop(NexusFile::Tokens::Format)
- while @lexer.peek(NexusFile::Tokens::ValuePair)
- @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ @lexer.pop(NexusParser::Tokens::Format)
+ while @lexer.peek(NexusParser::Tokens::ValuePair)
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
end
check_initialization_of_ntax_nchar
end
def parse_dimensions
- @lexer.pop(NexusFile::Tokens::Dimensions)
- while @lexer.peek(NexusFile::Tokens::ValuePair)
- @builder.add_var(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ @lexer.pop(NexusParser::Tokens::Dimensions)
+ while @lexer.peek(NexusParser::Tokens::ValuePair)
+ @builder.add_var(@lexer.pop(NexusParser::Tokens::ValuePair).value)
end
# the last value pair with a ; is automagically handled, don't try popping it again
check_initialization_of_ntax_nchar
end
@@ -168,125 +168,125 @@
(0..(@builder.nexus_file.vars[:ntax].to_i - 1)).each {|i| @builder.stub_taxon }
end
end
def parse_chr_state_labels
- @lexer.pop(NexusFile::Tokens::CharStateLabels)
+ @lexer.pop(NexusParser::Tokens::CharStateLabels)
while true
- if @lexer.peek(NexusFile::Tokens::SemiColon)
+ if @lexer.peek(NexusParser::Tokens::SemiColon)
break
else
opts = {}
name = ""
- index = @lexer.pop(NexusFile::Tokens::Number).value.to_i
- (name = @lexer.pop(NexusFile::Tokens::Label).value) if @lexer.peek(NexusFile::Tokens::Label) # not always given a letter
+ index = @lexer.pop(NexusParser::Tokens::Number).value.to_i
+ (name = @lexer.pop(NexusParser::Tokens::Label).value) if @lexer.peek(NexusParser::Tokens::Label) # not always given a letter
- @lexer.pop(NexusFile::Tokens::BckSlash) if @lexer.peek(NexusFile::Tokens::BckSlash)
+ @lexer.pop(NexusParser::Tokens::BckSlash) if @lexer.peek(NexusParser::Tokens::BckSlash)
- if !@lexer.peek(NexusFile::Tokens::Comma) || !@lexer.peek(NexusFile::Tokens::SemiColon)
+ if !@lexer.peek(NexusParser::Tokens::Comma) || !@lexer.peek(NexusParser::Tokens::SemiColon)
i = 0
# three kludge lines, need to figure out the label/number priority, could be issue in list order w/in tokens
- while @lexer.peek(NexusFile::Tokens::Label) || @lexer.peek(NexusFile::Tokens::Number)
- opts.update({i.to_s => @lexer.pop(NexusFile::Tokens::Label).value}) if @lexer.peek(NexusFile::Tokens::Label)
- opts.update({i.to_s => @lexer.pop(NexusFile::Tokens::Number).value.to_s}) if @lexer.peek(NexusFile::Tokens::Number)
+ while @lexer.peek(NexusParser::Tokens::Label) || @lexer.peek(NexusParser::Tokens::Number)
+ opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Label).value}) if @lexer.peek(NexusParser::Tokens::Label)
+ opts.update({i.to_s => @lexer.pop(NexusParser::Tokens::Number).value.to_s}) if @lexer.peek(NexusParser::Tokens::Number)
i += 1
end
end
- @lexer.pop(NexusFile::Tokens::Comma) if @lexer.peek(NexusFile::Tokens::Comma) # we may also have hit semicolon
+ @lexer.pop(NexusParser::Tokens::Comma) if @lexer.peek(NexusParser::Tokens::Comma) # we may also have hit semicolon
opts.update({:index => (index - 1), :name => name})
raise(ParserError, "Error parsing character state labels for (or around) character #{index -1}.") if !opts[:name]
@builder.update_chr(opts)
end
end
- @lexer.pop(NexusFile::Tokens::SemiColon)
+ @lexer.pop(NexusParser::Tokens::SemiColon)
end
def parse_matrix
- @lexer.pop(NexusFile::Tokens::Matrix)
+ @lexer.pop(NexusParser::Tokens::Matrix)
i = 0
while true
- if @lexer.peek(NexusFile::Tokens::SemiColon)
+ if @lexer.peek(NexusParser::Tokens::SemiColon)
break
else
- t = @lexer.pop(NexusFile::Tokens::Label).value
+ t = @lexer.pop(NexusParser::Tokens::Label).value
@builder.update_taxon(:index => i, :name => t) # if it exists its not re-added
- @builder.code_row(i, @lexer.pop(NexusFile::Tokens::RowVec).value)
+ @builder.code_row(i, @lexer.pop(NexusParser::Tokens::RowVec).value)
i += 1
end
end
- @lexer.pop(NexusFile::Tokens::SemiColon) # pop the semicolon
+ @lexer.pop(NexusParser::Tokens::SemiColon) # pop the semicolon
end
# this suck(s/ed), it needs work when a better API for Mesquite comes out
def parse_notes_blk
- # IMPORTANT - we don't parse the (CM <note>), we just strip the "(CM" ... ")" bit for now in NexusFile::Note
+ # IMPORTANT - we don't parse the (CM <note>), we just strip the "(CM" ... ")" bit for now in NexusParser::Note
@vars = {}
inf = 0
while true
inf += 1
raise "Either you have a gazillion notes or more likely parser is caught in an infinite loop inside parse_notes_block" if inf > 100000
- if @lexer.peek(NexusFile::Tokens::EndBlk)
- @lexer.pop(NexusFile::Tokens::EndBlk)
+ if @lexer.peek(NexusParser::Tokens::EndBlk)
+ @lexer.pop(NexusParser::Tokens::EndBlk)
@builder.add_note(@vars) # one still left to add
break
else
- if @lexer.peek(NexusFile::Tokens::ValuePair)
- @vars.update(@lexer.pop(NexusFile::Tokens::ValuePair).value)
+ if @lexer.peek(NexusParser::Tokens::ValuePair)
+ @vars.update(@lexer.pop(NexusParser::Tokens::ValuePair).value)
- elsif @lexer.peek(NexusFile::Tokens::Label)
+ elsif @lexer.peek(NexusParser::Tokens::Label)
if @vars[:type] # we have the data for this row write it, and start a new one
@builder.add_note(@vars)
@vars = {}
else
- @vars.update(:type => @lexer.pop(NexusFile::Tokens::Label).value)
+ @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
end
- elsif @lexer.peek(NexusFile::Tokens::FileLbl)
- @lexer.pop(NexusFile::Tokens::FileLbl)
+ elsif @lexer.peek(NexusParser::Tokens::FileLbl)
+ @lexer.pop(NexusParser::Tokens::FileLbl)
@vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
end
end
end
end
#@vars = {}
#while true
- # break if @lexer.peek(NexusFile::Tokens::EndBlk)
+ # break if @lexer.peek(NexusParser::Tokens::EndBlk)
- # @vars.update(:type => @lexer.pop(NexusFile::Tokens::Label).value)
+ # @vars.update(:type => @lexer.pop(NexusParser::Tokens::Label).value)
# kludge to get around the funny construct that references file
- # if @lexer.peek(NexusFile::Tokens::FileLbl)
- # @lexer.pop(NexusFile::Tokens::FileLbl)
+ # if @lexer.peek(NexusParser::Tokens::FileLbl)
+ # @lexer.pop(NexusParser::Tokens::FileLbl)
# vars.update(:file => 'file') # we check for whether :file key is present and handle conditionally
# end
# while true
- # meh = @lexer.pop(NexusFile::Tokens::ValuePair)
+ # meh = @lexer.pop(NexusParser::Tokens::ValuePair)
# @vars.update(meh.value)
- # break if !@lexer.peek(NexusFile::Tokens::ValuePair)
+ # break if !@lexer.peek(NexusParser::Tokens::ValuePair)
# end
#
# @builder.add_note(@vars)
# @vars = {}
#end
- # @lexer.pop(NexusFile::Tokens::EndBlk)
+ # @lexer.pop(NexusParser::Tokens::EndBlk)
def parse_trees_blk
true
end
@@ -318,11 +318,11 @@
# def parse_children(parent)
# parse a comma-separated list of nodes
# while true
# parse_node(parent)
- # if @lexer.peek(NexusFile::Tokens::Comma)
- # @lexer.pop(NexusFile::Tokens::Comma)
+ # if @lexer.peek(NexusParser::Tokens::Comma)
+ # @lexer.pop(NexusParser::Tokens::Comma)
# else
# break
# end
# end
# end