lib/boilerpipe/sax/html_content_handler.rb in boilerpipe-ruby-0.4.4 vs lib/boilerpipe/sax/html_content_handler.rb in boilerpipe-ruby-0.5.0

- old
+ new

@@ -15,11 +15,11 @@
    @token_buffer = ''
    @offset_blocks = 0
    @flush = false
    @block_tag_level = -1
-   @in_body = 0
+   @in_body_tag = 0
    @in_anchor_tag = 0
    @in_ignorable_element = 0
    @in_anchor_text = false
    @font_size_stack = []
    @last_start_tag = ''
@@ -90,13 +90,19 @@
    @last_event = :END_TAG
    @last_end_tag = tag
    @label_stacks.pop
  end
+ def not_in_body_tag?
+   @in_body_tag == 0
+ end
+
  def flush_block
    @flush = false
-   if @in_body == 0
+
+   # set title
+   if not_in_body_tag?
      @title = @token_buffer.strip if :TITLE == @last_start_tag
      clear_buffers
      return
    end
@@ -203,15 +209,15 @@
  # should we prevent less than zero here?
  def decrease_in_ignorable_element!
    @in_ignorable_element -= 1
  end
- def increase_in_body!
-   @in_body += 1
+ def enter_body_tag!
+   @in_body_tag += 1
  end
- def decrease_in_body!
-   @in_body -= 1
+ def exit_body_tag!
+   @in_body_tag -= 1
  end
  def in_ignorable_element?
    @in_ignorable_element > 0
  end