lib/boilerpipe/sax/html_content_handler.rb in boilerpipe-ruby-0.4.4 vs lib/boilerpipe/sax/html_content_handler.rb in boilerpipe-ruby-0.5.0
- old
+ new
@@ -15,11 +15,11 @@
@token_buffer = ''
@offset_blocks = 0
@flush = false
@block_tag_level = -1
- @in_body = 0
+ @in_body_tag = 0
@in_anchor_tag = 0
@in_ignorable_element = 0
@in_anchor_text = false
@font_size_stack = []
@last_start_tag = ''
@@ -90,13 +90,19 @@
@last_event = :END_TAG
@last_end_tag = tag
@label_stacks.pop
end
+ def not_in_body_tag?
+ @in_body_tag == 0
+ end
+
def flush_block
@flush = false
- if @in_body == 0
+
+ # set title
+ if not_in_body_tag?
@title = @token_buffer.strip if :TITLE == @last_start_tag
clear_buffers
return
end
@@ -203,15 +209,15 @@
# Lowers the ignorable-element nesting counter by one and returns the new value.
# Open question kept from the original author: should we prevent the counter
# from dropping below zero here? No lower bound is applied.
def decrease_in_ignorable_element!
  @in_ignorable_element = @in_ignorable_element - 1
end
- def increase_in_body!
- @in_body += 1
+ def enter_body_tag!
+ @in_body_tag += 1
end
- def decrease_in_body!
- @in_body -= 1
+ def exit_body_tag!
+ @in_body_tag -= 1
end
# Reports whether the ignorable-element counter is currently above zero.
def in_ignorable_element?
  @in_ignorable_element.positive?
end