# Title:
#   Xacto
#
# Synopsis:
#   Extractor is a tool for extracting code from embedded comment blocks.
#
# COPYRIGHT:
#   Copyright (c) 2006,2007 Thomas Sawyer & Tyler Rick
#
# LICENSE:
#   Distributed under the Ruby/GPL dual license.
#
# Authors:
#   - Thomas Sawyer
#   - Tyler Rick
#
# Todo:
#   - Should extract_block handle more than the first matching block?
#   - How can we handle embedded code in standard comments? Eg. #
#   - Should this code be wrapped in toplevel Ratchets module?
#   - extract_ruby_method_comment could return every method comment when
#     no method name is given.

require 'fileutils'
require 'open-uri'

begin
  require 'facets/string/tabs' # for margin
rescue LoadError
  # Nothing in this file calls String#margin, so a missing facets gem is
  # tolerated here rather than preventing the extractor from loading.
end

module Xact

  # Text Extraction class.
  #
  # Reads a source document (local path or remote URI) and pulls sections
  # out of it. Every extract_* method returns a two element array
  # [clip, offset], where +clip+ is the extracted text and +offset+ is a
  # count of the newlines that precede it in the document.
  class Extractor

    # Locations carrying an explicit scheme (http://, ftp://, ...) are read
    # through open-uri. The "//" is required so that Windows drive paths
    # such as "C:\foo" are still treated as local files.
    REMOTE_LOCATION = %r{\A[a-z][a-z0-9+.\-]*://}i

    # Path or URI of the document being read.
    attr_reader :file

    # Options hash. Only :unxml is consulted by this class.
    attr_reader :options

    alias_method :uri, :file

    # New extractor.
    #
    # file    - path or URI of the document.
    # options - Hash; when options[:unxml] is true, #text returns the
    #           document with <...> tags removed.
    def initialize(file, options={})
      @file    = file
      @options = options || {}
    end

    # Read file.
    #
    # Returns the unmodified document as a String. The document is read
    # once and cached.
    def raw
      @raw ||= read_source
    end

    # The text that the extract_* methods search.
    #
    # Returns #raw, or #raw with every <...> tag removed when
    # options[:unxml] is set. The cached #raw string is never modified.
    def text
      @text = options[:unxml] ? (@unxml_text ||= raw.gsub(/<.*?>/, '')) : raw
    end

    # Extract Pattern.
    #
    # pattern - Regexp (or regexp source String) whose FIRST capture group
    #           is the text to extract.
    #
    # Returns [clip, offset], where offset is the number of newlines before
    # the start of the captured text.
    # Raises RuntimeError if the pattern does not match, or if capture
    # group 1 is missing or did not participate in the match.
    def extract_pattern(pattern)
      pattern = Regexp.new(pattern)
      source  = text
      md      = pattern.match(source)
      clip    = md && md[1]
      raise "Pattern not found -- #{pattern}" unless clip

      offset = source[0...md.begin(1)].count("\n")
      [clip, offset]
    end

    # Extract Block.
    #
    # start - Regexp (or String) marking the beginning of the block.
    # stop  - Regexp (or String) marking the end of the block. It is
    #         matched against the text that follows the start marker only.
    #
    # Returns [clip, offset], where clip is the text between the two
    # markers (markers excluded).
    # Raises RuntimeError if either marker cannot be found.
    def extract_block(start, stop)
      start  = Regexp.new(start)
      stop   = Regexp.new(stop)
      source = text

      md_start = start.match(source)
      raise "Pattern not found -- #{start}" unless md_start

      remainder = source[md_start.end(0)..-1]
      md_stop   = stop.match(remainder)
      raise "Pattern not found -- #{stop}" unless md_stop

      clip = remainder[0...md_stop.begin(0)]
      # NOTE: the offset counts newlines before the START MARKER, not before
      # the clip itself (unlike extract_pattern). Kept as-is for
      # compatibility; the original author flagged it as uncertain.
      offset = source[0...md_start.begin(0)].count("\n")
      [clip, offset]
    end

    #################
    # Ruby Specific #
    #################

    # Returns a Ruby comment block with a given handle.
    #
    # handle - the word following "=begin" (eg. 'test' for "=begin test").
    #          An empty (or nil) handle matches the first =begin block
    #          found, whatever its handle.
    #
    # Returns [clip, offset] for the body of the block.
    # Raises RuntimeError if no such block exists.
    def extract_ruby_block_comment(handle)
      b = Regexp.escape(handle.to_s)
      if b == ''
        pattern = /^=begin.*?\n(.*?)\n=end/mi
      else
        pattern = /^=begin[ \t]+#{b}.*?\n(.*?)\n=end/mi
      end
      extract_pattern(pattern)
    end

    # Returns a Ruby method comment.
    #
    # meth - name of the method as written after "def" (eg. 'foo', 'foo?',
    #        'self.foo'). Matched case-sensitively.
    #
    # The clip is the run of consecutive comment lines directly above the
    # method's "def" line, returned verbatim (indentation and "#" markers
    # included, with a trailing newline).
    #
    # Returns [clip, offset].
    # Raises RuntimeError if the method is not found or has no comment
    # directly above it.
    def extract_ruby_method_comment(meth)
      name = Regexp.escape(meth.to_s)
      # The lookahead stops 'foo' from matching 'foo_bar', 'foo?' or 'foo='.
      pattern = /^((?:[ \t]*\#[^\n]*\n)+)[ \t]*def[ \t]+#{name}(?![\w?!=])/
      extract_pattern(pattern)
    end

    private

    # Reads the whole document named by #file into a String.
    def read_source
      location = @file.to_s
      if location =~ REMOTE_LOCATION
        read_remote(location)
      else
        File.read(location)
      end
    end

    # Fetches a remote document through open-uri.
    def read_remote(location)
      if URI.respond_to?(:open)
        URI.open(location) { |io| io.read }
      else
        # Ruby < 2.5: URI.open is not public; open-uri patches Kernel#open.
        open(location) { |io| io.read }
      end
    end

    # Earlier, commented-out implementation, retained from the original file.
    #
    #   # Extract the matching comment block.
    #
    #   def extract_block( handle='test' )
    #     text = File.read(file)
    #     md = pattern_block(handle).match(text)
    #     code = md ? md[1] : nil
    #     unless code
    #       puts "Code block not found -- #{handle}"
    #       exit 0 #return nil
    #     end
    #     offset = text[0...md.begin(1)].count("\n")
    #     return code, offset
    #   end
    #
    #   # Returns the comment inline regexp to match against.
    #
    #   def pattern_inline( mark )
    #     m = Regexp.escape(mark)
    #     /(\A\s*\#.*?^\s*def #{m}/mi
    #   end
    #
    #   def extract_inline( fname, mark=nil )
    #     prog = File.read( file )
    #     if mark
    #       md = pattern_inline(mark).match( prog )
    #     else
    #       prog.scan /^\s*\#/mi
    #       md = pattern_inline_all.match( prog )
    #     end
    #   end

  end

end


#  _____         _
# |_   _|__  ___| |_
#   | |/ _ \/ __| __|
#   | |  __/\__ \ |_
#   |_|\___||___/\__|
#

=begin test

  require 'test/unit'

  class ExtractorTest < Test::Unit::TestCase

    # Runs extract_ruby_block_comment against +code+ and returns the clip.
    # The extractor's cached source is seeded directly so no file is read.
    def extract_block_comment(handle, code)
      knife = Xact::Extractor.new('/dev/null')
      knife.instance_variable_set(:@raw, code)
      knife.extract_ruby_block_comment(handle).first
    end

    # Usual case.
    def test_pattern_block
      assert_equal "require 'foo'\nfoo",
        extract_block_comment('test', "=begin test\nrequire 'foo'\nfoo\n=end")
    end

    # Some tests for when the block is empty ('') -- should it act as a wildcard and match *any* block,
    # or should Extractor::Command#initialize complain about that.
    def test_pattern_block_no_handle
      assert_equal "require 'foo'\nfoo",
        extract_block_comment('', "=begin\nrequire 'foo'\nfoo\n=end")
    end

    def test_pattern_block_no_handle_given
      assert_equal "require 'foo'\nfoo",
        extract_block_comment('', "=begin test\nrequire 'foo'\nfoo\n=end")
    end

    # Yes, I know, as a side-effect of this regexp change, it will also match some invalid "blocks",
    # like =beginblah. But that seems like a nonissue, given that the Ruby parser would reject that
    # syntax anyway.
    def test_pattern_block_side_effects
      assert_equal "require 'foo'\nfoo",
        extract_block_comment('', "=beginblah\nrequire 'foo'\nfoo\n=end")
    end

  end

=end