lib/docurium/docparser.rb in docurium-0.6.0 vs lib/docurium/docparser.rb in docurium-0.7.0
- old
+ new
@@ -1,48 +1,89 @@
require 'tempfile'
require 'fileutils'
require 'ffi/clang'
+require 'open3'
include FFI::Clang
class Docurium
class DocParser
- # Entry point for this parser
- # Parse `filename` out of the hash `files`
- def parse_file(orig_filename, files)
+ # The include directory where clang has its basic type definitions is not
+ # included in our default search path, so as a workaround we execute clang
+ # in verbose mode and grab its include paths from the output.
+ def find_clang_includes
+ @includes ||=
+ begin
+ clang = if ENV["LLVM_CONFIG"]
+ bindir = `#{ENV["LLVM_CONFIG"]} --bindir`.strip
+ "#{bindir}/clang"
+ else
+ "clang"
+ end
+ output, _status = Open3.capture2e("#{clang} -v -x c -", :stdin_data => "")
+ includes = []
+ output.each_line do |line|
+ if line =~ %r{^\s+/(.*usr|.*lib/clang.*)/include}
+ includes << line.strip
+ end
+ end
+
+ includes
+ end
+ end
+
+ def self.with_files(files, opts = {})
+ parser = self.new(files, opts)
+ yield parser
+ parser.cleanup!
+ end
+
+ def initialize(files, opts = {})
# unfortunately Clang wants unsaved files to exist on disk, so
# we need to create at least empty files for each unsaved file
# we're given.
- tmpdir = Dir.mktmpdir()
-
- unsaved = files.map do |name, contents|
- full_path = File.join(tmpdir, name)
+ prefix = (opts[:prefix] ? opts[:prefix] + "-" : nil)
+ @tmpdir = Dir.mktmpdir(prefix)
+ @unsaved = files.map do |name, contents|
+ full_path = File.join(@tmpdir, name)
dirname = File.dirname(full_path)
FileUtils.mkdir_p(dirname) unless Dir.exist? dirname
File.new(full_path, File::CREAT).close()
-
UnsavedFile.new(full_path, contents)
end
+ end
- # Override the path we want to filter by
- filename = File.join(tmpdir, orig_filename)
- tu = Index.new.parse_translation_unit(filename, ["-DDOCURIUM=1"], unsaved, {:detailed_preprocessing_record => 1})
+ def cleanup!
+ FileUtils.remove_entry(@tmpdir)
+ end
- FileUtils.remove_entry(tmpdir)
+ # Entry point for this parser
+ # Parse `filename` out of the hash `files`
+ def parse_file(orig_filename, opts = {})
- cursor = tu.cursor
+ includes = find_clang_includes + [@tmpdir]
+ # Override the path we want to filter by
+ filename = File.join(@tmpdir, orig_filename)
+ debug_enable if opts[:debug]
+ debug "parsing #{filename} #{@tmpdir}"
+ args = includes.map { |path| "-I#{path}" }
+ args << '-ferror-limit=1'
+
+ tu = Index.new(true, true).parse_translation_unit(filename, args, @unsaved, {:detailed_preprocessing_record => 1})
+
recs = []
- cursor.visit_children do |cursor, parent|
- #puts "visiting #{cursor.kind} - #{cursor.spelling}"
+ tu.cursor.visit_children do |cursor, parent|
location = cursor.location
next :continue if location.file == nil
next :continue unless location.file == filename
- #puts "for file #{location.file} #{cursor.kind} #{cursor.spelling} #{cursor.comment.kind} #{location.line}"
+ loc = "%d:%d-%d:%d" % [cursor.extent.start.line, cursor.extent.start.column, cursor.extent.end.line, cursor.extent.end.column]
+ debug "#{cursor.location.file}:#{loc} - visiting #{cursor.kind}: #{cursor.spelling}, comment is #{cursor.comment.kind}"
+
#cursor.visit_children do |c|
# puts " child #{c.kind}, #{c.spelling}, #{c.comment.kind}"
# :continue
#end
@@ -55,29 +96,42 @@
:line => extent.start.line,
:lineto => extent.end.line,
:tdef => nil,
}
- case cursor.kind
+ extract = case cursor.kind
when :cursor_function
- #puts "have function"
- rec.merge! extract_function(cursor)
+ debug "have function #{cursor.spelling}"
+ rec.update extract_function(cursor)
when :cursor_enum_decl
- rec.merge! extract_enum(cursor)
+ debug "have enum #{cursor.spelling}"
+ rec.update extract_enum(cursor)
when :cursor_struct
- #puts "raw struct"
- rec.merge! extract_struct(cursor)
+ debug "have struct #{cursor.spelling}"
+ rec.update extract_struct(cursor)
when :cursor_typedef_decl
- rec.merge! extract_typedef(cursor)
+ debug "have typedef #{cursor.spelling} #{cursor.underlying_type.spelling}"
+ rec.update extract_typedef(cursor)
else
raise "No idea how to deal with #{cursor.kind}"
end
+ rec.merge! extract
+
recs << rec
:continue
end
+ if debug_enabled
+ puts "parse_file: parsed #{recs.size} records for #{filename}:"
+ recs.each do |r|
+ puts "\t#{r}"
+ end
+ end
+
+ debug_restore
+
recs
end
def extract_typedef(cursor)
child = nil
@@ -147,19 +201,31 @@
end
end
def extract_subject_desc(comment)
subject = comment.child.text
- paras = comment.find_all { |cmt| cmt.kind == :comment_paragraph }.drop(1).map { |p| p.map(&:text).join() }
+ debug "\t\tsubject: #{subject}"
+ paras = comment.find_all { |cmt| cmt.kind == :comment_paragraph }.drop(1).map { |p| p.text }
desc = paras.join("\n\n")
+ debug "\t\tdesc: #{desc}"
return subject, desc
end
def extract_function(cursor)
comment = cursor.comment
- #puts "looking at function #{cursor.spelling}, #{cursor.display_name}"
+ $buggy_functions = %w()
+ debug_set ($buggy_functions.include? cursor.spelling)
+ if debug_enabled
+ puts "\tlooking at function #{cursor.spelling}, #{cursor.display_name}"
+ puts "\tcomment: #{comment}, #{comment.kind}"
+ cursor.visit_children do |cur, parent|
+ puts "\t\tchild: #{cur.spelling}, #{cur.kind}"
+ :continue
+ end
+ end
+
cmt = extract_function_comment(comment)
args = extract_function_args(cursor, cmt)
#args = args.reject { |arg| arg[:comment].nil? }
ret = {
@@ -180,10 +246,11 @@
}.join(', ')
decl = "#{ret[:type]} #{cursor.spelling}(#{argline})"
body = "#{decl};"
+ debug_restore
#puts cursor.display_name
# Return the format that docurium expects
{
:type => :function,
:name => cursor.spelling,
@@ -198,10 +265,11 @@
}
end
def extract_function_comment(comment)
subject, desc = extract_subject_desc(comment)
+ debug "\t\textract_function_comment: #{comment}, #{comment.kind}, #{subject}, #{desc}"
args = {}
(comment.find_all { |cmt| cmt.kind == :comment_param_command }).each do |param|
args[param.name] = param.comment.strip
end
@@ -273,10 +341,10 @@
cursor.visit_children do |cchild, cparent|
values << "#{cchild.type.spelling} #{cchild.spelling}"
:continue
end
- #puts "struct value #{values}"
+ debug "\tstruct value #{values}"
rec = {
:type => :struct,
:name => cursor.spelling,
:description => subject,