require 'rdoc/parser'
require 'rdoc/parser/ruby'
require 'rdoc/known_classes'
##
# We attempt to parse C extension files. Basically we look for
# the standard patterns that you find in extensions: rb_define_class,
# rb_define_method and so on. We also try to find the corresponding
# C source for the methods and extract comments, but if we fail
# we don't worry too much.
#
# The comments associated with a Ruby method are extracted from the C
# comment block associated with the routine that _implements_ that
# method, that is to say the method whose name is given in the
# rb_define_method call. For example, you might write:
#
# /*
# * Returns a new array that is a one-dimensional flattening of this
# * array (recursively). That is, for every element that is an array,
# * extract its elements into the new array.
# *
# * s = [ 1, 2, 3 ] #=> [1, 2, 3]
# * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
# * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
# * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# */
# static VALUE
# rb_ary_flatten(ary)
# VALUE ary;
# {
# ary = rb_obj_dup(ary);
# rb_ary_flatten_bang(ary);
# return ary;
# }
#
# ...
#
# void
# Init_Array()
# {
# ...
# rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
#
# Here RDoc will determine from the rb_define_method line that there's a
# method called "flatten" in class Array, and will look for the implementation
# in the method rb_ary_flatten. It will then use the comment from that
# method in the HTML output. This method must be in the same source file
# as the rb_define_method.
#
# The comment blocks may include special directives:
#
# [Document-class: name]
# This comment block is documentation for the given class. Use this
# when the Init_xxx method is not named after the class.
#
# [Document-method: name]
# This comment documents the named method. Use when RDoc cannot
# automatically find the method from it's declaration
#
# [call-seq: text up to an empty line]
# Because C source doesn't give descripive names to Ruby-level parameters,
# you need to document the calling sequence explicitly
#
# In addition, RDoc assumes by default that the C method implementing a
# Ruby function is in the same source file as the rb_define_method call.
# If this isn't the case, add the comment:
#
# rb_define_method(....); // in filename
#
# As an example, we might have an extension that defines multiple classes
# in its Init_xxx method. We could document them using
#
# /*
# * Document-class: MyClass
# *
# * Encapsulate the writing and reading of the configuration
# * file. ...
# */
#
# /*
# * Document-method: read_value
# *
# * call-seq:
# * cfg.read_value(key) -> value
# * cfg.read_value(key} { |key| } -> value
# *
# * Return the value corresponding to +key+ from the configuration.
# * In the second form, if the key isn't found, invoke the
# * block and return its value.
# */
class RDoc::Parser::C < RDoc::Parser
parse_files_matching(/\.(?:([CcHh])\1?|c([+xp])\2|y)\z/)
include RDoc::Text
##
# C file the parser is parsing
attr_accessor :content
##
# Resets cross-file state. Call when parsing different projects that need
# separate documentation.
def self.reset
@@enclosure_classes = {}
@@known_bodies = {}
end
reset
##
# Prepare to parse a C file
def initialize(top_level, file_name, content, options, stats)
super
@known_classes = RDoc::KNOWN_CLASSES.dup
@content = handle_tab_width handle_ifdefs_in(@content)
@classes = Hash.new
@file_dir = File.dirname(@file_name)
end
def do_aliases
@content.scan(%r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m) do
|var_name, new_name, old_name|
class_name = @known_classes[var_name] || var_name
class_obj = find_class(var_name, class_name)
as = class_obj.add_alias RDoc::Alias.new("", old_name, new_name, "")
@stats.add_alias as
end
end
def do_classes
@content.scan(/(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx) do
|var_name, class_name|
handle_class_module(var_name, "module", class_name, nil, nil)
end
# The '.' lets us handle SWIG-generated files
@content.scan(/([\w\.]+)\s* = \s*rb_define_class\s*
\(
\s*"(\w+)",
\s*(\w+)\s*
\)/mx) do |var_name, class_name, parent|
handle_class_module(var_name, "class", class_name, parent, nil)
end
@content.scan(/(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/) do
|var_name, class_name, parent|
parent = nil if parent == "0"
handle_class_module(var_name, "class", class_name, parent, nil)
end
@content.scan(/(\w+)\s* = \s*rb_define_module_under\s*
\(
\s*(\w+),
\s*"(\w+)"
\s*\)/mx) do |var_name, in_module, class_name|
handle_class_module(var_name, "module", class_name, nil, in_module)
end
@content.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s*
\(
\s*(\w+),
\s*"(\w+)",
\s*([\w\*\s\(\)\.\->]+)\s* # for SWIG
\s*\)/mx) do |var_name, in_module, class_name, parent|
handle_class_module(var_name, "class", class_name, parent, in_module)
end
end
def do_constants
@content.scan(%r{\Wrb_define_
( variable |
readonly_variable |
const |
global_const | )
\s*\(
(?:\s*(\w+),)?
\s*"(\w+)",
\s*(.*?)\s*\)\s*;
}xm) do |type, var_name, const_name, definition|
var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel"
handle_constants type, var_name, const_name, definition
end
end
##
# Look for includes of the form:
#
# rb_include_module(rb_cArray, rb_mEnumerable);
def do_includes
@content.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m|
if cls = @classes[c]
m = @known_classes[m] || m
cls.add_include RDoc::Include.new(m, "")
end
end
end
def do_methods
@content.scan(%r{rb_define_
(
singleton_method |
method |
module_function |
private_method
)
\s*\(\s*([\w\.]+),
\s*"([^"]+)",
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
\s*(-?\w+)\s*\)
(?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
}xm) do
|type, var_name, meth_name, meth_body, param_count, source_file|
# Ignore top-object and weird struct.c dynamic stuff
next if var_name == "ruby_top_self"
next if var_name == "nstr"
next if var_name == "envtbl"
next if var_name == "argf" # it'd be nice to handle this one
var_name = "rb_cObject" if var_name == "rb_mKernel"
handle_method(type, var_name, meth_name, meth_body, param_count,
source_file)
end
@content.scan(%r{rb_define_attr\(
\s*([\w\.]+),
\s*"([^"]+)",
\s*(\d+),
\s*(\d+)\s*\);
}xm) do |var_name, attr_name, attr_reader, attr_writer|
#var_name = "rb_cObject" if var_name == "rb_mKernel"
handle_attr(var_name, attr_name,
attr_reader.to_i != 0,
attr_writer.to_i != 0)
end
@content.scan(%r{rb_define_global_function\s*\(
\s*"([^"]+)",
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
\s*(-?\w+)\s*\)
(?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
}xm) do |meth_name, meth_body, param_count, source_file|
handle_method("method", "rb_mKernel", meth_name,
meth_body, param_count, source_file)
end
@content.scan(/define_filetest_function\s*\(
\s*"([^"]+)",
\s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
\s*(-?\w+)\s*\)/xm) do
|meth_name, meth_body, param_count|
handle_method("method", "rb_mFileTest", meth_name, meth_body, param_count)
handle_method("singleton_method", "rb_cFile", meth_name, meth_body, param_count)
end
end
def find_attr_comment(attr_name)
if @content =~ %r{((?>/\*.*?\*/\s+))
rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi
$1
elsif @content =~ %r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m
$1
else
''
end
end
##
# Find the C code corresponding to a Ruby method
def find_body(class_name, meth_name, meth_obj, body, quiet = false)
case body
when %r"((?>/\*.*?\*/\s*))((?:(?:static|SWIGINTERN)\s+)?(?:intern\s+)?VALUE\s+#{meth_name}
\s*(\([^)]*\))([^;]|$))"xm
comment = $1
body_text = $2
params = $3
remove_private_comments comment if comment
# see if we can find the whole body
re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
body_text = $& if /#{re}/m =~ body
# The comment block may have been overridden with a 'Document-method'
# block. This happens in the interpreter when multiple methods are
# vectored through to the same C method but those methods are logically
# distinct (for example Kernel.hash and Kernel.object_id share the same
# implementation
override_comment = find_override_comment class_name, meth_obj.name
comment = override_comment if override_comment
find_modifiers comment, meth_obj if comment
#meth_obj.params = params
meth_obj.start_collecting_tokens
tk = RDoc::RubyToken::Token.new nil, 1, 1
tk.set_text body_text
meth_obj.add_token tk
meth_obj.comment = strip_stars comment
when %r{((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+#{meth_name}\s+(\w+))}m
comment = $1
body_text = $2
find_body class_name, $3, meth_obj, body, true
find_modifiers comment, meth_obj
meth_obj.start_collecting_tokens
tk = RDoc::RubyToken::Token.new nil, 1, 1
tk.set_text body_text
meth_obj.add_token tk
meth_obj.comment = strip_stars(comment) + meth_obj.comment.to_s
when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
unless find_body(class_name, $1, meth_obj, body, true)
warn "No definition for #{meth_name}" unless @options.quiet
return false
end
else
# No body, but might still have an override comment
comment = find_override_comment(class_name, meth_obj.name)
if comment
find_modifiers(comment, meth_obj)
meth_obj.comment = strip_stars comment
else
warn "No definition for #{meth_name}" unless @options.quiet
return false
end
end
true
end
def find_class(raw_name, name)
unless @classes[raw_name]
if raw_name =~ /^rb_m/
container = @top_level.add_module RDoc::NormalModule, name
else
container = @top_level.add_class RDoc::NormalClass, name
end
container.record_location @top_level
@classes[raw_name] = container
end
@classes[raw_name]
end
##
# Look for class or module documentation above Init_+class_name+(void),
# in a Document-class +class_name+ (or module) comment or above an
# rb_define_class (or module). If a comment is supplied above a matching
# Init_ and a rb_define_class the Init_ comment is used.
#
# /*
# * This is a comment for Foo
# */
# Init_Foo(void) {
# VALUE cFoo = rb_define_class("Foo", rb_cObject);
# }
#
# /*
# * Document-class: Foo
# * This is a comment for Foo
# */
# Init_foo(void) {
# VALUE cFoo = rb_define_class("Foo", rb_cObject);
# }
#
# /*
# * This is a comment for Foo
# */
# VALUE cFoo = rb_define_class("Foo", rb_cObject);
def find_class_comment(class_name, class_mod)
comment = nil
if @content =~ %r{
((?>/\*.*?\*/\s+))
(static\s+)?
void\s+
Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi then # )
comment = $1
elsif @content =~ %r{Document-(?:class|module):\s+#{class_name}\s*?(?:<\s+[:,\w]+)?\n((?>.*?\*/))}m then # "
comment = $1
elsif @content =~ %r{((?>/\*.*?\*/\s+))
([\w\.\s]+\s* = \s+)?rb_define_(class|module).*?"(#{class_name})"}xm then
comment = $1
end
return unless comment
comment = strip_stars comment
comment = look_for_directives_in class_mod, comment
class_mod.comment = comment
end
##
# Finds a comment matching +type+ and +const_name+ either above the
# comment or in the matching Document- section.
def find_const_comment(type, const_name)
if @content =~ %r{((?>^\s*/\*.*?\*/\s+))
rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi
$1
elsif @content =~ %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m
$1
else
''
end
end
##
# If the comment block contains a section that looks like:
#
# call-seq:
# Array.new
# Array.new(10)
#
# use it for the parameters.
def find_modifiers(comment, meth_obj)
if comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') or
comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
meth_obj.document_self = false
end
if comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') or
comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
seq = $1
seq.gsub!(/^\s*\*\s*/, '')
meth_obj.call_seq = seq
end
end
def find_override_comment(class_name, meth_name)
name = Regexp.escape(meth_name)
if @content =~ %r{Document-method:\s+#{class_name}(?:\.|::|#)#{name}\s*?\n((?>.*?\*/))}m then
$1
elsif @content =~ %r{Document-method:\s#{name}\s*?\n((?>.*?\*/))}m then
$1
end
end
def handle_attr(var_name, attr_name, reader, writer)
rw = ''
rw << 'R' if reader
rw << 'W' if writer
class_name = @known_classes[var_name]
return unless class_name
class_obj = find_class(var_name, class_name)
if class_obj
comment = find_attr_comment(attr_name)
comment = strip_stars comment
att = RDoc::Attr.new '', attr_name, rw, comment
@stats.add_method att
class_obj.add_attribute(att)
end
end
def handle_class_module(var_name, type, class_name, parent, in_module)
parent_name = @known_classes[parent] || parent
if in_module then
enclosure = @classes[in_module] || @@enclosure_classes[in_module]
if enclosure.nil? and enclosure = @known_classes[in_module] then
type = /^rb_m/ =~ in_module ? "module" : "class"
handle_class_module in_module, type, enclosure, nil, nil
enclosure = @classes[in_module]
end
unless enclosure then
warn "Enclosing class/module '#{in_module}' for #{type} #{class_name} not known"
return
end
else
enclosure = @top_level
end
if type == "class" then
full_name = if RDoc::ClassModule === enclosure then
enclosure.full_name + "::#{class_name}"
else
class_name
end
if @content =~ %r{Document-class:\s+#{full_name}\s*<\s+([:,\w]+)} then
parent_name = $1
end
cm = enclosure.add_class RDoc::NormalClass, class_name, parent_name
@stats.add_class cm
else
cm = enclosure.add_module RDoc::NormalModule, class_name
@stats.add_module cm
end
cm.record_location enclosure.top_level
find_class_comment cm.full_name, cm
@classes[var_name] = cm
@@enclosure_classes[var_name] = cm
@known_classes[var_name] = cm.full_name
end
##
# Adds constant comments. By providing some_value: at the start ofthe
# comment you can override the C value of the comment to give a friendly
# definition.
#
# /* 300: The perfect score in bowling */
# rb_define_const(cFoo, "PERFECT", INT2FIX(300);
#
# Will override +INT2FIX(300)+ with the value +300+ in the output RDoc.
# Values may include quotes and escaped colons (\:).
def handle_constants(type, var_name, const_name, definition)
class_name = @known_classes[var_name]
return unless class_name
class_obj = find_class var_name, class_name
unless class_obj then
warn "Enclosing class/module #{const_name.inspect} not known"
return
end
comment = find_const_comment type, const_name
comment = strip_stars comment
comment = normalize_comment comment
# In the case of rb_define_const, the definition and comment are in
# "/* definition: comment */" form. The literal ':' and '\' characters
# can be escaped with a backslash.
if type.downcase == 'const' then
elements = comment.split ':'
if elements.nil? or elements.empty? then
con = RDoc::Constant.new const_name, definition, comment
else
new_definition = elements[0..-2].join(':')
if new_definition.empty? then # Default to literal C definition
new_definition = definition
else
new_definition.gsub!("\:", ":")
new_definition.gsub!("\\", '\\')
end
new_definition.sub!(/\A(\s+)/, '')
new_comment = if $1.nil? then
elements.last.lstrip
else
"#{$1}#{elements.last.lstrip}"
end
con = RDoc::Constant.new const_name, new_definition, new_comment
end
else
con = RDoc::Constant.new const_name, definition, comment
end
@stats.add_constant con
class_obj.add_constant con
end
##
# Removes #ifdefs that would otherwise confuse us
def handle_ifdefs_in(body)
body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m, '\1')
end
def handle_method(type, var_name, meth_name, meth_body, param_count,
source_file = nil)
class_name = @known_classes[var_name]
return unless class_name
class_obj = find_class var_name, class_name
if class_obj then
if meth_name == "initialize" then
meth_name = "new"
type = "singleton_method"
end
meth_obj = RDoc::AnyMethod.new '', meth_name
meth_obj.singleton = %w[singleton_method module_function].include? type
p_count = Integer(param_count) rescue -1
if p_count < 0 then
meth_obj.params = "(...)"
elsif p_count == 0
meth_obj.params = "()"
else
meth_obj.params = "(" + (1..p_count).map{|i| "p#{i}"}.join(", ") + ")"
end
if source_file then
file_name = File.join @file_dir, source_file
if File.exist? file_name then
body = (@@known_bodies[file_name] ||= File.read(file_name))
else
warn "unknown source #{source_file} for #{meth_name} in #{@file_name}"
end
else
body = @content
end
if find_body(class_name, meth_body, meth_obj, body) and meth_obj.document_self then
class_obj.add_method meth_obj
@stats.add_method meth_obj
meth_obj.visibility = :private if 'private_method' == type
end
end
end
def handle_tab_width(body)
if /\t/ =~ body
tab_width = @options.tab_width
body.split(/\n/).map do |line|
1 while line.gsub!(/\t+/) { ' ' * (tab_width*$&.length - $`.length % tab_width)} && $~ #`
line
end .join("\n")
else
body
end
end
##
# Look for directives in a normal comment block:
#
# /*
# * :title: My Awesome Project
# */
#
# This routine modifies it's parameter
def look_for_directives_in(context, comment)
preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include
preprocess.handle comment do |directive, param|
case directive
when 'main' then
@options.main_page = param
''
when 'title' then
@options.title = param
''
else
warn "Unrecognized directive :#{directive}:"
false
end
end
comment
end
##
# Removes lines that are commented out that might otherwise get picked up
# when scanning for classes and methods
def remove_commented_out_lines
@content.gsub!(%r{//.*rb_define_}, '//')
end
def remove_private_comments(comment)
comment.gsub!(/\/?\*--\n(.*?)\/?\*\+\+/m, '')
comment.sub!(/\/?\*--\n.*/m, '')
end
##
# Extract the classes/modules and methods from a C file and return the
# corresponding top-level object
def scan
remove_commented_out_lines
do_classes
do_constants
do_methods
do_includes
do_aliases
@top_level
end
end