# Parts of this source were borrowed from `rdoc/parser/c.rb`
# RDoc's license is packaged along with Ruby.

module YARD
  module Parser
    # Scans the text of a C source file for `rb_define_*` calls and registers
    # the matching module, class, method and mixin code objects, attaching the
    # C comment blocks it finds next to them as docstrings.
    class CParser
      # @param source [String] the raw C source text
      # @param file [String] the file name recorded on every created object
      def initialize(source, file = '(stdin)')
        @file = file
        @namespaces = {} # C variable name => code object created for it
        @content = clean_source(source)
      end

      # Runs the scanners in dependency order: namespaces first, then the
      # methods and mixins that live inside them.
      #
      # NOTE(review): {#parse_constants} is defined below but is not part of
      # this sequence.
      def parse
        parse_modules
        parse_classes
        parse_methods
        parse_includes
      end

      protected

      # Makes sure +object+ is a resolved code object before it is used as a
      # namespace.
      #
      # @param object the namespace to check
      # @param max_retries [Integer] how many times an unresolved proxy may
      #   trigger a load-order retry
      # @raise [NamespaceMissingError] on JRuby/Rubinius, where +callcc+ is not
      #   available (raised for any non-root object)
      # @raise [Parser::LoadOrderError] carrying the continuation when +object+
      #   is still a Proxy and the retry budget is not exhausted
      # @return +object+, or nil when it is the root object
      def ensure_loaded!(object, max_retries = 1)
        return if object.is_a?(CodeObjects::RootObject)
        if RUBY_PLATFORM =~ /java/ || defined?(::Rubinius)
          unless $NO_CONTINUATION_WARNING
            $NO_CONTINUATION_WARNING = true
            log.warn "JRuby/Rubinius do not implement Kernel#callcc and cannot " +
              "load files in order. You must specify the correct order manually."
          end
          raise NamespaceMissingError, object
        end

        retries = 0
        # Execution resumes here when the continuation captured below is called.
        context = callcc {|c| c }
        retries += 1

        if object.is_a?(CodeObjects::Proxy)
          if retries <= max_retries
            log.debug "Missing object #{object} in file `#{@file}', moving it to the back of the line."
            raise Parser::LoadOrderError, context
          end
        end
        object
      end

      # Registers a module object and remembers it under its C variable name.
      #
      # @param var_name [String] C variable the module is assigned to
      # @param mod_name [String] Ruby name of the module
      # @param in_module [String, nil] C variable of the enclosing namespace
      def handle_module(var_name, mod_name, in_module = nil)
        namespace = @namespaces[in_module] || (in_module ? P(in_module.gsub(/^rb_[mc]/, '')) : :root)
        ensure_loaded!(namespace)
        obj = CodeObjects::ModuleObject.new(namespace, mod_name)
        obj.add_file(@file)
        find_namespace_docstring(obj)
        @namespaces[var_name] = obj
      end

      # Registers a class object and remembers it under its C variable name.
      #
      # @param var_name [String] C variable the class is assigned to
      # @param class_name [String] Ruby name of the class
      # @param parent [String] C expression naming the superclass; the literal
      #   "0" means no superclass is recorded
      # @param in_module [String, nil] C variable of the enclosing namespace
      def handle_class(var_name, class_name, parent, in_module = nil)
        parent = nil if parent == "0"
        namespace = @namespaces[in_module] || (in_module ? P(in_module.gsub(/^rb_[mc]/, '')) : :root)
        ensure_loaded!(namespace)
        obj = CodeObjects::ClassObject.new(namespace, class_name)
        obj.superclass = @namespaces[parent] || parent.gsub(/^rb_[mc]/, '') if parent
        obj.add_file(@file)
        find_namespace_docstring(obj)
        @namespaces[var_name] = obj
      end

      # Registers a method object and looks up its C body and comment.
      #
      # @param scope [String] the `rb_define_*` suffix that declared the method
      # @param var_name [String] C variable of the owning namespace
      # @param name [String] Ruby method name
      # @param func_name [String] name of the implementing C function
      # @param source_file [String, nil] currently unused
      # @todo Handle +source_file+
      def handle_method(scope, var_name, name, func_name, source_file = nil)
        scope = %w[singleton_method module_function].include?(scope) ? :class : :instance
        namespace = @namespaces[var_name] || P(var_name.gsub(/^rb_[mc]/, ''))
        ensure_loaded!(namespace)
        obj = CodeObjects::MethodObject.new(namespace, name, scope)
        obj.add_file(@file)
        obj.parameters = []
        # Predicate methods get a Boolean return tag up front.
        obj.docstring.add_tag(YARD::Tags::Tag.new(:return, '', 'Boolean')) if name =~ /\?$/
        obj.source_type = :c
        find_method_body(obj, func_name)
      end

      # Registers a constant object and attaches its docstring.
      #
      # @param type [String] the `rb_define_*` suffix that declared the constant
      # @param var_name [String] C variable of the owning namespace
      # @param const_name [String] name of the constant
      # @param definition [String] literal C expression given as the value
      def handle_constants(type, var_name, const_name, definition)
        namespace = @namespaces[var_name]
        obj = CodeObjects::ConstantObject.new(namespace, const_name)
        comment = find_constant_docstring(type, const_name, obj)

        # In the case of rb_define_const, the definition and comment are in
        # "/* definition: comment */" form. The literal ':' and '\' characters
        # can be escaped with a backslash.
        if type.downcase == 'const'
          elements = comment.split(':')
          new_definition = elements[0..-2].join(':')
          if new_definition.empty? then # Default to literal C definition
            new_definition = definition
          else
            # NOTE(review): both substitutions replace a string with itself
            # ("\:" is just ":"), and new_definition is only consulted for its
            # leading whitespace below - confirm the intended unescaping.
            new_definition.gsub!("\:", ":")
            new_definition.gsub!("\\", '\\')
          end
          new_definition.sub!(/\A(\s+)/, '')
          comment = $1.nil? ? elements.last : "#{$1}#{elements.last.lstrip}"
        end
        obj.docstring = comment
      end

      # Finds the comment documenting a class or module and assigns it as the
      # object's docstring. Three places are tried in order: the comment right
      # before the `Init_<name>` function, a `Document-class:` /
      # `Document-module:` block, and the comment chunk right before the
      # `rb_define_class` / `rb_define_module` call naming the object.
      #
      # @param object the class or module code object
      def find_namespace_docstring(object)
        comment = nil
        if @content =~ %r{((?>/\*.*?\*/\s+))
                          (static\s+)?void\s+Init_#{object.name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi
          comment = $1
        elsif @content =~ %r{Document-(?:class|module):\s#{object.path}\s*?(?:<\s+[:,\w]+)?\n((?>.*?\*/))}m
          comment = $1
        elsif @content =~ /rb_define_(class|module)/m
          # Splitting on a captured comment yields the chunks in file order, so
          # the chunk seen just before the defining code is its comment. The
          # very first chunk has no predecessor and therefore no comment.
          previous = nil
          @content.split(/(\/\*.*?\*\/)\s*?\n/m).each do |chunk|
            if chunk =~ /rb_define_(class|module).*?"(#{object.name})"/m
              comment = previous
              break
            end
            previous = chunk
          end
        end
        object.docstring = parse_comments(object, comment) if comment
      end

      # Finds and cleans the comment documenting a constant or variable.
      #
      # @param type [String] the `rb_define_*` suffix that declared it
      # @param const_name [String] name of the constant
      # @param object the code object the comment belongs to; only consulted
      #   when a call-seq in the comment mentions the type "self"
      # @return [String] the cleaned comment, empty when none was found
      def find_constant_docstring(type, const_name, object = nil)
        comments = if @content =~ %r{((?>^\s*/\*.*?\*/\s+))
                       rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi
          $1
        elsif @content =~ %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m
          $1
        else
          ''
        end
        parse_comments(object, comments)
      end

      # Locates the C function implementing a method and stores its source and
      # leading comment on +object+. A `#define func_name other` alias is
      # followed to the function it names.
      #
      # @param object the method code object to fill in
      # @param func_name [String] name of the C function
      # @param content [String] text to search
      def find_method_body(object, func_name, content = @content)
        case content
        when %r"((?>/\*.*?\*/\s*))(?:(?:static|SWIGINTERN)\s+)?(?:intern\s+)?VALUE\s+#{func_name}
                \s*(\([^)]*\))([^;]|$)"xm
          comment = $1
          body_text = $&

          # see if we can find the whole body
          re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
          body_text = $& if /#{re}/m =~ content

          # The comment block may have been overridden with a 'Document-method'
          # block. This happens in the interpreter when multiple methods are
          # vectored through to the same C method but those methods are logically
          # distinct (for example Kernel.hash and Kernel.object_id share the same
          # implementation). Override comments are not looked up here.

          object.docstring = parse_comments(object, comment) if comment
          object.source = body_text
        when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{func_name}\s+(\w+)}m
          find_method_body(object, $2, content)
        else
          # No body; an override comment might still exist, but those are not
          # looked up, so nothing is assigned.
        end
      end

      # Strips C comment markers and common indentation from a comment block
      # and converts a leading call-seq into overload tags.
      #
      # @param object the code object the comment belongs to
      # @param comments [String] the raw comment text
      # @return [String] the cleaned docstring text
      def parse_comments(object, comments)
        spaces = nil
        comments = remove_private_comments(comments)
        comments = comments.split(/\r?\n/).map do |line|
          line.gsub!(/^\s*\/?\*\/?/, '')
          line.gsub!(/\*\/\s*$/, '')
          if line =~ /^\s*$/
            # Blank lines before the first text line are dropped entirely.
            next if spaces.nil?
            next ""
          end
          # The first text line sets how much indentation is removed.
          spaces = (line[/^(\s+)/, 1] || "").size if spaces.nil?
          line.gsub(/^\s{0,#{spaces}}/, '').rstrip
        end.compact

        comments.shift if comments.first =~ /^\s*Document-method:/
        comments = parse_callseq(object, comments)
        comments.join("\n")
      end

      # Turns the lines of a leading `call-seq:` section into `@overload` tags
      # appended after the remaining comment lines.
      #
      # @param object the code object the comment belongs to
      # @param comments [Array<String>] cleaned comment lines
      # @return [Array<String>] the remaining lines plus any overload tags;
      #   the input unchanged when it does not start with a call-seq
      def parse_callseq(object, comments)
        return comments unless comments[0] =~ /\Acall-seq:\s*(\S.+)?/
        if $1
          comments[0] = " #{$1}"
        else
          comments.shift
        end
        overloads = []
        # Signature lines are the indented (or blank) lines after the header.
        while comments.first =~ /^\s+(\S.+)/ || comments.first =~ /^\s*$/
          line = comments.shift.strip
          next if line.empty?
          line.sub!(/^\w+[\.#]/, '') # drop the receiver prefix
          signature, types = *line.split(/ [-=]> /)
          types = parse_types(object, types)
          if signature.sub!(/\[?\s*(\{(?:\s*\|(.+?)\|)?.*\})\s*\]?\s*$/, '') && $1
            blk, blkparams = $1, $2
          else
            blk, blkparams = nil, nil
          end
          case signature
          when /^(\w+)\s*=\s+(\w+)/
            signature = "#{$1}=(#{$2})"
          when /^\w+\s+\S/
            signature = signature.split(/\s+/)
            signature = "#{signature[1]}#{signature[2] ? '(' + signature[2..-1].join(' ') + ')' : ''}"
          when /^\w+\[(.+?)\]\s*(=)?/
            signature = "[]#{$2}(#{$1})"
          when /^\w+\s+(#{CodeObjects::METHODMATCH})\s+(\w+)/
            signature = "#{$1}(#{$2})"
          end
          next unless signature =~ /^#{CodeObjects::METHODNAMEMATCH}/
          signature = signature.rstrip
          overloads << "@overload #{signature}"
          overloads << " @yield [#{blkparams}]" if blk
          overloads << " @return [#{types.join(', ')}]" unless types.empty?
        end

        comments + [""] + overloads
      end

      # Maps the informal return-type words used in call-seq lines to type
      # names. Words that are not recognised are dropped.
      #
      # @param object the code object, consulted for the type "self"
      # @param types [String, nil] text to the right of the call-seq arrow
      # @return [Array<String>] the recognised type names
      def parse_types(object, types)
        if types =~ /true or false/
          ["Boolean"]
        else
          (types||"").split(/,| or /).map do |t|
            case t.strip.gsub(/^an?_/, '')
            when "class"; "Class"
            when "obj", "object", "anObject"; "Object"
            when "arr", "array", "anArray", /^\[/; "Array"
            when "str", "string", "new_str"; "String"
            when "enum", "anEnumerator"; "Enumerator"
            when "exc", "exception"; "Exception"
            when "proc", "proc_obj", "prc"; "Proc"
            when "binding"; "Binding"
            when "hsh", "hash", "aHash"; "Hash"
            when "ios", "io"; "IO"
            when "file"; "File"
            when "float"; "Float"
            when "time", "new_time"; "Time"
            when "dir", "aDir"; "Dir"
            when "regexp", "new_regexp"; "Regexp"
            when "matchdata"; "MatchData"
            when "encoding"; "Encoding"
            when "fixnum", "fix"; "Fixnum"
            when "int", "integer", "Integer"; "Integer"
            when "num", "numeric", "Numeric", "number"; "Numeric"
            when "aBignum"; "Bignum"
            when "nil"; "nil"
            when "true"; "true"
            when "false"; "false"
            when "boolean", "Boolean"; "Boolean"
            when "self"; object.namespace.name.to_s
            when /^[-+]?\d/; t
            end
          end.compact
        end
      end

      # Registers every `rb_define_module` / `rb_define_module_under` call
      # whose result is assigned to a variable.
      def parse_modules
        @content.scan(/(\w+)\s* = \s*rb_define_module\s*
                       \(\s*"(\w+)"\s*\)/mx) do |var_name, class_name|
          handle_module(var_name, class_name)
        end

        @content.scan(/(\w+)\s* = \s*rb_define_module_under\s*
                       \(
                         \s*(\w+),
                         \s*"(\w+)"
                       \s*\)/mx) do |var_name, in_module, class_name|
          handle_module(var_name, class_name, in_module)
        end
      end

      # Registers every `rb_define_class`, `boot_defclass` and
      # `rb_define_class_under` call whose result is assigned to a variable.
      def parse_classes
        # The '.' lets us handle SWIG-generated files
        @content.scan(/([\w\.]+)\s* = \s*(?:rb_define_class|boot_defclass)\s*
                       \(
                         \s*"(\w+)",
                         \s*(\w+|0)\s*
                       \)/mx) do |var_name, class_name, parent|
          handle_class(var_name, class_name, parent)
        end

        @content.scan(/([\w\.]+)\s* = \s*rb_define_class_under\s*
                       \(
                         \s*(\w+),
                         \s*"(\w+)",
                         \s*([\w\*\s\(\)\.\->]+)\s*  # for SWIG
                       \s*\)/mx) do |var_name, in_module, class_name, parent|
          handle_class(var_name, class_name, parent, in_module)
        end
      end

      # Registers every method declared through `rb_define_*method`,
      # `rb_define_module_function` or `rb_define_global_function`.
      def parse_methods
        @content.scan(%r{rb_define_
                         (
                           singleton_method |
                           method           |
                           module_function  |
                           private_method
                         )
                         \s*\(\s*([\w\.]+),
                           \s*"([^"]+)",
                           \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                           \s*(-?\w+)\s*\)
                         (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                       }xm) do |type, var_name, name, func_name, _param_count, source_file|

          # Ignore top-object and weird struct.c dynamic stuff
          next if var_name == "ruby_top_self"
          next if var_name == "nstr"
          next if var_name == "envtbl"

          var_name = "rb_cObject" if var_name == "rb_mKernel"
          handle_method(type, var_name, name, func_name, source_file)
        end

        @content.scan(%r{rb_define_global_function\s*\(
                           \s*"([^"]+)",
                           \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                           \s*(-?\w+)\s*\)
                         (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                       }xm) do |name, func_name, _param_count, source_file|
          handle_method("method", "rb_mKernel", name, func_name, source_file)
        end
      end

      # Records `rb_include_module` mixins on classes this file defined.
      # Includes into namespaces this parser has not seen are skipped.
      def parse_includes
        @content.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |klass, mod|
          if klass = @namespaces[klass]
            mod = @namespaces[mod] || P(mod.gsub(/^rb_[mc]/, ''))
            klass.mixins(:instance) << mod
          end
        end
      end

      # Registers constants and variables declared through
      # `rb_define_variable`, `rb_define_readonly_variable`, `rb_define_const`
      # or `rb_define_global_const`. Declarations without an owner, or owned
      # by `rb_mKernel`, are attributed to `rb_cObject`.
      def parse_constants
        @content.scan(%r{\Wrb_define_
                         ( variable |
                           readonly_variable |
                           const |
                           global_const | )
                         \s*\(
                           (?:\s*(\w+),)?
                           \s*"(\w+)",
                           \s*(.*?)\s*\)\s*;
                       }xm) do |type, var_name, const_name, definition|
          var_name = "rb_cObject" if !var_name || var_name == "rb_mKernel"
          handle_constants(type, var_name, const_name, definition)
        end
      end

      private

      # Normalises the raw source before any scanning happens.
      #
      # @param source [String] the raw C source text
      # @return [String] the text the scanners run against
      def clean_source(source)
        source = handle_ifdefs_in(source)
        source = handle_tab_width(source)
        source = remove_commented_out_lines(source)
        source
      end

      # Keeps only the `#else` branch of `#ifdef HAVE_PROTOTYPES` sections.
      def handle_ifdefs_in(body)
        body.gsub(/^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m, '\1')
      end

      # Expands tabs to spaces using tab stops every four columns. Text that
      # contains no tab is returned untouched; otherwise lines are re-joined
      # with "\n" (trailing empty lines are not preserved by the split).
      #
      # @param body [String] source text
      # @return [String] the text with every tab expanded
      def handle_tab_width(body)
        return body unless /\t/ =~ body

        tab_width = 4
        body.split(/\n/).map do |line|
          # Expand one run of tabs at a time so that each run's column is
          # measured against the already-expanded text to its left.
          while (tabs = /\t+/.match(line))
            column = tabs.begin(0)
            padding = tab_width * tabs[0].length - column % tab_width
            line[column, tabs[0].length] = ' ' * padding
          end
          line
        end.join("\n")
      end

      # Defuses `rb_define_*` calls that sit behind a `//` comment so the
      # scanners do not pick them up.
      def remove_commented_out_lines(body)
        body.gsub(%r{//.*rb_define_}, '//')
      end

      # Removes the private sections of a comment: everything between a `--`
      # line and the following `++` marker, and everything after a `--` line
      # that is never closed.
      #
      # @param comment [String] raw comment text
      # @return [String] a copy without the private sections
      def remove_private_comments(comment)
        comment = comment.gsub(/\/?\*--\n(.*?)\/?\*\+\+/m, '')
        comment = comment.sub(/\/?\*--\n.*/m, '')
        comment
      end
    end
  end
end