module Linguist
  # Detects whether a blob is generated (machine-written) source, based on
  # its filename and, for most checks, on marker text found in its contents.
  #
  # Every predicate in this class returns a strict true or false.
  class Generated
    # Public: Is the blob a generated file?
    #
    # name - String filename
    # data - String blob data. An object responding to #call (e.g. a Proc)
    #        may also be passed in for lazy loading. This behavior is
    #        deprecated and you should always pass in a String.
    #
    # Returns true or false.
    def self.generated?(name, data)
      new(name, data).generated?
    end

    # Internal: Initialize Generated instance
    #
    # name - String filename
    # data - String blob data (or a deprecated callable returning it)
    def initialize(name, data)
      @name = name
      @extname = File.extname(name)
      @_data = data
    end

    attr_reader :name, :extname

    # Lazy load blob data if a callable was passed in.
    #
    # The resolved value is memoized with `||=`, so a callable that yields
    # nil or false is invoked again on the next access.
    #
    # Returns String data (or nil if nil was supplied).
    def data
      @data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
    end

    # Public: Get each line of data
    #
    # The data is split on "\n" with a negative limit, so trailing empty
    # lines are preserved: data ending in a newline yields a final ''.
    #
    # Returns an Array of lines (empty when data is nil).
    def lines
      # TODO: data should be required to be a String, no nils
      @lines ||= data ? data.split("\n", -1) : []
    end

    # Internal: Is the blob a generated file?
    #
    # Generated source code is suppressed in diffs and is ignored by
    # language statistics.
    #
    # Please add additional test coverage to
    # `test/test_blob.rb#test_generated` if you make any changes.
    #
    # Returns true or false.
    def generated?
      xcode_file? ||
      generated_net_designer_file? ||
      generated_net_specflow_feature_file? ||
      composer_lock? ||
      node_modules? ||
      go_vendor? ||
      npm_shrinkwrap_or_package_lock? ||
      godeps? ||
      generated_by_zephir? ||
      minified_files? ||
      has_source_map? ||
      source_map? ||
      compiled_coffeescript? ||
      generated_parser? ||
      generated_net_docfile? ||
      generated_postscript? ||
      compiled_cython_file? ||
      generated_go? ||
      generated_protocol_buffer? ||
      generated_javascript_protocol_buffer? ||
      generated_apache_thrift? ||
      generated_jni_header? ||
      vcr_cassette? ||
      generated_module? ||
      generated_unity3d_meta? ||
      generated_racc? ||
      generated_jflex? ||
      generated_grammarkit? ||
      generated_roxygen2? ||
      generated_jison? ||
      generated_yarn_lock? ||
      generated_grpc_cpp?
    end

    # Internal: Is the blob an Xcode file?
    #
    # Generated if the file extension is an Xcode
    # file extension.
    #
    # Returns true or false.
    def xcode_file?
      ['.nib', '.xcworkspacedata', '.xcuserstate'].include?(extname)
    end

    # Internal: Is the blob minified files?
    #
    # Consider a file minified if the average line length (integer
    # division of total characters by line count) is greater than 110.
    #
    # Currently, only JS and CSS files are detected by this method.
    #
    # Returns true or false.
    def minified_files?
      return false unless ['.js', '.css'].include? extname
      return false if lines.empty?

      total_length = lines.inject(0) { |sum, line| sum + line.length }
      (total_length / lines.length) > 110
    end

    # Internal: Does the blob contain a source map reference?
    #
    # We assume that if one of the last 2 lines starts with a source map
    # reference, then the current file was generated from other files.
    #
    # We use the last 2 lines because the last line might be empty.
    #
    # We only handle JavaScript, no CSS support yet.
    #
    # Returns true or false.
    def has_source_map?
      return false unless extname.downcase == '.js'
      lines.last(2).any? { |line| line.start_with?('//# sourceMappingURL') }
    end

    # Internal: Is the blob a generated source map?
    #
    # Source Maps usually have .css.map or .js.map extensions. In case they
    # are not following the name convention, detect them based on the content.
    #
    # Returns true or false.
    def source_map?
      return false unless extname.downcase == '.map'

      # `=~` is used (rather than a String-only matcher) because lines[0]
      # is nil for empty data; the result is coerced to a strict boolean.
      !!(name =~ /(\.css|\.js)\.map$/i ||                 # Name convention
         lines[0] =~ /^{"version":\d+,/ ||                # Revision 2 and later begin with the version number
         lines[0] =~ /^\/\*\* Begin line maps\. \*\*\/{/) # Revision 1 begins with a magic comment
    end

    # Internal: Is the blob of JS generated by CoffeeScript?
    #
    # CoffeeScript is meant to output JS that would be difficult to
    # tell if it was generated or not. Look for a number of patterns
    # output by the CS compiler.
    #
    # Returns true or false.
    def compiled_coffeescript?
      return false unless extname == '.js'

      # CoffeeScript generated by > 1.2 include a comment on the first line
      return true if lines[0] =~ /^\/\/ Generated by /

      wrapped_in_closure =
        lines[0] == '(function() {' &&     # First line is module closure opening
        lines[-2] == '}).call(this);' &&   # Second to last line closes module closure
        lines[-1] == ''                    # Last line is blank
      return false unless wrapped_in_closure

      score = 0

      lines.each do |line|
        next unless line =~ /var /

        # Underscored temp vars are likely to be Coffee
        score += 1 * line.scan(/_fn|_i|_len|_ref|_results/).size

        # bind and extend functions are very Coffee specific
        score += 3 * line.scan(/__bind|__extends|__hasProp|__indexOf|__slice/).size
      end

      # Require a score of 3. This is fairly arbitrary. Consider
      # tweaking later.
      score >= 3
    end

    # Internal: Is this a generated documentation file for a .NET assembly?
    #
    # .NET developers often check in the XML Intellisense file along with an
    # assembly - however, these don't have a special extension, so we have to
    # dig into the contents to determine if it's a docfile. Luckily, these files
    # are extremely structured, so recognizing them is easy.
    #
    # Returns true or false.
    def generated_net_docfile?
      return false unless extname.downcase == ".xml"
      return false unless lines.count > 3

      # .NET Docfiles always open with <doc> and their first tag is an
      # <assembly> tag
      lines[1].include?("<doc>") &&
        lines[2].include?("<assembly>") &&
        lines[-2].include?("</doc>")
    end

    # Internal: Is this a codegen file for a .NET project?
    #
    # Visual Studio often uses code generation to generate partial classes, and
    # these files can be quite unwieldy. Let's hide them.
    #
    # Returns true or false.
    def generated_net_designer_file?
      !!(name.downcase =~ /\.designer\.cs$/)
    end

    # Internal: Is this a codegen file for Specflow feature file?
    #
    # Visual Studio's SpecFlow extension generates *.feature.cs files
    # from *.feature files, they are not meant to be consumed by humans.
    # Let's hide them.
    #
    # Returns true or false.
    def generated_net_specflow_feature_file?
      !!(name.downcase =~ /\.feature\.cs$/)
    end

    # Internal: Is the blob of JS a parser generated by PEG.js?
    #
    # PEG.js-generated parsers are not meant to be consumed by humans.
    #
    # Returns true or false.
    def generated_parser?
      return false unless extname == '.js'

      # PEG.js-generated parsers include a comment near the top of the file
      # that marks them as such. Only the first five lines are inspected.
      !!(lines[0..4].join('') =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js/)
    end

    # Internal: Is the blob of PostScript generated?
    #
    # PostScript files are often generated by other programs. If they tell us so,
    # we can detect them.
    #
    # Returns true or false.
    def generated_postscript?
      return false unless ['.ps', '.eps', '.pfa'].include? extname

      # Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
      # streams are always preceded by the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
      # `=~` tolerates nil data.
      return true if data =~ /(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[\1<)\h{8,}\1/

      # We analyze the "%%Creator:" comment, which contains the author/generator
      # of the file. If there is one, it should be in one of the first few lines.
      creator = lines[0..9].find { |line| line =~ /^%%Creator: / }
      return false if creator.nil?

      # Most generators write their version number, while human authors' or companies'
      # names don't contain numbers. So look if the line contains digits. Also
      # look for some special cases without version numbers.
      return true if creator =~ /[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB/ ||
        creator =~ /PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops/

      # EAGLE doesn't include a version number when it generates PostScript.
      # However, it does prepend its name to the document's "%%Title" field.
      creator.include?("EAGLE") &&
        lines[0..4].any? { |line| line =~ /^%%Title: EAGLE Drawing / }
    end

    # Internal: Is the blob a generated Go source file?
    #
    # Requires a .go extension, more than one line, and the text
    # "Code generated by" somewhere on the first line.
    #
    # Returns true or false.
    def generated_go?
      return false unless extname == '.go'
      return false unless lines.count > 1

      lines[0].include?("Code generated by")
    end

    PROTOBUF_EXTENSIONS = ['.py', '.java', '.h', '.cc', '.cpp'].freeze

    # Internal: Is the blob a C++, Java or Python source file generated by the
    # Protocol Buffer compiler?
    #
    # Returns true or false.
    def generated_protocol_buffer?
      return false unless PROTOBUF_EXTENSIONS.include?(extname)
      return false unless lines.count > 1

      lines[0].include?("Generated by the protocol buffer compiler.  DO NOT EDIT!")
    end

    # Internal: Is the blob a Javascript source file generated by the
    # Protocol Buffer compiler?
    #
    # The marker is looked for on the sixth line only.
    #
    # Returns true or false.
    def generated_javascript_protocol_buffer?
      return false unless extname == ".js"
      return false unless lines.count > 6

      lines[5].include?("GENERATED CODE -- DO NOT EDIT!")
    end

    APACHE_THRIFT_EXTENSIONS = ['.rb', '.py', '.go', '.js', '.m', '.java', '.h', '.cc', '.cpp', '.php'].freeze

    # Internal: Is the blob generated by Apache Thrift compiler?
    #
    # The marker may appear anywhere within the first six lines.
    #
    # Returns true or false.
    def generated_apache_thrift?
      return false unless APACHE_THRIFT_EXTENSIONS.include?(extname)

      lines.first(6).any? { |l| l.include?("Autogenerated by Thrift Compiler") }
    end

    # Internal: Is the blob a C/C++ header generated by the Java JNI tool javah?
    #
    # Returns true or false.
    def generated_jni_header?
      return false unless extname == '.h'
      return false unless lines.count > 2

      lines[0].include?("/* DO NOT EDIT THIS FILE - it is machine generated */") &&
        lines[1].include?("#include <jni.h>")
    end

    # Internal: Is the blob part of node_modules/, which are not meant for humans in pull requests.
    #
    # Returns true or false.
    def node_modules?
      !!name.match(/node_modules\//)
    end

    # Internal: Is the blob part of the Go vendor/ tree,
    # not meant for humans in pull requests.
    #
    # Returns true or false.
    def go_vendor?
      !!name.match(/vendor\/((?!-)[-0-9A-Za-z]+(?<!-)\.)+(com|edu|gov|in|me|net|org|fm|io)/)
    end

    # Internal: Is the blob a generated npm shrinkwrap or package lock file?
    #
    # Returns true or false.
    def npm_shrinkwrap_or_package_lock?
      !!(name.match(/npm-shrinkwrap\.json/) || name.match(/package-lock\.json/))
    end

    # Internal: Is the blob part of Godeps/,
    # which are not meant for humans in pull requests.
    #
    # Returns true or false.
    def godeps?
      !!name.match(/Godeps\//)
    end

    # Internal: Is the blob a generated php composer lock file?
    #
    # Returns true or false.
    def composer_lock?
      !!name.match(/composer\.lock/)
    end

    # Internal: Is the blob generated by Zephir?
    #
    # Returns true or false.
    def generated_by_zephir?
      !!name.match(/.\.zep\.(?:c|h|php)$/)
    end

    # Internal: Is the blob a VCR Cassette file?
    #
    # Returns true or false.
    def vcr_cassette?
      return false unless extname == '.yml'
      return false unless lines.count > 2

      # VCR Cassettes have "recorded_with: VCR" in the second last line.
      lines[-2].include?("recorded_with: VCR")
    end

    # Internal: Is this a compiled C/C++ file from Cython?
    #
    # Cython-compiled C/C++ files typically contain:
    # /* Generated by Cython x.x.x on ... */
    # on the first line.
    #
    # Returns true or false.
    def compiled_cython_file?
      return false unless ['.c', '.cpp'].include? extname
      return false unless lines.count > 1

      lines[0].include?("Generated by Cython")
    end

    # Internal: Is it a KiCAD or GFortran module file?
    #
    # KiCAD module files contain:
    # PCBNEW-LibModule-V1  yyyy-mm-dd h:mm:ss XM
    # on the first line.
    #
    # GFortran module files contain:
    # GFORTRAN module version 'x' created from
    # on the first line.
    #
    # Returns true or false.
    def generated_module?
      return false unless extname == '.mod'
      return false unless lines.count > 1

      lines[0].include?("PCBNEW-LibModule-V") ||
        lines[0].include?("GFORTRAN module version '")
    end

    # Internal: Is this a metadata file from Unity3D?
    #
    # Unity3D Meta files start with:
    #   fileFormatVersion: X
    #   guid: XXXXXXXXXXXXXXX
    #
    # Returns true or false.
    def generated_unity3d_meta?
      return false unless extname == '.meta'
      return false unless lines.count > 1

      lines[0].include?("fileFormatVersion: ")
    end

    # Internal: Is this a Racc-generated file?
    #
    # A Racc-generated file contains:
    # # This file is automatically generated by Racc x.y.z
    # on the third line.
    #
    # Returns true or false.
    def generated_racc?
      return false unless extname == '.rb'
      return false unless lines.count > 2

      lines[2].start_with?("# This file is automatically generated by Racc")
    end

    # Internal: Is this a JFlex-generated file?
    #
    # A JFlex-generated file contains:
    # /* The following code was generated by JFlex x.y.z on d/at/e ti:me */
    # on the first line.
    #
    # Returns true or false.
    def generated_jflex?
      return false unless extname == '.java'
      return false unless lines.count > 1

      lines[0].start_with?("/* The following code was generated by JFlex ")
    end

    # Internal: Is this a GrammarKit-generated file?
    #
    # A GrammarKit-generated file typically contains:
    # // This is a generated file. Not intended for manual editing.
    # on the first line. This is not always the case, as it's possible to
    # customize the class header.
    #
    # Returns true or false.
    def generated_grammarkit?
      return false unless extname == '.java'
      return false unless lines.count > 1

      lines[0].start_with?("// This is a generated file. Not intended for manual editing.")
    end

    # Internal: Is this a roxygen2-generated file?
    #
    # A roxygen2-generated file typically contains:
    # % Generated by roxygen2: do not edit by hand
    # on the first line.
    #
    # Returns true or false.
    def generated_roxygen2?
      return false unless extname == '.Rd'
      return false unless lines.count > 1

      lines[0].include?("% Generated by roxygen2: do not edit by hand")
    end

    # Internal: Is this a Jison-generated file?
    #
    # Jison-generated parsers typically contain:
    # /* parser generated by jison
    # on the first line.
    #
    # Jison-generated lexers typically contain:
    # /* generated by jison-lex
    # on the first line.
    #
    # Returns true or false.
    def generated_jison?
      return false unless extname == '.js'
      return false unless lines.count > 1

      lines[0].start_with?("/* parser generated by jison ") ||
        lines[0].start_with?("/* generated by jison-lex ")
    end

    # Internal: Is the blob a generated yarn lockfile?
    #
    # Returns true or false.
    def generated_yarn_lock?
      return false unless name.match(/yarn\.lock/)
      return false unless lines.count > 0

      lines[0].include?("# THIS IS AN AUTOGENERATED FILE")
    end

    # Internal: Is this a protobuf/grpc-generated C++ file?
    #
    # A generated file contains:
    # // Generated by the gRPC C++ plugin.
    # on the first line.
    #
    # Returns true or false.
    def generated_grpc_cpp?
      return false unless %w{.cpp .hpp .h .cc}.include? extname
      return false unless lines.count > 1

      lines[0].start_with?("// Generated by the gRPC")
    end
  end
end