module Datasets
  # Common implementation for datasets published in the ggplot2 repository.
  #
  # Two constants are looked up through the concrete class at runtime:
  # +Record+, which is instantiated with the fields of each CSV row, and
  # +COLUMN_NAME_MAPPING+, which is consulted when column names found in the
  # roxygen documentation are rewritten. +download+ and +cache_dir_path+ are
  # not defined here (presumably they are inherited from Dataset).
  class Ggplot2Dataset < Dataset
    # ggplot2_dataset_name:: name as used by ggplot2; it selects the
    #   reference page URL, the CSV file, and the entry read from data.R.
    def initialize(ggplot2_dataset_name)
      super()
      @ggplot2_dataset_name = ggplot2_dataset_name
      reference_url =
        "https://ggplot2.tidyverse.org/reference/#{@ggplot2_dataset_name}.html"
      @metadata.url = reference_url
      # A lambda is stored instead of a string, so data.R is only downloaded
      # and parsed when the lambda is called.
      @metadata.description = -> { fetch_description }
    end

    # Downloads the dataset's CSV file and yields one Record per data row.
    # The first CSV line is used as the header and all built-in CSV
    # converters are applied to the fields.
    #
    # Returns an Enumerator over the same records when no block is given.
    def each
      return to_enum(:each) unless block_given?

      csv_name = "#{@ggplot2_dataset_name}.csv"
      csv_path = cache_dir_path + csv_name
      download(csv_path, "#{download_base_url}/data-raw/#{csv_name}")
      CSV.open(csv_path, headers: :first_row, converters: :all) do |csv|
        record_class = self.class::Record
        csv.each { |row| yield record_class.new(*row.fields) }
      end
    end

    private

    # Root URL of the raw files on the main branch of the ggplot2 repository.
    def download_base_url
      "https://raw.githubusercontent.com/tidyverse/ggplot2/main"
    end

    # Downloads R/data.R and extracts the documentation of this dataset.
    #
    # Consecutive roxygen comment lines (starting with "#'") are accumulated
    # and attached to the next line that consists solely of a double-quoted
    # name. Returns the parsed text stored under @ggplot2_dataset_name, or
    # nil when data.R has no such entry.
    def fetch_description
      source_name = "data.R"
      source_path = cache_dir_path + source_name
      download(source_path, "#{download_base_url}/R/#{source_name}")

      descriptions = {}
      roxygen = +""
      File.open(source_path) do |data_r|
        data_r.each_line do |raw_line|
          line = raw_line.chomp
          if (marker = /\A#'/.match(line))
            text = marker.post_match
            # Drop the character that follows "#'" (presumably the
            # separating space); a bare "#'" contributes an empty line.
            text = text[1..-1] unless text.empty?
            roxygen << text << "\n"
          elsif (quoted = /\A"(.+)"\z/.match(line))
            descriptions[quoted[1]] = parse_roxygen(roxygen.rstrip)
            # Start a fresh buffer for the next documented object.
            roxygen = +""
          end
        end
        descriptions[@ggplot2_dataset_name]
      end
    end

    # Converts a roxygen comment body into plain text:
    #
    # * "\url{X}" is replaced by "X",
    # * a leading "@format " tag is removed from any line,
    # * inside "\describe{...}", each "\item{name}{text}" becomes
    #   "* name: text", where name is translated through
    #   COLUMN_NAME_MAPPING when it has an entry and "\$" in text is
    #   unescaped to "$".
    def parse_roxygen(roxygen)
      column_name_mapping = self.class::COLUMN_NAME_MAPPING
      without_urls = roxygen.gsub(/\\url\{(.*?)\}/, "\\1")
      without_format_tag = without_urls.gsub(/^@format /, "")
      # The greedy match runs up to the last "}" so that the braces of the
      # nested \item entries stay inside the captured content.
      without_format_tag.gsub(/\\describe\{(.*)\}/m) do
        items = Regexp.last_match(1)
        items.gsub(/\\item\{(.*?)\}\{(.*?)\}/m) do
          # Read both captures before the next gsub replaces the last match.
          raw_name = Regexp.last_match(1)
          raw_text = Regexp.last_match(2)
          column_name = column_name_mapping[raw_name] || raw_name
          description = raw_text.gsub(/\\\$/, "$")
          "* #{column_name}: #{description}"
        end
      end
    end
  end
end