RubygemsResearch

Sha256: 2124588ced9525813f9df4b0ef8b25c0aa88ce809dfb941b0378cd1a1c4f0aa5

Contents?: true

Size: 1.67 KB

Versions: 1

Compression:

Stored size: 1.67 KB

# Fetch data from worldwildlife.org and package it into objects:
# instantiate Species or Articles based on user input from the CLI controller.

# Scrapes worldwildlife.org and builds EndangeredSpecies::Species and
# EndangeredSpecies::Articles records from the parsed pages.
#
# Pages are fetched through URI.open (open-uri) rather than a bare `open(url)`:
# since Ruby 3.0, open-uri no longer patches Kernel#open to accept URLs, so the
# bare form treats the URL as a local file path and fails.
class EndangeredSpecies::Scraper
  # Host prepended to the site-relative hrefs found in the listings.
  BASE_URL = "https://www.worldwildlife.org".freeze

  # Fetches and parses the species directory page.
  #
  # @return [Object] the document produced by Nokogiri::HTML
  def get_species_index
    fetch_document("#{BASE_URL}/species/directory")
  end

  # Selects the table rows of the species directory.
  #
  # @return [Object] the node set matched by the directory-row CSS selector
  def scrape_species_index
    get_species_index.css("table.lead.gutter-bottom-2.table-to-list tbody tr")
  end

  # Builds one Species per directory row, then fetches that species' own page
  # to fill in its summary and habitat.
  #
  # @return [Object] the node set of directory rows that was iterated
  def make_species
    scrape_species_index.each do |row|
      species = EndangeredSpecies::Species.new
      species.name = row.css("td.keep a").first.text
      species.scientific = row.css("td em").text
      species.status = row.css("td").last.text
      species.url = absolute_url(row)

      detail_page = fetch_document(species.url)
      # NOTE(review): the species is saved once per matching section, so a page
      # with no such section never saves it and a page with several saves it
      # repeatedly. Kept as-is; confirm whether a single save is intended.
      detail_page.search("div.wrapper.section-pop").each do |section|
        species.summary = section.css("p").text
        species.habitat = format_habitat(section.css("ul.list-data.list-spaced > li").text)
        species.save
      end
    end
  end

  # Fetches and parses the stories listing page.
  #
  # @return [Object] the document produced by Nokogiri::HTML
  def get_articles_index
    fetch_document("#{BASE_URL}/stories/")
  end

  # Selects the story containers on the stories listing page.
  #
  # @return [Object] the node set matched by the story-container CSS selector
  def scrape_articles_index
    get_articles_index.css("div.span9.gutter-horiz-in")
  end

  # Builds and saves one Articles record per story container.
  #
  # @return [Object] the node set of story containers that was iterated
  def make_articles
    scrape_articles_index.each do |story|
      article = EndangeredSpecies::Articles.new
      article.title = story.css("h2 a").text
      article.date = story.css("em").text.gsub("WWF Magazine:", "")
      article.summary = story.css("div").text.gsub("\n", "")
      article.url = absolute_url(story)
      article.save
    end
  end

  private

  # Downloads +url+ and parses it as HTML.
  #
  # @param url [String] absolute URL of the page to fetch
  # @return [Object] the document produced by Nokogiri::HTML
  def fetch_document(url)
    # The block form closes the response stream once parsing has finished.
    URI.open(url) { |page| Nokogiri::HTML(page) }
  end

  # Builds an absolute URL from the href of the first anchor inside +node+.
  #
  # @param node [Object] a parsed node that contains at least one anchor
  # @return [String] BASE_URL followed by the anchor's href
  def absolute_url(node)
    "#{BASE_URL}#{node.css("a").attr("href").text}"
  end

  # Flattens the scraped list text into labelled "Places:" / "Habitats:" lines.
  #
  # @param raw [String] concatenated text of the habitat list items
  # @return [String] the text with newlines collapsed and labels punctuated
  def format_habitat(raw)
    raw.gsub("\n\n", " ")
       .gsub("\n", "")
       .gsub("Places", "Places:")
       .gsub("Habitats", "\nHabitats:")
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version	Path
endangered_species-0.1.1	lib/endangered_species/scraper.rb