{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RedAmber Examples\n",
    "\n",
    "This notebook walks through the [README of RedAmber](https://github.com/heronshoes/red_amber#readme)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `RedAmber::DataFrame`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "require 'red_amber'\n",
    "include RedAmber\n",
    "require 'datasets-arrow'\n",
    "\n",
    "# Show the library versions this notebook runs against.\n",
    "{ RedAmber: VERSION, Datasets: Datasets::VERSION }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example: diamonds dataset\n",
    "\n",
    "The first time `Datasets::Diamonds` is loaded, it takes a while because the data has to be downloaded."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "diamonds = DataFrame.new(Datasets::Diamonds.new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Keep only the rows with carat > 1 (#filter can be used in place of #slice).\n",
    "heavy_diamonds = diamonds.slice { carat > 1 }\n",
    "\n",
    "# Naming `:price` in `mean` means no `pick` is needed before `group`.\n",
    "price_by_cut = heavy_diamonds.group(:cut).mean(:price)\n",
    "\n",
    "mean_price_by_cut = price_by_cut.sort('-mean(price)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "usd_to_jpy = 110.0 # a rate from when the yen was stronger\n",
    "\n",
    "prices_in_usd = mean_price_by_cut.rename('mean(price)': :mean_price_USD)\n",
    "prices_in_usd.assign(:mean_price_JPY) { mean_price_USD * usd_to_jpy }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Example: starwars dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "starwars_csv = URI('https://vincentarelbundock.github.io/Rdatasets/csv/dplyr/starwars.csv')\n",
    "\n",
    "starwars = DataFrame.load(starwars_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "known_species = starwars\n",
    "  .drop(0) # the first column is an index that is not needed\n",
    "  .remove { species == 'NA' } # discard rows whose species is 'NA'\n",
    "\n",
    "species_stats = known_species.group(:species) { [count(:species), mean(:height, :mass)] }\n",
    "\n",
    "species_stats.slice { count > 1 } # #filter can be used in place of #slice"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## `RedAmber::Vector`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "penguins = DataFrame.new(Datasets::Penguins.new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Pull one column out of the frame; the following cells reuse it.\n",
    "bill_length = penguins[:bill_length_mm]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "bill_length < 40"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "bill_length.mean"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Ruby 3.0.2",
   "language": "ruby",
   "name": "ruby"
  },
  "language_info": {
   "file_extension": ".rb",
   "mimetype": "application/x-ruby",
   "name": "ruby",
   "version": "3.0.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}