lib/jets/builders/code_builder.rb in jets-0.10.4 vs lib/jets/builders/code_builder.rb in jets-1.0.0
- old
+ new
@@ -2,11 +2,10 @@
require "open-uri"
require "colorize"
require "socket"
require "net/http"
require "action_view"
-require "bundler" # for clean_old_submodules only
# Some important folders to help understand how jets builds a project:
#
# /tmp/jets: build root where different jets projects get built.
# /tmp/jets/project: each jets project gets built in a different subdirectory.
@@ -16,11 +15,11 @@
# cache: Gemfile is here, this is where we run bundle install.
# cache/bundled/gems: Vendored gems that get created as part of bundled install.
# Initially, macosx gems but then get replaced by linux gems where appropriate.
# cache/downloads/rubies: ruby tarballs.
# cache/downloads/gems: gem tarballs.
-# app_root: Where project gets copied into in order for us to configure it.
+# code: Where project gets copied into in order for us to configure it.
# app_root/bundled/gems: Where vendored gems finally end up at. The compiled
# gems at this point are only linux gems.
# artifacts/code/code-md5sha.zip: code artifact that gets uploaded to lambda.
#
# Building Steps:
@@ -38,136 +37,253 @@
#
### build bundled in cache area
# * bundle install: cache/bundled/gems
#
### setup bundled on app root from cache
-# * copy bundled to app_root: app_root/bundled
+# * copy bundled to code: code/bundled
# * extract linux ruby: cache/downloads/rubies:
# cache/bundled/rbenv, cache/bundled/linuxbrew
# * extract linux gems: cache/downloads/gems:
# cache/bundled/gems, cache/bundled/linuxbrew
-# * setup bundled config: app_root/.bundle/config
+# * setup bundled config: code/.bundle/config
#
### zip
# * create zip file
class Jets::Builders
class CodeBuilder
+ # https://docs.aws.amazon.com/lambda/latest/dg/limits.html
+ AWS_CODE_SIZE_LIMIT = 250 * 1024 * 1024 # 250MB
+
include Jets::Timing
- include ActionView::Helpers::NumberHelper # number_to_human_size
include Jets::AwsServices
+ include Util
+ extend Memoist
attr_reader :full_project_path
def initialize
# Expanding to the full path and capture now.
# Dir.chdir gets called later and we'll lose this info.
@full_project_path = File.expand_path(Jets.root) + "/"
end
def build
- return create_zip_file(fake=true) if ENV['TEST_CODE'] # early return
-
cache_check_message
check_ruby_version
clean_start
- compile_assets # easier to do before we copy the project
+ compile_assets # easier to do before we copy the project because node and yarn has been likely setup in the that dir
+ compile_rails_assets
copy_project
- Dir.chdir(full(tmp_app_root)) do
+ Dir.chdir(full(tmp_code)) do
# These commands run from project root
- start_app_root_setup
- bundle
- finish_app_root_setup
- create_zip_file
+ code_setup
+ package_ruby
+ code_finish
end
end
time :build
- # Finds out of the app has polymorphic functions only and zero ruby functions.
- # In this case, we can skip a lot of the ruby related building and speed up the
- # deploy process.
- def poly_only?
- return true if ENV['POLY_ONLY'] # bypass to allow rapid development of handlers
- Jets::Commands::Build.poly_only?
+ # Resolves the chicken-and-egg problem with md5 checksums. The handlers need
+ # to reference files with the md5 checksum. The files are the:
+ #
+ # jets/code/rack-checksum.zip
+ # jets/code/bundled-checksum.zip
+ #
+ # We compute the checksums before we generate the node shim handlers.
+ def calculate_md5s
+ Md5.compute! # populates Md5.checksums hash
end
- def start_app_root_setup
- tidy_project
+ def generate_node_shims
+ headline "Generating shims in the handlers folder."
+ # Crucial that the Dir.pwd is in the tmp_code because for
+ # Jets::Builders::app_files because Jets.boot set ups
+ # autoload_paths and this is how project classes are loaded.
+ Jets::Builders::HandlerGenerator.build!
+ end
+
+ def create_zip_files
+ folders = Md5.stage_folders
+ folders.each do |folder|
+ zip = Md5Zip.new(folder)
+ if exist_on_s3?(zip.md5_name)
+ puts "Already exists: s3://#{s3_bucket}/jets/code/#{zip.md5_name}"
+ else
+ zip = Md5Zip.new(folder)
+ zip.create
+ end
+ end
+ end
+ time :create_zip_files
+
+ def exist_on_s3?(filename)
+ s3_key = "jets/code/#{filename}"
+ begin
+ s3.head_object(bucket: s3_bucket, key: s3_key)
+ true
+ rescue Aws::S3::Errors::NotFound
+ false
+ end
+ end
+
+ # Moves code/bundled and code/rack to build_root.
+ # These files will be packaged separated and lazy loaded as part of the
+ # node shim. This keeps the code zipfile smaller in size and helps
+ # with the 250MB extract limited. /tmp permits up to 512MB.
+ # AWS Lambda Limits: https://amzn.to/2A7y6v6
+ #
+ # > Each Lambda function receives an additional 512MB of non-persistent disk space in its own /tmp directory. The /tmp directory can be used for loading additional resources like dependency libraries or data sets during function initialization.
+ #
+ def setup_tmp
+ tmp_symlink("bundled") if Jets.lazy_load?
+ tmp_symlink("rack")
+ end
+
+ def stage_area
+ "#{Jets.build_root}/stage"
+ end
+
+ # Moves folder to a stage folder and create a symlink its place
+ # that links from /var/task to /tmp. Example:
+ #
+ # /var/task/bundled => /tmp/bundled
+ #
+ def tmp_symlink(folder)
+ src = "#{full(tmp_code)}/#{folder}"
+ return unless File.exist?(src)
+
+ dest = "#{stage_area}/#{folder}"
+ dir = File.dirname(dest)
+ FileUtils.mkdir_p(dir) unless File.exist?(dir)
+ FileUtils.mv(src, dest)
+
+ # Create symlink
+ FileUtils.ln_sf("/tmp/#{folder}", "/#{full(tmp_code)}/#{folder}")
+ end
+
+ def code_setup
reconfigure_development_webpacker
- reconfigure_ruby_version
+ end
+ time :code_setup
+
+ def code_finish
+ update_lazy_load_config # at the top, must be called before Jets.lazy_load? is used
+ store_s3_base_url
+ setup_tmp
+ calculate_md5s # must be called before generate_node_shims and create_zip_files
generate_node_shims
+ create_zip_files
end
- time :start_app_root_setup
+ time :code_finish
- def finish_app_root_setup
- return if poly_only?
+ def update_lazy_load_config
+ size_limit = AWS_CODE_SIZE_LIMIT
+ code_size = dir_size(full(tmp_code))
+ if code_size > size_limit
+ # override the setting because we dont have to a choice but to lazy load
+ Jets.config.ruby.lazy_load = true
+ end
+ end
- copy_bundled_to_app_root
- setup_bundle_config
- extract_ruby
- extract_gems
- store_s3_base_url
+ # Thanks https://stackoverflow.com/questions/9354595/recursively-getting-the-size-of-a-directory
+ # Seems to overestimate a little bit but close enough.
+ def dir_size(folder)
+ Dir.glob(File.join(folder, '**', '*'))
+ .select { |f| File.file?(f) }
+ .map{ |f| File.size(f) }
+ .inject(:+)
end
- time :finish_app_root_setup
- # At this point the minimal stack exists.
+ # Store s3 base url is needed for asset serving from s3 later. Need to package this
+ # as part of the code so we have a reference to it.
+ # At this point the minimal stack exists, so we can grab it with the AWS API.
+ # We do not want to grab this as part of the live request because it is slow.
def store_s3_base_url
- IO.write("#{full(tmp_app_root)}/config/s3_base_url.txt", s3_base_url)
+ return if poly_only?
+
+ write_s3_base_url("config/s3_base_url.txt")
+ write_s3_base_url("rack/config/s3_base_url.txt") if Jets.rack?
end
+ def write_s3_base_url(relative_path)
+ full_path = "#{full(tmp_code)}/#{relative_path}"
+ FileUtils.mkdir_p(File.dirname(full_path))
+ IO.write(full_path, s3_base_url)
+ end
+
def s3_base_url
# Allow user to set assets.base_url
#
# Jets.application.configure do
# config.assets.base_url = "https://cloudfront.com/my/base/path"
# end
#
return Jets.config.assets.base_url if Jets.config.assets.base_url
- resp = cfn.describe_stacks(stack_name: Jets::Naming.parent_stack_name)
- stack = resp.stacks.first
- output = stack["outputs"].find { |o| o["output_key"] == "S3Bucket" }
- bucket_name = output["output_value"] # s3_bucket
region = Jets.aws.region
asset_base_url = "https://s3-#{region}.amazonaws.com"
- "#{asset_base_url}/#{bucket_name}/jets/public" # s3_base_url
+ "#{asset_base_url}/#{s3_bucket}/jets" # s3_base_url
end
- def lambdagem_options
- {
- s3: "lambdagems",
- build_root: cache_area, # used in lambdagem
- project_root: full(tmp_app_root), # used in gem_replacer and lambdagem
- }
+ def s3_bucket
+ Jets.aws.s3_bucket
end
- def extract_ruby
- headline "Setting up a vendored copy of ruby."
- Lambdagem.log_level = :info
- Lambdagem::Extract::Ruby.new(Jets::RUBY_VERSION, lambdagem_options).run
- end
-
- def extract_gems
- headline "Replacing compiled gems with AWS Lambda Linux compiled versions."
- GemReplacer.new(Jets::RUBY_VERSION, lambdagem_options).run
- end
-
- # This happens in the current app directory not the tmp app_root for simplicity
+ # This happens in the current app directory not the tmp code for simplicity.
+ # This is because the node and yarn has likely been set up correctly there.
def compile_assets
+ if ENV['JETS_SKIP_ASSETS']
+ puts "Skip compiling assets".colorize(:yellow) # useful for debugging
+ return
+ end
+
headline "Compling assets in current project directory"
# Thanks: https://stackoverflow.com/questions/4195735/get-list-of-gems-being-used-by-a-bundler-project
webpacker_loaded = Gem.loaded_specs.keys.include?("webpacker")
return unless webpacker_loaded
sh("yarn install")
- webpack_bin = File.exist?("#{Jets.root}bin/webpack") ?
+ webpack_command = File.exist?("#{Jets.root}bin/webpack") ?
"bin/webpack" :
`which webpack`.strip
- sh("JETS_ENV=#{Jets.env} #{webpack_bin}")
+ sh("JETS_ENV=#{Jets.env} #{webpack_command}")
end
time :compile_assets
+ # This happens in the current app directory not the tmp code for simplicity
+ # This is because the node likely been set up correctly there.
+ def compile_rails_assets
+ return unless rails?
+
+ if ENV['JETS_SKIP_ASSETS']
+ puts "Skip compiling rack assets".colorize(:yellow) # useful for debugging
+ return
+ end
+
+ return unless Jets.rack?
+
+ Bundler.with_clean_env do
+ rails_assets(:clobber)
+ rails_assets(:precompile)
+ end
+ end
+
+ def rails_assets(cmd)
+ # rake is available in both rails 4 and 5. rails command only in 5
+ command = "rake assets:#{cmd} --trace"
+ command = "RAILS_ENV=#{Jets.env} #{fulL_cmd}" unless Jets.env.development?
+ sh("cd rack && #{command}")
+ end
+
+ # Rudimentary rails detection
+ def rails?
+ config_ru = "#{Jets.root}rack/config.ru"
+ return false unless File.exist?(config_ru)
+ !IO.readlines(config_ru).grep(/Rails.application/).empty?
+ end
+
# Cleans out non-cached files like code-*.zip in Jets.build_root
# for a clean start. Also ensure that the /tmp/jets/project build root exists.
#
# Most files are kept around after the build process for inspection and
# debugging. So we have to clean out the files. But we only want to clean out
@@ -179,15 +295,18 @@
# Copy project into temporary directory. Do this so we can keep the project
# directory untouched and we can also remove a bunch of unnecessary files like
# logs before zipping it up.
def copy_project
- headline "Copying current project directory to temporary build area: #{full(tmp_app_root)}"
- FileUtils.rm_rf(full(tmp_app_root)) # remove current app_root folder
+ headline "Copying current project directory to temporary build area: #{full(tmp_code)}"
+ FileUtils.rm_rf(stage_area) # clear out from previous build
+ FileUtils.mkdir_p(stage_area)
+ FileUtils.rm_rf(full(tmp_code)) # remove current code folder
move_node_modules(Jets.root, Jets.build_root)
begin
- FileUtils.cp_r(@full_project_path, full(tmp_app_root))
+ # puts "cp -r #{@full_project_path} #{full(tmp_code)}".colorize(:yellow) # uncomment to debug
+ FileUtils.cp_r(@full_project_path, full(tmp_code))
ensure
move_node_modules(Jets.build_root, Jets.root) # move node_modules directory back
end
end
time :copy_project
@@ -203,242 +322,49 @@
if File.exist?(source)
FileUtils.mv(source, dest)
end
end
- # Because we're removing files (something dangerous) use full paths.
- def tidy_project
- headline "Tidying project: removing ignored files to reduce package size."
- excludes.each do |exclude|
- exclude = exclude.sub(%r{^/},'') # remove leading slash
- remove_path = "#{full(tmp_app_root)}/#{exclude}"
- FileUtils.rm_rf(remove_path)
- # puts " rm -rf #{remove_path}" # uncomment to debug
- end
- end
-
- def generate_node_shims
- headline "Generating node shims in the handlers folder."
- # Crucial that the Dir.pwd is in the tmp_app_root because for
- # Jets::Builders::app_files because Jets.boot set ups
- # autoload_paths and this is how project classes are loaded.
- Jets::Commands::Build.app_files.each do |path|
- handler = Jets::Builders::HandlerGenerator.new(path)
- handler.generate
- end
- end
-
# Bit hacky but this saves the user from accidentally forgetting to change this
# when they deploy a jets project in development mode
def reconfigure_development_webpacker
return unless Jets.env.development?
headline "Reconfiguring webpacker development settings for AWS Lambda."
- webpacker_yml = "#{full(tmp_app_root)}/config/webpacker.yml"
+ webpacker_yml = "#{full(tmp_code)}/config/webpacker.yml"
return unless File.exist?(webpacker_yml)
config = YAML.load_file(webpacker_yml)
config["development"]["compile"] = false # force this to be false for deployment
new_yaml = YAML.dump(config)
IO.write(webpacker_yml, new_yaml)
end
- # This is in case the user has a 2.5.x variant.
- # Force usage of ruby version that jets supports
- # The lambda server only has ruby 2.5.0 installed.
- def reconfigure_ruby_version
- ruby_version = "#{full(tmp_app_root)}/.ruby-version"
- IO.write(ruby_version, Jets::RUBY_VERSION)
+ def ruby_packager
+ RubyPackager.new(tmp_code)
end
+ memoize :ruby_packager
- def copy_bundled_to_app_root
- app_root_bundled = "#{full(tmp_app_root)}/bundled"
- if File.exist?(app_root_bundled)
- puts "Removing current bundled from project"
- FileUtils.rm_rf(app_root_bundled)
- end
- # Leave #{Jets.build_root}/bundled behind to act as cache
- FileUtils.cp_r("#{cache_area}/bundled", app_root_bundled)
+ def rack_packager
+ RackPackager.new("#{tmp_code}/rack")
end
+ memoize :rack_packager
- def setup_bundle_config
- ensure_build_cache_bundle_config_exists!
-
- # Override project's .bundle/config and ensure that .bundle/config matches
- # at these 2 spots:
- # app_root/.bundle/config
- # bundled/gems/.bundle/config
- cache_bundle_config = "#{cache_area}/.bundle/config"
- app_bundle_config = "#{full(tmp_app_root)}/.bundle/config"
- FileUtils.mkdir_p(File.dirname(app_bundle_config))
- FileUtils.cp(cache_bundle_config, app_bundle_config)
+ def package_ruby
+ ruby_packager.install
+ reconfigure_rails
+ rack_packager.install
+ ruby_packager.finish
+ rack_packager.finish
end
+ time :package_ruby
- # On circleci the "#{Jets.build_root}/.bundle/config" doesnt exist
- # this only happens with ssh debugging, not when the ci.sh script gets ran.
- # But on macosx it exists.
- # Dont know why this is the case.
- def ensure_build_cache_bundle_config_exists!
- text =<<-EOL
----
-BUNDLE_PATH: "bundled/gems"
-BUNDLE_WITHOUT: "development:test"
-EOL
- bundle_config = "#{cache_area}/.bundle/config"
- FileUtils.mkdir_p(File.dirname(bundle_config))
- IO.write(bundle_config, text)
+ # TODO: Move logic into plugin instead
+ def reconfigure_rails
+ ReconfigureRails.new("#{full(tmp_code)}/rack").run
end
- def create_zip_file(fake=nil)
- headline "Creating zip file."
- temp_code_zipfile = "#{Jets.build_root}/code/code-temp.zip"
- FileUtils.mkdir_p(File.dirname(temp_code_zipfile))
-
- # Use fake if testing CloudFormation only
- if fake
- hello_world = "/tmp/hello.js"
- puts "Uploading tiny #{hello_world} file to S3 for quick testing.".colorize(:red)
- code = IO.read(File.expand_path("../node-hello.js", __FILE__))
- IO.write(hello_world, code)
- command = "zip --symlinks -rq #{temp_code_zipfile} #{hello_world}"
- else
- # https://serverfault.com/questions/265675/how-can-i-zip-compress-a-symlink
- command = "cd #{full(tmp_app_root)} && zip --symlinks -rq #{temp_code_zipfile} ."
- end
-
- sh(command)
-
- # we can get the md5 only after the file has been created
- md5 = Digest::MD5.file(temp_code_zipfile).to_s[0..7]
- md5_zip_dest = "#{Jets.build_root}/code/code-#{md5}.zip"
- FileUtils.mkdir_p(File.dirname(md5_zip_dest))
- FileUtils.mv(temp_code_zipfile, md5_zip_dest)
- # mv /tmp/jets/demo/code/code-temp.zip /tmp/jets/demo/code/code-a8a604aa.zip
-
- file_size = number_to_human_size(File.size(md5_zip_dest))
- puts "Zip file with code and bundled linux ruby created at: #{md5_zip_dest.colorize(:green)} (#{file_size})"
-
- # Save state
- IO.write("#{Jets.build_root}/code/current-md5-filename.txt", md5_zip_dest)
- # Much later: ship, base_child_builder need set an s3_key which requires
- # the md5_zip_dest.
- # It is a pain to pass this all the way up from the
- # CodeBuilder class.
- # Let's store the "/tmp/jets/demo/code/code-a8a604aa.zip" into a
- # file that can be read from any places where this is needed.
- # Can also just generate a "fake file" for specs
- end
- time :create_zip_file
-
- def bundle
- clean_old_submodules
- bundle_install
- end
- time :bundle
-
- # Installs gems on the current target system: both compiled and non-compiled.
- # If user is on a macosx machine, macosx gems will be installed.
- # If user is on a linux machine, linux gems will be installed.
- #
- # Copies Gemfile* to /tmp/jetss/demo/bundled folder and installs
- # gems with bundle install from there.
- #
- # We take the time to copy Gemfile and bundle into a separate directory
- # because it gets left around to act as a 'cache'. So, when the builds the
- # project gets built again not all the gems from get installed from the
- # beginning.
- def bundle_install
- return if poly_only?
-
- headline "Bundling: running bundle install in cache area: #{cache_area}."
-
- copy_gemfiles
-
- require "bundler" # dynamically require bundler so user can use any bundler
- Bundler.with_clean_env do
- # cd /tmp/jets/demo
- sh(
- "cd #{cache_area} && " \
- "env BUNDLE_IGNORE_CONFIG=1 bundle install --path bundled/gems --without development test"
- )
- end
-
- puts 'Bundle install success.'
- end
-
- # When using submodules, bundler leaves old submodules behind. Over time this inflates
- # the size of the the bundled gems. So we'll clean it up.
- def clean_old_submodules
- # https://stackoverflow.com/questions/38800129/parsing-a-gemfile-lock-with-bundler
- lockfile = "#{cache_area}/Gemfile.lock"
- return unless File.exist?(lockfile)
-
- parser = Bundler::LockfileParser.new(Bundler.read_file(lockfile))
- specs = parser.specs
-
- # specs = Bundler.load.specs
- # IE: spec.source.to_s: "https://github.com/tongueroo/webpacker.git (at jets@a8c4661)"
- submoduled_specs = specs.select do |spec|
- spec.source.to_s =~ /@\w+\)/
- end
-
- # find git shas to keep
- # IE: ["a8c4661", "abc4661"]
- git_shas = submoduled_specs.map do |spec|
- md = spec.source.to_s.match(/@(\w+)\)/)
- git_sha = md[1]
- end
-
- # IE: /tmp/jets/demo/cache/bundled/gems/ruby/2.5.0/bundler/gems/webpacker-a8c46614c675
- Dir.glob("#{cache_area}/bundled/gems/ruby/2.5.0/bundler/gems/*").each do |path|
- sha = path.split('-').last[0..6] # only first 7 chars of the git sha
- unless git_shas.include?(sha)
- puts "Removing old submoduled gem: #{path}"
- FileUtils.rm_rf(path) # REMOVE old submodule directory
- end
- end
- end
-
- def copy_gemfiles
- FileUtils.mkdir_p(cache_area)
- FileUtils.cp("#{@full_project_path}Gemfile", "#{cache_area}/Gemfile")
- FileUtils.cp("#{@full_project_path}Gemfile.lock", "#{cache_area}/Gemfile.lock")
- end
-
- def excludes
- excludes = %w[.git tmp log spec]
- excludes += get_excludes("#{full(tmp_app_root)}/.gitignore")
- excludes += get_excludes("#{full(tmp_app_root)}/.dockerignore")
- excludes = excludes.reject do |p|
- jetskeep.find do |keep|
- p.include?(keep)
- end
- end
- excludes
- end
-
- def get_excludes(file)
- path = file
- return [] unless File.exist?(path)
-
- exclude = File.read(path).split("\n")
- exclude.map {|i| i.strip}.reject {|i| i =~ /^#/ || i.empty?}
- # IE: ["/handlers", "/bundled*", "/vendor/jets]
- end
-
- # We clean out ignored files pretty aggressively. So provide
- # a way for users to keep files from being cleaned ou.
- def jetskeep
- defaults = %w[pack handlers]
- path = Jets.root + ".jetskeep"
- return defaults unless path.exist?
-
- keep = path.read.split("\n")
- keep = keep.map {|i| i.strip}.reject {|i| i =~ /^#/ || i.empty?}
- (defaults + keep).uniq
- end
-
def cache_check_message
if File.exist?("#{Jets.build_root}/cache")
puts "The #{Jets.build_root}/cache folder exists. Incrementally re-building the jets using the cache. To clear the cache: rm -rf #{Jets.build_root}/cache"
end
end
@@ -459,36 +385,15 @@
jets = {major: md[1], minor: md[2]}
ruby[:major] == jets[:major] && ruby[:minor] == jets[:minor]
end
- def cache_area
- "#{Jets.build_root}/cache" # cleaner to use full path for this setting
- end
-
- # Provide pretty clear way to desinate full path.
- # full("bundled") => /tmp/jets/demo/bundled
- def full(relative_path)
- "#{Jets.build_root}/#{relative_path}"
- end
-
# Group all the path settings together here
- def self.tmp_app_root
- Jets::Commands::Build.tmp_app_root
+ def self.tmp_code
+ Jets::Commands::Build.tmp_code
end
- def tmp_app_root
- self.class.tmp_app_root
- end
-
- def sh(command)
- puts "=> #{command}".colorize(:green)
- success = system(command)
- abort("#{command} failed to run") unless success
- success
- end
-
- def headline(message)
- puts "=> #{message}".colorize(:cyan)
+ def tmp_code
+ self.class.tmp_code
end
end
end