=begin
#Carbon

#Connect external data to LLMs, no matter the source.

The version of the OpenAPI document: 1.0.0
=end

require 'date'
require 'set'
require 'time'

module Carbon
  # Request model describing a web-scrape job. Instances can be built from
  # keyword-style hashes (+new+), from parsed JSON (+build_from_hash+), and
  # serialized back with +to_hash+.
  class WebscrapeRequest
    # Only assigned by the initializer when the supplied value is a Hash.
    attr_accessor :tags

    # Required: the model is invalid while this is nil.
    attr_accessor :url

    # Defaults to 3; must be nil or >= 0 (enforced by the custom writer).
    attr_reader :recursion_depth

    # Defaults to 100; must be nil or >= 1 (enforced by the custom writer).
    attr_reader :max_pages_to_scrape

    # Defaults to 1500.
    attr_accessor :chunk_size

    # Defaults to 20.
    attr_accessor :chunk_overlap

    # Defaults to false.
    attr_accessor :skip_embedding_generation

    # Defaults to false.
    attr_accessor :enable_auto_sync

    # Defaults to false.
    attr_accessor :generate_sparse_vectors

    # Defaults to false.
    attr_accessor :prepend_filename_to_chunks

    # Only assigned by the initializer when the supplied value is an Array.
    attr_accessor :html_tags_to_skip

    # Only assigned by the initializer when the supplied value is an Array.
    attr_accessor :css_classes_to_skip

    # Only assigned by the initializer when the supplied value is an Array.
    attr_accessor :css_selectors_to_skip

    # Defaults to 'OPENAI'.
    attr_accessor :embedding_model

    # URL subpaths or directories that you want to include. For example if you want to only include URLs that start with /questions in stackoverflow.com, you will add /questions/ in this input
    # At most 10 items (enforced by the custom writer).
    attr_reader :url_paths_to_include

    # Whether the scraper should download css and media from the page (images, fonts, etc). Scrapes might take longer to finish with this flag enabled, but the success rate is improved.
    attr_accessor :download_css_and_media

    # If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.
    attr_accessor :generate_chunks_only

    # Attribute mapping from ruby-style variable name to JSON key.
    # @return [Hash{Symbol => Symbol}]
    def self.attribute_map
      {
        :'tags' => :'tags',
        :'url' => :'url',
        :'recursion_depth' => :'recursion_depth',
        :'max_pages_to_scrape' => :'max_pages_to_scrape',
        :'chunk_size' => :'chunk_size',
        :'chunk_overlap' => :'chunk_overlap',
        :'skip_embedding_generation' => :'skip_embedding_generation',
        :'enable_auto_sync' => :'enable_auto_sync',
        :'generate_sparse_vectors' => :'generate_sparse_vectors',
        :'prepend_filename_to_chunks' => :'prepend_filename_to_chunks',
        :'html_tags_to_skip' => :'html_tags_to_skip',
        :'css_classes_to_skip' => :'css_classes_to_skip',
        :'css_selectors_to_skip' => :'css_selectors_to_skip',
        :'embedding_model' => :'embedding_model',
        :'url_paths_to_include' => :'url_paths_to_include',
        :'download_css_and_media' => :'download_css_and_media',
        :'generate_chunks_only' => :'generate_chunks_only'
      }
    end

    # Returns all the JSON keys this model knows about
    # @return [Array<Symbol>]
    def self.acceptable_attributes
      attribute_map.values
    end

    # Attribute type mapping.
    # NOTE(review): the container types below carry no element type
    # (plain Array / Hash); _deserialize passes such values through unchanged.
    # Confirm against the OpenAPI document whether element types were intended.
    # @return [Hash{Symbol => Symbol}]
    def self.openapi_types
      {
        :'tags' => :'Hash',
        :'url' => :'String',
        :'recursion_depth' => :'Integer',
        :'max_pages_to_scrape' => :'Integer',
        :'chunk_size' => :'Integer',
        :'chunk_overlap' => :'Integer',
        :'skip_embedding_generation' => :'Boolean',
        :'enable_auto_sync' => :'Boolean',
        :'generate_sparse_vectors' => :'Boolean',
        :'prepend_filename_to_chunks' => :'Boolean',
        :'html_tags_to_skip' => :'Array',
        :'css_classes_to_skip' => :'Array',
        :'css_selectors_to_skip' => :'Array',
        :'embedding_model' => :'EmbeddingGenerators',
        :'url_paths_to_include' => :'Array',
        :'download_css_and_media' => :'Boolean',
        :'generate_chunks_only' => :'Boolean'
      }
    end

    # List of attributes with nullable: true
    # @return [Set<Symbol>]
    def self.openapi_nullable
      Set.new([
        :'tags',
        :'recursion_depth',
        :'max_pages_to_scrape',
        :'chunk_size',
        :'chunk_overlap',
        :'skip_embedding_generation',
        :'enable_auto_sync',
        :'generate_sparse_vectors',
        :'prepend_filename_to_chunks',
        :'html_tags_to_skip',
        :'css_classes_to_skip',
        :'css_selectors_to_skip',
        :'url_paths_to_include',
        :'download_css_and_media',
      ])
    end

    # Initializes the object
    # @param [Hash] attributes Model attributes in the form of hash
    # @raise [ArgumentError] if +attributes+ is not a Hash, contains a key that
    #   is not in +attribute_map+, or carries a value rejected by a validating
    #   writer
    def initialize(attributes = {})
      unless attributes.is_a?(Hash)
        raise ArgumentError, "The input argument (attributes) must be a hash in `Carbon::WebscrapeRequest` initialize method"
      end

      # check to see if the attribute exists and convert string to symbol for hash key
      attributes = attributes.each_with_object({}) do |(k, v), h|
        unless self.class.attribute_map.key?(k.to_sym)
          raise ArgumentError, "`#{k}` is not a valid attribute in `Carbon::WebscrapeRequest`. Please check the name to make sure it's valid. List of attributes: #{self.class.attribute_map.keys.inspect}"
        end
        h[k.to_sym] = v
      end

      # Container attributes are silently skipped when the value has the wrong
      # type; scalar attributes fall back to their default only when the key is
      # absent (an explicit nil is kept).
      self.tags = attributes[:'tags'] if attributes[:'tags'].is_a?(Hash)
      self.url = attributes[:'url'] if attributes.key?(:'url')
      self.recursion_depth = attributes.fetch(:'recursion_depth', 3)
      self.max_pages_to_scrape = attributes.fetch(:'max_pages_to_scrape', 100)
      self.chunk_size = attributes.fetch(:'chunk_size', 1500)
      self.chunk_overlap = attributes.fetch(:'chunk_overlap', 20)
      self.skip_embedding_generation = attributes.fetch(:'skip_embedding_generation', false)
      self.enable_auto_sync = attributes.fetch(:'enable_auto_sync', false)
      self.generate_sparse_vectors = attributes.fetch(:'generate_sparse_vectors', false)
      self.prepend_filename_to_chunks = attributes.fetch(:'prepend_filename_to_chunks', false)
      self.html_tags_to_skip = attributes[:'html_tags_to_skip'] if attributes[:'html_tags_to_skip'].is_a?(Array)
      self.css_classes_to_skip = attributes[:'css_classes_to_skip'] if attributes[:'css_classes_to_skip'].is_a?(Array)
      self.css_selectors_to_skip = attributes[:'css_selectors_to_skip'] if attributes[:'css_selectors_to_skip'].is_a?(Array)
      self.embedding_model = attributes.fetch(:'embedding_model', 'OPENAI')
      self.url_paths_to_include = attributes[:'url_paths_to_include'] if attributes[:'url_paths_to_include'].is_a?(Array)
      self.download_css_and_media = attributes.fetch(:'download_css_and_media', false)
      self.generate_chunks_only = attributes.fetch(:'generate_chunks_only', false)
    end

    # Show invalid properties with the reasons. Usually used together with valid?
    # @return [Array<String>] one message per invalid property (empty when valid)
    def list_invalid_properties
      invalid_properties = []
      if @url.nil?
        invalid_properties.push('invalid value for "url", url cannot be nil.')
      end

      if !@recursion_depth.nil? && @recursion_depth < 0
        invalid_properties.push('invalid value for "recursion_depth", must be greater than or equal to 0.')
      end

      if !@max_pages_to_scrape.nil? && @max_pages_to_scrape < 1
        invalid_properties.push('invalid value for "max_pages_to_scrape", must be greater than or equal to 1.')
      end

      if !@url_paths_to_include.nil? && @url_paths_to_include.length > 10
        invalid_properties.push('invalid value for "url_paths_to_include", number of items must be less than or equal to 10.')
      end

      invalid_properties
    end

    # Check to see if the all the properties in the model are valid
    # @return [Boolean] true if the model is valid
    def valid?
      # Same four checks as list_invalid_properties, so delegate instead of
      # duplicating them.
      list_invalid_properties.empty?
    end

    # Custom attribute writer method with validation
    # @param [Object] recursion_depth Value to be assigned
    # @raise [ArgumentError] if the value is non-nil and negative
    def recursion_depth=(recursion_depth)
      if !recursion_depth.nil? && recursion_depth < 0
        raise ArgumentError, 'invalid value for "recursion_depth", must be greater than or equal to 0.'
      end

      @recursion_depth = recursion_depth
    end

    # Custom attribute writer method with validation
    # @param [Object] max_pages_to_scrape Value to be assigned
    # @raise [ArgumentError] if the value is non-nil and less than 1
    def max_pages_to_scrape=(max_pages_to_scrape)
      if !max_pages_to_scrape.nil? && max_pages_to_scrape < 1
        raise ArgumentError, 'invalid value for "max_pages_to_scrape", must be greater than or equal to 1.'
      end

      @max_pages_to_scrape = max_pages_to_scrape
    end

    # Custom attribute writer method with validation
    # @param [Object] url_paths_to_include Value to be assigned
    # @raise [ArgumentError] if the value is non-nil and has more than 10 items
    def url_paths_to_include=(url_paths_to_include)
      if !url_paths_to_include.nil? && url_paths_to_include.length > 10
        raise ArgumentError, 'invalid value for "url_paths_to_include", number of items must be less than or equal to 10.'
      end

      @url_paths_to_include = url_paths_to_include
    end

    # Checks equality by comparing each attribute.
    # @param [Object] Object to be compared
    # @return [Boolean]
    def ==(o)
      return true if self.equal?(o)
      self.class == o.class &&
        tags == o.tags &&
        url == o.url &&
        recursion_depth == o.recursion_depth &&
        max_pages_to_scrape == o.max_pages_to_scrape &&
        chunk_size == o.chunk_size &&
        chunk_overlap == o.chunk_overlap &&
        skip_embedding_generation == o.skip_embedding_generation &&
        enable_auto_sync == o.enable_auto_sync &&
        generate_sparse_vectors == o.generate_sparse_vectors &&
        prepend_filename_to_chunks == o.prepend_filename_to_chunks &&
        html_tags_to_skip == o.html_tags_to_skip &&
        css_classes_to_skip == o.css_classes_to_skip &&
        css_selectors_to_skip == o.css_selectors_to_skip &&
        embedding_model == o.embedding_model &&
        url_paths_to_include == o.url_paths_to_include &&
        download_css_and_media == o.download_css_and_media &&
        generate_chunks_only == o.generate_chunks_only
    end

    # @see the `==` method
    # @param [Object] Object to be compared
    # @return [Boolean]
    def eql?(o)
      self == o
    end

    # Calculates hash code according to all attributes.
    # @return [Integer] Hash code
    def hash
      [tags, url, recursion_depth, max_pages_to_scrape, chunk_size, chunk_overlap, skip_embedding_generation, enable_auto_sync, generate_sparse_vectors, prepend_filename_to_chunks, html_tags_to_skip, css_classes_to_skip, css_selectors_to_skip, embedding_model, url_paths_to_include, download_css_and_media, generate_chunks_only].hash
    end

    # Builds the object from hash
    # @param [Hash] attributes Model attributes in the form of hash
    # @return [Object] Returns the model itself
    def self.build_from_hash(attributes)
      new.build_from_hash(attributes)
    end

    # Builds the object from hash
    # @param [Hash] attributes Model attributes in the form of hash
    # @return [Object] Returns the model itself, or nil when +attributes+ is
    #   not a Hash
    def build_from_hash(attributes)
      return nil unless attributes.is_a?(Hash)

      attributes = attributes.transform_keys(&:to_sym)
      nullable = self.class.openapi_nullable
      json_keys = self.class.attribute_map
      self.class.openapi_types.each_pair do |key, type|
        value = attributes[json_keys[key]]
        typed_array = /\AArray<(.*)>/i.match(type.to_s)

        if value.nil? && nullable.include?(key)
          # Nullable attributes that are missing/nil are explicitly reset to nil.
          send("#{key}=", nil)
        elsif typed_array
          # check to ensure the input is an array given that the attribute
          # is documented as an array but the input is not
          send("#{key}=", value.map { |v| _deserialize(typed_array[1], v) }) if value.is_a?(Array)
        elsif !value.nil?
          send("#{key}=", _deserialize(type, value))
        end
      end

      self
    end

    # Deserializes the data based on type
    # @param [String, Symbol] type Data type
    # @param [Object] value Value to be deserialized
    # @return [Object] Deserialized data
    def _deserialize(type, value)
      case type.to_sym
      when :Time
        Time.parse(value)
      when :Date
        Date.parse(value)
      when :String
        value.to_s
      when :Integer
        value.to_i
      when :Float
        value.to_f
      when :Boolean
        if value.to_s =~ /\A(true|t|yes|y|1)\z/i
          true
        else
          false
        end
      when :Object, :Array, :Hash
        # generic object (usually a Hash) or an untyped container: return
        # directly. Without this, bare Array/Hash would fall through to the
        # model branch below and fail on Array.build_from_hash.
        value
      when /\AArray<(?<inner_type>.+)>\z/
        inner_type = Regexp.last_match[:inner_type]
        value.map { |v| _deserialize(inner_type, v) }
      when /\AHash<(?<k_type>.+?), (?<v_type>.+)>\z/
        k_type = Regexp.last_match[:k_type]
        v_type = Regexp.last_match[:v_type]
        {}.tap do |hash|
          value.each do |k, v|
            hash[_deserialize(k_type, k)] = _deserialize(v_type, v)
          end
        end
      else # model
        # models (e.g. Pet) or oneOf
        klass = Carbon.const_get(type)
        klass.respond_to?(:openapi_one_of) ? klass.build(value) : klass.build_from_hash(value)
      end
    end

    # Returns the string representation of the object
    # @return [String] String presentation of the object
    def to_s
      to_hash.to_s
    end

    # to_body is an alias to to_hash (backward compatibility)
    # @return [Hash] Returns the object in the form of hash
    def to_body
      to_hash
    end

    # Returns the object in the form of hash
    # @return [Hash] Returns the object in the form of hash
    def to_hash
      nullable = self.class.openapi_nullable
      self.class.attribute_map.each_with_object({}) do |(attr, param), hash|
        value = send(attr)
        # A nil is emitted only for nullable attributes that were explicitly
        # assigned (instance variable exists); every other nil is omitted.
        next if value.nil? && !(nullable.include?(attr) && instance_variable_defined?(:"@#{attr}"))

        hash[param] = _to_hash(value)
      end
    end

    # Outputs non-array value in the form of hash
    # For object, use to_hash. Otherwise, just return the value
    # @param [Object] value Any valid value
    # @return [Hash] Returns the value in the form of hash
    def _to_hash(value)
      if value.is_a?(Array)
        # nil elements are dropped from arrays
        value.compact.map { |v| _to_hash(v) }
      elsif value.is_a?(Hash)
        value.transform_values { |v| _to_hash(v) }
      elsif value.respond_to?(:to_hash)
        value.to_hash
      else
        value
      end
    end
  end
end