=begin
#Carbon

#Connect external data to LLMs, no matter the source.

The version of the OpenAPI document: 1.0.0
=end

require 'spec_helper'
require 'json'

# Unit tests for Carbon::FilesApi
#
# Apart from the instance check, every example below is an empty placeholder
# to be filled in with real assertions for the corresponding API method.
describe 'FilesApi' do
  # A fresh client is built eagerly before each example (including the empty
  # placeholders), so a failing constructor is reported by every example.
  let!(:api_instance) { Carbon::FilesApi.new }

  describe 'test an instance of FilesApi' do
    it 'should create an instance of FilesApi' do
      expect(api_instance).to be_instance_of(Carbon::FilesApi)
    end
  end

  # unit tests for create_user_file_tags
  # Create File Tags
  # A tag is a key-value pair that can be added to a file. This pair can then be used for
  # searches (e.g. embedding searches) in order to narrow down the scope of the search.
  # A file can have any number of tags. The following are reserved keys that cannot be used:
  #   - db_embedding_id
  #   - organization_id
  #   - user_id
  #   - organization_user_file_id
  #
  # Carbon currently supports two data types for tag values - `string` and `list<string>`.
  # Keys can only be `string`. If values other than `string` and `list<string>` are used,
  # they're automatically converted to strings (e.g. 4 will become "4").
  # @param organization_user_file_tag_create
  # @param [Hash] opts the optional parameters
  # @return [UserFile]
  describe 'create_user_file_tags test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for delete
  # Delete File Endpoint
  # @param file_id
  # @param [Hash] opts the optional parameters
  # @return [GenericSuccessResponse]
  describe 'delete test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for delete_file_tags
  # Delete File Tags
  # @param organization_user_file_tags_remove
  # @param [Hash] opts the optional parameters
  # @return [UserFile]
  describe 'delete_file_tags test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for delete_many
  # Delete Files Endpoint
  # @param delete_files_query_input
  # @param [Hash] opts the optional parameters
  # @return [GenericSuccessResponse]
  describe 'delete_many test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for get_parsed_file
  # Parsed File
  # This route is deprecated. Use `/user_files_v2` instead.
  # @param file_id
  # @param [Hash] opts the optional parameters
  # @return [PresignedURLResponse]
  describe 'get_parsed_file test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for get_raw_file
  # Raw File
  # This route is deprecated. Use `/user_files_v2` instead.
  # @param file_id
  # @param [Hash] opts the optional parameters
  # @return [PresignedURLResponse]
  describe 'get_raw_file test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for query_user_files
  # User Files V2
  # For pre-filtering documents, using `tags_v2` is preferred to using `tags` (which is now
  # deprecated). If both `tags_v2` and `tags` are specified, `tags` is ignored. `tags_v2`
  # enables building complex filters through the use of "AND", "OR", and negation logic.
  # Take the below input as an example:
  #
  # ```json
  # {
  #   "OR": [
  #     { "key": "subject", "value": "holy-bible", "negate": false },
  #     { "key": "person-of-interest", "value": "jesus christ", "negate": false },
  #     { "key": "genre", "value": "religion", "negate": true },
  #     {
  #       "AND": [
  #         { "key": "subject", "value": "tao-te-ching", "negate": false },
  #         { "key": "author", "value": "lao-tzu", "negate": false }
  #       ]
  #     }
  #   ]
  # }
  # ```
  #
  # In this case, files will be filtered such that:
  #   1. "subject" = "holy-bible" OR
  #   2. "person-of-interest" = "jesus christ" OR
  #   3. "genre" != "religion" OR
  #   4. "subject" = "tao-te-ching" AND "author" = "lao-tzu"
  #
  # Note that the top level of the query must be either an "OR" or "AND" array. Currently,
  # nesting is limited to 3. For tag blocks (those with "key", "value", and "negate" keys),
  # the following typing rules apply:
  #   1. "key" isn't optional and must be a `string`
  #   2. "value" isn't optional and can be `any` or list[`any`]
  #   3. "negate" is optional and must be `true` or `false`. If present and `true`, then the
  #      filter block is negated in the resulting query. It is `false` by default.
  # @param organization_user_files_to_sync_query_input
  # @param [Hash] opts the optional parameters
  # @return [UserFilesV2]
  describe 'query_user_files test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for query_user_files_deprecated
  # User Files
  # This route is deprecated. Use `/user_files_v2` instead.
  # @param organization_user_files_to_sync_query_input
  # @param [Hash] opts the optional parameters
  # @return [Array]
  describe 'query_user_files_deprecated test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for resync
  # Resync File
  # @param resync_file_query_input
  # @param [Hash] opts the optional parameters
  # @return [UserFile]
  describe 'resync test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for upload
  # Create Upload File
  # This endpoint is used to directly upload local files to Carbon. The `POST` request should
  # be a multipart form request.
  #
  # Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now.
  # When this value is set, PDF chunks are at most one page long. Additional information can
  # be retrieved for each chunk, however, namely the coordinates of the bounding box around
  # the chunk (this can be used for things like text highlighting).
  #
  # Following is a description of all possible query parameters:
  #   - `chunk_size`: the chunk size (in tokens) applied when splitting the document
  #   - `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document
  #   - `skip_embedding_generation`: whether or not to skip the generation of chunks and
  #     embeddings
  #   - `set_page_as_boundary`: described above
  #   - `embedding_model`: the model used to generate embeddings for the document chunks
  #   - `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating
  #     chunks (only valid for PDFs currently)
  #   - `generate_sparse_vectors`: whether or not to generate sparse vectors for the file.
  #     Required for hybrid search.
  #   - `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
  #
  # Carbon supports multiple models for use in generating embeddings for files. For images,
  # we support Vertex AI's multimodal model; for text, we support OpenAI's
  # `text-embedding-ada-002` and Cohere's embed-multilingual-v3.0. The model can be specified
  # via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query
  # parameter in `/uploadfile`). If no model is supplied, the `text-embedding-ada-002` is
  # used by default. When performing embedding queries, embeddings from files that used the
  # specified model will be considered in the query. For example, if files A and B have
  # embeddings generated with `OPENAI`, and files C and D have embeddings generated with
  # `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If
  # `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then
  # only files C and D will be considered. Make sure that the set of all files you want
  # considered for a query have embeddings generated via the same model. For now, **do not**
  # set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by
  # Carbon when it detects an image file.
  # @param file
  # @param body_create_upload_file_uploadfile_post
  # @param [Hash] opts the optional parameters
  # @option opts [Integer] :chunk_size Chunk size in tiktoken tokens to be used when
  #   processing file.
  # @option opts [Integer] :chunk_overlap Chunk overlap in tiktoken tokens to be used when
  #   processing file.
  # @option opts [Boolean] :skip_embedding_generation Flag to control whether or not
  #   embeddings should be generated and stored when processing file.
  # @option opts [Boolean] :set_page_as_boundary Flag to control whether or not to set a
  #   page's worth of content as the maximum amount of content that can appear in a chunk.
  #   Only valid for PDFs. See the route description for more information.
  # @option opts [TextEmbeddingGenerators] :embedding_model Embedding model that will be
  #   used to embed file chunks.
  # @option opts [Boolean] :use_ocr Whether or not to use OCR when processing files. Only
  #   valid for PDFs. Useful for documents with tables, images, and/or scanned text.
  # @option opts [Boolean] :generate_sparse_vectors Whether or not to generate sparse
  #   vectors for the file. This is *required* for the file to be a candidate for hybrid
  #   search.
  # @option opts [Boolean] :prepend_filename_to_chunks Whether or not to prepend the file's
  #   name to chunks.
  # @option opts [Integer] :max_items_per_chunk Number of objects per chunk. For json files
  #   only.
  # @return [UserFile]
  describe 'upload test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for upload_from_url
  # Create Upload File From Url
  # @param upload_file_from_url_input
  # @param [Hash] opts the optional parameters
  # @return [UserFile]
  describe 'upload_from_url test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end

  # unit tests for upload_text
  # Create Raw Text
  # Carbon supports multiple models for use in generating embeddings for files. For images,
  # we support Vertex AI's multimodal model; for text, we support OpenAI's
  # `text-embedding-ada-002` and Cohere's embed-multilingual-v3.0. The model can be specified
  # via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query
  # parameter in `/uploadfile`). If no model is supplied, the `text-embedding-ada-002` is
  # used by default. When performing embedding queries, embeddings from files that used the
  # specified model will be considered in the query. For example, if files A and B have
  # embeddings generated with `OPENAI`, and files C and D have embeddings generated with
  # `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If
  # `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then
  # only files C and D will be considered. Make sure that the set of all files you want
  # considered for a query have embeddings generated via the same model. For now, **do not**
  # set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by
  # Carbon when it detects an image file.
  # @param raw_text_input
  # @param [Hash] opts the optional parameters
  # @return [UserFile]
  describe 'upload_text test' do
    it 'should work' do
      # assertion here. ref: https://www.relishapp.com/rspec/rspec-expectations/docs/built-in-matchers
    end
  end
end