Sha256: aee946ab6f2ab1a56eab76018c7c7c16e6699eb2a89d5d3a2ff822caf9eb05e6
Contents?: true
Size: 708 Bytes
Versions: 5
Compression:
Stored size: 708 Bytes
Contents
require_relative './recursive_character_text_splitter'

module Baran
  # Splitter preconfigured with a Markdown-oriented separator list.
  #
  # The only thing this class does is assemble that list and hand it, together
  # with the chunk settings, to RecursiveCharacterTextSplitter#initialize.
  # NOTE(review): how the parent consumes the list (presumably in priority
  # order, first entry tried first) is defined in the parent class, not here.
  class MarkdownSplitter < RecursiveCharacterTextSplitter
    # @param chunk_size forwarded unchanged to the parent initializer (default 1024)
    # @param chunk_overlap forwarded unchanged to the parent initializer (default 64)
    def initialize(chunk_size: 1024, chunk_overlap: 64)
      # "\n# " through "\n###### " -- heading markers h1..h6, in that order.
      heading_marks = (1..6).map { |depth| "\n#{'#' * depth} " }

      # A closing code fence followed by a blank line.
      fence_mark = ["```\n\n"]

      # The three horizontal-rule spellings, each surrounded by blank lines.
      rule_marks = %w[*** --- ___].map { |glyphs| "\n\n#{glyphs}\n\n" }

      # Generic fallbacks: blank line, single newline, space, empty string.
      generic_marks = ["\n\n", "\n", ' ', '']

      super(
        chunk_size: chunk_size,
        chunk_overlap: chunk_overlap,
        separators: heading_marks + fence_mark + rule_marks + generic_marks
      )
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems