Sha256: aee946ab6f2ab1a56eab76018c7c7c16e6699eb2a89d5d3a2ff822caf9eb05e6

Contents?: true

Size: 708 Bytes

Versions: 5

Compression:

Stored size: 708 Bytes

Contents

require_relative './recursive_character_text_splitter'

module Baran
  # Recursive character splitter preconfigured with Markdown-oriented
  # separators. The list is ordered from the coarsest structural boundary
  # (headings) down to the finest (single space, then the empty string) and is
  # handed unchanged to RecursiveCharacterTextSplitter.
  class MarkdownSplitter < RecursiveCharacterTextSplitter
    # @param chunk_size [Integer] forwarded as-is to the superclass
    # @param chunk_overlap [Integer] forwarded as-is to the superclass
    def initialize(chunk_size: 1024, chunk_overlap: 64)
      # "\n# " through "\n###### " (h1..h6), shallowest heading first.
      headings = (1..6).map { |level| "\n#{'#' * level} " }

      # A closing code fence followed by a blank line.
      code_block_end = ["```\n\n"]

      # Horizontal rules (***, ---, ___) standing alone between blank lines.
      horizontal_rules = %w[*** --- ___].map { |rule| "\n\n#{rule}\n\n" }

      # Generic fallbacks: blank line, line break, space, and finally the
      # empty string.
      fallbacks = ["\n\n", "\n", ' ', '']

      super(
        chunk_size: chunk_size,
        chunk_overlap: chunk_overlap,
        separators: headings + code_block_end + horizontal_rules + fallbacks
      )
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
baran-0.2.1 lib/baran/markdown_splitter.rb
baran-0.2.0 lib/baran/markdown_splitter.rb
baran-0.1.12 lib/baran/markdown_splitter.rb
baran-0.1.11 lib/baran/markdown_splitter.rb
baran-0.1.10 lib/baran/markdown_splitter.rb