purrfectmeow.tc02_mlt package

Submodules

purrfectmeow.tc02_mlt.base module

class purrfectmeow.tc02_mlt.base.Malet[source]

Bases: object

DEFAULT_MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

DEFAULT_CHUNK_SIZE = 500

DEFAULT_CHUNK_OVERLAP = 0

DEFAULT_CHUNK_SEPARATOR = '\n\n'

classmethod chunking(text, chunk_method='token', **kwargs)[source]
Parameters:
  • text (str)

  • chunk_method (Literal['token', 'separate'] | None)

  • kwargs (Any)

Return type:

TokenTextSplitter | CharacterSeparator

purrfectmeow.tc02_mlt.separate module

class purrfectmeow.tc02_mlt.separate.SeparateSplit[source]

Bases: object

classmethod splitter(chunk_separator)[source]
Parameters:

chunk_separator (str)

Return type:

CharacterSeparator

class CharacterSeparator(separator)[source]

Bases: object

Parameters:

separator (str)

split_text(text)[source]
Parameters:

text (str)

Return type:

list[str]

purrfectmeow.tc02_mlt.token module

class purrfectmeow.tc02_mlt.token.TokenSplit[source]

Bases: object

classmethod splitter(model_name, chunk_size, chunk_overlap)[source]
Parameters:
  • model_name (str)

  • chunk_size (int)

  • chunk_overlap (int)

Return type:

TokenTextSplitter

Module contents

class purrfectmeow.tc02_mlt.Malet[source]

Bases: object

DEFAULT_CHUNK_OVERLAP = 0

DEFAULT_CHUNK_SEPARATOR = '\n\n'

DEFAULT_CHUNK_SIZE = 500

DEFAULT_MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

classmethod chunking(text, chunk_method='token', **kwargs)[source]
Parameters:
  • text (str)

  • chunk_method (Literal['token', 'separate'] | None)

  • kwargs (Any)

Return type:

TokenTextSplitter | CharacterSeparator