Source code for purrfectmeow.tc01_spl.simple

import time
from collections.abc import Callable
from typing import Any

from purrfectmeow.meow.kitty import kitty_logger


class Simple:
    """Class-level helpers that read a file and return its content as text.

    Every public converter delegates to :meth:`_convert`, which runs the
    actual reader and emits debug logs around it (start, success, elapsed
    time).
    """

    _logger = kitty_logger(__name__)

    @classmethod
    def _convert(cls, file_path: str, converter: Callable[[str], Any]) -> str | Any:
        """Run ``converter`` on ``file_path`` with debug logging and timing.

        Args:
            file_path: Path handed unchanged to ``converter``.
            converter: Callable that receives ``file_path`` and returns the
                converted content.

        Returns:
            Whatever ``converter`` returns.

        Raises:
            Any exception raised by ``converter`` propagates to the caller;
            the elapsed-time message is still logged in that case.
        """
        cls._logger.debug(f"Starting conversion for '{file_path}'")
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        try:
            res = converter(file_path)
            cls._logger.debug(f"Successfully converted '{file_path}'")
            return res
        finally:
            # Runs on success and on failure alike.
            elapsed = time.perf_counter() - start
            cls._logger.debug(f"Conversion time spent '{elapsed:.2f}' seconds.")

    @classmethod
    def encoding_convert(cls, file_path: str) -> str:
        """Return the content of ``file_path`` decoded as UTF-8 text.

        Args:
            file_path: Path of the file to read.

        Returns:
            The full text content of the file.
        """
        cls._logger.debug("Using Encoding for Conversion")

        def reader(file_path: str) -> str:
            with open(file_path, encoding="utf-8") as f:
                return f.read()

        return cls._convert(file_path, reader)

    @classmethod
    def pymupdf_convert(cls, file_path: str) -> str:
        """Extract the text of every page of ``file_path`` using PyMuPDF.

        Paths ending in ``.txt``, ``.md``, ``.json``, ``.html`` or ``.xml``
        (matched case-insensitively) are opened with ``filetype="txt"``;
        any other path is opened with PyMuPDF's default type detection.

        Args:
            file_path: Path of the document to read.

        Returns:
            The text of all pages concatenated in page order.
        """
        cls._logger.debug("Using PyMuPDF for Conversion")

        def reader(file_path: str) -> str:
            # Imported lazily so PyMuPDF is only required when this
            # converter is actually used.
            import pymupdf

            text_suffixes = (".txt", ".md", ".json", ".html", ".xml")
            if file_path.lower().endswith(text_suffixes):
                document = pymupdf.open(file_path, filetype="txt")
            else:
                document = pymupdf.open(file_path)
            # The context manager closes the document once the page text
            # has been collected, instead of leaking the open handle.
            with document:
                return "".join(page.get_text() for page in document)

        return cls._convert(file_path, reader)

    @classmethod
    def pandas_convert(cls, file_path: str) -> str:
        """Load tabular data with pandas and render it as plain text.

        The pandas reader is chosen from the file extension (matched
        case-insensitively): ``.xls``/``.xlsx``, ``.csv``, ``.json``,
        ``.html`` and ``.xml``. Each loaded frame is rendered with
        ``DataFrame.to_string(index=False)``.

        Args:
            file_path: Path of the file to read.

        Returns:
            The rendered table text, or an empty string when the extension
            is not one of the supported ones.
        """
        cls._logger.debug("Using Pandas for Conversion")

        def reader(file_path: str) -> Any:
            # Imported lazily so pandas is only required when this
            # converter is actually used.
            import pandas

            lowered = file_path.lower()
            frames: list[pandas.DataFrame]
            if lowered.endswith((".xls", ".xlsx")):
                frames = [pandas.read_excel(file_path)]
            elif lowered.endswith(".csv"):
                frames = [pandas.read_csv(file_path)]
            elif lowered.endswith(".json"):
                frames = [pandas.read_json(file_path)]
            elif lowered.endswith(".html"):
                # read_html yields one frame per table found in the page.
                frames = pandas.read_html(file_path)
            elif lowered.endswith(".xml"):
                frames = [pandas.read_xml(file_path)]
            else:
                # Best-effort contract: unsupported types give an empty
                # result rather than an error.
                cls._logger.debug(f"Unsupported file type for Pandas conversion: '{file_path}'")
                return ""
            # NOTE(review): multiple HTML tables are joined with no
            # separator, so the last row of one table runs into the header
            # of the next. Kept as-is to preserve existing output; confirm
            # whether a newline separator is wanted.
            return "".join(frame.to_string(index=False) for frame in frames)

        return cls._convert(file_path, reader)