From 742376ada5363db867a2c48abea6c6d8def7017b Mon Sep 17 00:00:00 2001 From: abdeladim-s Date: Thu, 19 Dec 2024 14:01:46 -0500 Subject: [PATCH 1/4] feat(DocumentConverterResult): add save method for DocumentConverterResult --- src/markitdown/_markitdown.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 040a586..cbd1010 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -146,6 +146,16 @@ def __init__(self, title: Union[str, None] = None, text_content: str = ""): self.title: Union[str, None] = title self.text_content: str = text_content + def save(self, file_path: str, encoding: str = "utf-8") -> None: + """ + Save the converted document result `text_content` to a file. + + params: + file_path: The path to save the document result to. + encoding: The encoding to use when writing the document. + """ + with open(file_path, "w", encoding=encoding) as f: + f.write(self.text_content) class DocumentConverter: """Abstract superclass of all DocumentConverters.""" From 7aabab4e1504455c6be88c2976561e856029c7e3 Mon Sep 17 00:00:00 2001 From: abdeladim-s Date: Thu, 19 Dec 2024 14:11:34 -0500 Subject: [PATCH 2/4] update(README): save document converter result to a file --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 978327c..02e4669 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ from markitdown import MarkItDown md = MarkItDown() result = md.convert("test.xlsx") print(result.text_content) +result.save("test.md") ``` To use Large Language Models for image descriptions, provide `llm_client` and `llm_model`: From bfffcd9e3548ce9d3b227151d1ecfc10d0b6b281 Mon Sep 17 00:00:00 2001 From: abdeladim-s Date: Thu, 19 Dec 2024 15:01:42 -0500 Subject: [PATCH 3/4] chore: apply pre-commit fixes --- src/markitdown/_markitdown.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index cbd1010..34dbe2f 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -157,6 +157,7 @@ def save(self, file_path: str, encoding: str = "utf-8") -> None: with open(file_path, "w", encoding=encoding) as f: f.write(self.text_content) + class DocumentConverter: """Abstract superclass of all DocumentConverters.""" From 57ccae421b19d384787a5ff4f23c4c2a98f433e1 Mon Sep 17 00:00:00 2001 From: abdeladim-s Date: Sat, 21 Dec 2024 13:33:55 -0500 Subject: [PATCH 4/4] refactor(CLI): use save function from DocumentConverterResult --- src/markitdown/__main__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py index b6cf963..7f662e4 100644 --- a/src/markitdown/__main__.py +++ b/src/markitdown/__main__.py @@ -57,6 +57,12 @@ def main(): "--output", help="Output file name. If not provided, output is written to stdout.", ) + parser.add_argument( + "-e", + "--encoding", + help="Encoding of the output file. Defaults to utf-8.", + default="utf-8", + ) args = parser.parse_args() if args.filename is None: @@ -72,8 +78,7 @@ def main(): def _handle_output(args, result: DocumentConverterResult): """Handle output to stdout or file""" if args.output: - with open(args.output, "w", encoding="utf-8") as f: - f.write(result.text_content) + result.save(args.output, encoding=args.encoding) else: print(result.text_content)