92 lines
1.6 KiB
Nix
92 lines
1.6 KiB
Nix
{
|
|
lib,
|
|
buildPythonPackage,
|
|
fetchFromGitHub,
|
|
hatchling,
|
|
beautifulsoup4,
|
|
defusedxml,
|
|
ffmpeg-headless,
|
|
magika,
|
|
mammoth,
|
|
markdownify,
|
|
numpy,
|
|
openai,
|
|
openpyxl,
|
|
pandas,
|
|
pathvalidate,
|
|
pdfminer-six,
|
|
puremagic,
|
|
pydub,
|
|
python-pptx,
|
|
requests,
|
|
speechrecognition,
|
|
youtube-transcript-api,
|
|
olefile,
|
|
xlrd,
|
|
lxml,
|
|
pytestCheckHook,
|
|
gitUpdater,
|
|
}:
|
|
|
|
buildPythonPackage rec {
|
|
pname = "markitdown";
|
|
version = "0.1.2";
|
|
pyproject = true;
|
|
|
|
src = fetchFromGitHub {
|
|
owner = "microsoft";
|
|
repo = "markitdown";
|
|
tag = "v${version}";
|
|
hash = "sha256-7T5cuFBivazKlUk3OKXKKU3YazRAfGRt9O+gCYX3ciQ=";
|
|
};
|
|
|
|
sourceRoot = "${src.name}/packages/markitdown";
|
|
|
|
build-system = [ hatchling ];
|
|
|
|
dependencies = [
|
|
beautifulsoup4
|
|
defusedxml
|
|
ffmpeg-headless
|
|
lxml
|
|
magika
|
|
mammoth
|
|
markdownify
|
|
numpy
|
|
olefile
|
|
openai
|
|
openpyxl
|
|
pandas
|
|
pathvalidate
|
|
pdfminer-six
|
|
puremagic
|
|
pydub
|
|
python-pptx
|
|
requests
|
|
speechrecognition
|
|
xlrd
|
|
youtube-transcript-api
|
|
];
|
|
|
|
pythonImportsCheck = [ "markitdown" ];
|
|
|
|
nativeCheckInputs = [ pytestCheckHook ];
|
|
|
|
disabledTests = [
|
|
# Require network access
|
|
"test_markitdown_remote"
|
|
"test_module_vectors"
|
|
"test_cli_vectors"
|
|
"test_module_misc"
|
|
];
|
|
|
|
passthru.updateScripts = gitUpdater { };
|
|
|
|
meta = {
|
|
description = "Python tool for converting files and office documents to Markdown";
|
|
homepage = "https://github.com/microsoft/markitdown";
|
|
license = lib.licenses.mit;
|
|
maintainers = with lib.maintainers; [ drupol ];
|
|
};
|
|
}
|