59 lines
1.7 KiB
Nix
59 lines
1.7 KiB
Nix
{ lib
|
|
, python3
|
|
}:
|
|
|
|
let
|
|
# Helper: package the PyPI project `pname` with buildPythonPackage.
# The attribute set supplies the release `version`, the `hash` handed to
# fetchPypi, the `license` recorded under `meta`, and the two dependency
# lists, which are forwarded to the builder unchanged.
fetch = pname: { version, license, hash, buildInputs, propagatedBuildInputs }:
  let
    inherit (python3.pkgs) buildPythonPackage fetchPypi;

    # Source archive resolved from PyPI by name, version and hash.
    src = fetchPypi { inherit pname version hash; };
  in
  buildPythonPackage {
    inherit pname version src buildInputs propagatedBuildInputs;
    meta.license = license;
  };
|
|
in
|
|
# Trafilatura command-line application, built from its PyPI release.
# courlan, htmldate and jusText are packaged inline through the local `fetch`
# helper; the remaining dependencies come straight from python3.pkgs.
python3.pkgs.buildPythonApplication rec {
  pname = "trafilatura";
  version = "1.3.0";

  src = python3.pkgs.fetchPypi {
    inherit pname version;
    hash = "sha256-pmGJ5LnVkdzmSPDMeftSpIbmeXCAkBibxPzYgGjwle8=";
  };

  # `with python3.pkgs` also brings the names used inside the nested
  # dependency lists (pytest, langcodes, tld, dateparser, ...) into scope.
  propagatedBuildInputs = with python3.pkgs; [
    certifi
    charset-normalizer
    lxml
    urllib3

    (fetch "courlan" {
      version = "0.8.3";
      hash = "sha256-0GxbBIsrXNXArHcwTcJLeV5Lslentgd+pAWjtema4Xk=";
      license = lib.licenses.gpl3Plus;
      buildInputs = [ pytest ];
      propagatedBuildInputs = [ langcodes tld urllib3 ];
    })

    (fetch "htmldate" {
      version = "1.3.0";
      hash = "sha256-PDLNtpOYJRMUk7ITxWnb5NCF14UilBfpIvkHC2eoDQY=";
      license = lib.licenses.gpl3Plus;
      buildInputs = [ pytest ];
      propagatedBuildInputs = [ charset-normalizer dateparser lxml python-dateutil urllib3 ];
    })

    (fetch "jusText" {
      version = "3.0.0";
      hash = "sha256-dkDiSCGHlfa+ZfbDX+aXMloygPy0Z10VJbzf8rhvqt8=";
      license = lib.licenses.bsd2;
      buildInputs = [ coverage pytest pytest-cov ];
      propagatedBuildInputs = [ lxml ];
    })
  ];

  doCheck = false; # GUI tests fail with no X display

  meta = {
    description = "Python package and command-line tool designed to gather text on the Web, including discovery, extraction and text processing components";
    homepage = "https://github.com/adbar/trafilatura";
    # `lib.licenses.gpl3` is the deprecated, ambiguous spelling; use the
    # explicit "or later" attribute, matching courlan and htmldate above.
    # NOTE(review): upstream metadata for this release is believed to declare
    # GPLv3-or-later; confirm against the 1.3.0 release before merging.
    license = lib.licenses.gpl3Plus;
  };
}
|