warbo-utilities: 53cb74ae865f47351d4ae4ed6348632ab4295a10
1: { bash, cacert, curl, fail, html2text, python3, raw, runCommand, wget, withDeps
2: , wrap, xidel, xmlstarlet }:
3:
4: with builtins;
5: with rec {
# The getBBCContent.py script from `raw`, packaged with `wrap`.
# NOTE(review): presumably `wrap` exports `vars` as environment variables and
# makes `paths` available on PATH for the wrapped file -- confirm against the
# definition of `wrap`, which is not visible here.
getContent = wrap {
  name = "getBBCContent.py";
  file = raw."getBBCContent.py";

  # Helper program plus a Python 3 carrying the three libraries listed below.
  paths = [
    html2text
    (python3.withPackages (pyPkgs: [
      pyPkgs.beautifulsoup4
      pyPkgs.feedparser
      pyPkgs.PyRSS2Gen
    ]))
  ];

  # Point SSL_CERT_FILE at the CA bundle shipped by the `cacert` package.
  vars.SSL_CERT_FILE = "${cacert}/etc/ssl/certs/ca-bundle.crt";
};
15:
# Command which downloads the BBC News UK RSS feed, deletes the items whose
# guid mentions an unwanted section, and pipes what is left into getContent.
# NOTE(review): presumably `wrap` exposes `vars` to the script as environment
# variables (the script reads "$getContent") -- confirm against `wrap`.
bbcnews = wrap {
  name   = "bbcnews";
  paths  = [ bash xmlstarlet wget ];
  vars   = { inherit getContent; };
  script = ''
    #!${bash}/bin/bash
    # pipefail matters here: the whole job is one pipeline, so without it a
    # failed download or a failed xmlstarlet stage would be hidden behind the
    # exit status of the final command.
    set -e
    set -o pipefail

    echo "Fetching BBC News" 1>&2

    function stripCrap {
      # Remove item elements whose guid url contains the given text
      xmlstarlet ed -d "//guid[contains(text(),'$1')]/.."
    }

    # shellcheck disable=SC2154
    wget -q -O- "http://feeds.bbci.co.uk/news/rss.xml?edition=uk" |
      stripCrap '/sport/' |
      stripCrap '/news/magazine-' |
      stripCrap '/news/entertainment-arts' |
      stripCrap '/news/in-pictures' |
      stripCrap '/news/av/' |
      "$getContent"
  '';
};
41:
# Test derivations for the definitions above; `attrValues` turns the set into
# a list of derivations.
tests = attrValues {
  # Runs getContent with RUN_TESTS=1 and HTML_EXAMPLE set to the example page.
  # NOTE(review): presumably RUN_TESTS switches getBBCContent.py into a
  # self-test mode that reads HTML_EXAMPLE -- confirm against that script.
  getContent = runCommand "test-get-content" {
    inherit getContent;
    buildInputs  = [ fail xidel ];
    HTML_EXAMPLE = raw."bbcExamplePage.html.gz";
    RUN_TESTS    = "1";
  } ''
    "$getContent"
    mkdir "$out"
  '';

  # Checks that no guid in the output of bbcnews mentions '/sport/'. The check
  # is skipped (and the derivation succeeds) when www.bbc.co.uk is unreachable.
  noSport = runCommand "no-sport-test" {
    inherit bbcnews;
    buildInputs = [ curl ];
  } ''
    set -e

    if curl -s "http://www.bbc.co.uk" > /dev/null
    then
      echo "Looks like we're online..." 1>&2
    else
      echo "Not online, skipping test" 1>&2
      mkdir "$out"
      exit 0
    fi

    # Fetch the feed as its own command so that a failing bbcnews aborts the
    # test (via set -e). Running it inside the 'if' pipeline would hide the
    # failure and report sport as filtered when nothing was fetched at all.
    FEED=$("$bbcnews")

    if printf '%s\n' "$FEED" | grep guid | grep '/sport/'
    then
      echo "Didn't filter out sport" 1>&2
      exit 1
    fi

    echo "Sport was filtered out correctly" 1>&2
    mkdir "$out"
  '';
};
78: };
79: withDeps tests bbcnews
Generated by git2html.