warbo-utilities: 2077ef89852f855cdfa3f43b9a3d4891ba889bb4
1: {
2: bash,
3: cacert,
4: curl,
5: fail,
6: html2text,
7: python3,
8: raw,
9: runCommand,
10: wget,
11: withDeps,
12: wrap,
13: xidel,
14: xmlstarlet,
15: }:
16:
17: with builtins;
18: with rec {
getContent = wrap {
  # Wrapped form of the getBBCContent.py script held in the `raw` attrset.
  name = "getBBCContent.py";
  file = raw."getBBCContent.py";

  # Programs handed to `wrap` for the script: html2text plus a Python 3
  # interpreter bundled with the listed packages.
  # NOTE(review): presumably `wrap` puts these on the script's PATH -- confirm
  # against the definition of `wrap`.
  paths = [
    html2text
    (python3.withPackages (
      p: with p; [
        beautifulsoup4
        feedparser
        PyRSS2Gen
      ]
    ))
  ];

  # Points SSL_CERT_FILE at the CA bundle shipped in the cacert package.
  vars.SSL_CERT_FILE = "${cacert}/etc/ssl/certs/ca-bundle.crt";
};
34:
bbcnews = wrap {
  # Command which downloads the BBC News UK RSS feed, deletes the items we are
  # not interested in, and pipes what is left through `getContent`.
  name = "bbcnews";

  # External programs invoked by the script below.
  paths = [
    bash
    xmlstarlet
    wget
  ];

  # The script refers to the wrapped getBBCContent.py as "$getContent".
  vars = {
    inherit getContent;
  };

  script = ''
    #!${bash}/bin/bash
    set -e

    # Without pipefail the status of the pipeline below is only that of its
    # final stage, so a failed download or an XML parse error would go
    # unnoticed. With it, any failing stage makes the whole command fail.
    set -o pipefail

    echo "Fetching BBC News" 1>&2

    function stripCrap {
      # Remove item elements whose guid url contains the given text
      xmlstarlet ed -d "//guid[contains(text(),'$1')]/.."
    }

    # shellcheck disable=SC2154
    wget -q -O- "http://feeds.bbci.co.uk/news/rss.xml?edition=uk" |
      stripCrap '/sport/' |
      stripCrap '/news/magazine-' |
      stripCrap '/news/entertainment-arts' |
      stripCrap '/news/in-pictures' |
      stripCrap '/news/av/' |
      "$getContent"
  '';
};
66:
tests = attrValues {
  # Runs the wrapped getBBCContent.py with RUN_TESTS=1 and a gzipped example
  # page exposed as HTML_EXAMPLE.
  # NOTE(review): presumably RUN_TESTS switches the script into a self-test
  # mode which reads HTML_EXAMPLE -- confirm in getBBCContent.py.
  getContent = runCommand "test-get-content" {
    inherit getContent;
    RUN_TESTS = "1";
    HTML_EXAMPLE = raw."bbcExamplePage.html.gz";
    buildInputs = [
      fail
      xidel
    ];
  } ''
    "$getContent"
    mkdir "$out"
  '';

  # Checks that no guid line mentioning /sport/ survives in the output of
  # bbcnews. The check is skipped (and the build succeeds) when the BBC site
  # cannot be reached.
  noSport = runCommand "no-sport-test" {
    inherit bbcnews;
    buildInputs = [ curl ];
  } ''
    set -e

    # Guard clause: without network access there is nothing to check.
    if ! curl -s "http://www.bbc.co.uk" > /dev/null
    then
      echo "Not online, skipping test" 1>&2
      mkdir "$out"
      exit 0
    fi
    echo "Looks like we're online..." 1>&2

    # Any guid line containing /sport/ means the filter let an item through.
    if "$bbcnews" | grep guid | grep '/sport/'
    then
      echo "Didn't filter out sport" 1>&2
      exit 1
    fi

    echo "Sport was filtered out correctly" 1>&2
    mkdir "$out"
  '';
};
112: };
113: withDeps tests bbcnews
Generated by git2html.