warbo-utilities: 53cb74ae865f47351d4ae4ed6348632ab4295a10

     1: { bash, cacert, curl, fail, html2text, python3, raw, runCommand, wget, withDeps
     2: , wrap, xidel, xmlstarlet }:
     3: 
     4: with builtins;
     5: with rec {
     6:   getContent = wrap {
     7:     # Hands raw."getBBCContent.py" to `wrap` under the same name, together with a
     8:     # CA-bundle variable and the programs listed in `paths`.
     9:     file = raw."getBBCContent.py";
    10:     name = "getBBCContent.py";
    11:     vars.SSL_CERT_FILE = "${cacert}/etc/ssl/certs/ca-bundle.crt";
    12:     paths = let pythonEnv = python3.withPackages (ps: [ ps.beautifulsoup4 ps.feedparser ps.PyRSS2Gen ]);
    13:       in [ html2text pythonEnv ];
    14:   };
    15: 
    16:   bbcnews = wrap { # Script: download the BBC News RSS feed, prune it, pipe it to getContent
    17:     name = "bbcnews";
    18:     paths = [ bash xmlstarlet wget ];
    19:     vars = { inherit getContent; }; # presumably exposed by wrap as $getContent, which the script runs
    20:     script = ''
    21:       #!${bash}/bin/bash
    22:       # pipefail: a failing wget or xmlstarlet stage must fail the script, not be masked
    23:       set -eo pipefail
    24:       echo "Fetching BBC News" 1>&2
    25: 
    26:       function stripCrap {
    27:         # Remove item elements whose guid url contains the given text
    28:         xmlstarlet ed -d "//guid[contains(text(),'$1')]/.."
    29:       }
    30:       # Fetch the UK edition feed, drop unwanted sections, hand the rest to getContent
    31:       # shellcheck disable=SC2154
    32:       wget -q -O- "http://feeds.bbci.co.uk/news/rss.xml?edition=uk" |
    33:         stripCrap '/sport/'                                         |
    34:         stripCrap '/news/magazine-'                                 |
    35:         stripCrap '/news/entertainment-arts'                        |
    36:         stripCrap '/news/in-pictures'                               |
    37:         stripCrap '/news/av/'                                       |
    38:         "$getContent"
    39:     '';
    40:   };
    41: 
    42:   tests = attrValues { # the check derivations below, as a list (attribute names are dropped)
    43:     getContent = runCommand "test-get-content" { # runs "$getContent" with RUN_TESTS set to "1"
    44:       RUN_TESTS = "1";
    45:       HTML_EXAMPLE = raw."bbcExamplePage.html.gz";
    46:       buildInputs = [ fail xidel ];
    47:       inherit getContent;
    48:     } ''
    49:       "$getContent"
    50:       mkdir "$out"
    51:     '';
    52: 
    53:     noSport = runCommand "no-sport-test" { # skipped offline; fails on a guid line mentioning /sport/
    54:       buildInputs = [ curl ];
    55:       inherit bbcnews;
    56:     } ''
    57:       set -e
    58: 
    59:       if curl -s "http://www.bbc.co.uk" > /dev/null
    60:       then
    61:         echo "Looks like we're online..." 1>&2
    62:       else
    63:         echo "Not online, skipping test" 1>&2
    64:         mkdir "$out"
    65:         exit 0
    66:       fi
    67: 
    68:       if "$bbcnews" | grep guid | grep '/sport/'
    69:       then
    70:         echo "Didn't filter out sport" 1>&2
    71:         exit 1
    72:       fi
    73: 
    74:       echo "Sport was filtered out correctly" 1>&2
    75:       mkdir "$out"
    76:     '';
    77:   };
    78: };
    79: withDeps tests bbcnews # result of the file; presumably bbcnews gated on the tests building - confirm in withDeps

Generated by git2html.