warbo-utilities: 31484ac95cc319d78a3fab01d97791aa2834b241
#!/usr/bin/env bash
set -e
shopt -s nullglob

function norm {
    # Strip non-alphanumeric and convert to lowercase
    tr '[:upper:]' '[:lower:]' | tr -dc '[:alnum:]'
}
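# For example, 'echo "In Our Time" | norm' gives 'inourtime'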

# Build a newline-separated list of candidate destination directories; the
# final grep strips the blank line which the printf starts with (and, via
# set -e, aborts the script if there are no candidates at all)
POSSIBILITIES=""
for POSSIBLE in ~/Audio/Non-fiction/*
do
    POSSIBILITIES=$(printf '%s\n%s' "$POSSIBILITIES" "$POSSIBLE")
done
POSSIBILITIES=$(echo "$POSSIBILITIES" | grep '^.')

echo "Looking for BBC podcasts in ~/Mail/feeds" 1>&2

# Read the feed names to check, one per line, from ~/.podcasts
FEEDS=()
while read -r FEED
do
    FEEDS+=("$FEED")
done < "$HOME/.podcasts"
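# ~/.podcasts is assumed to hold one maildir name per line, each naming a
# directory under ~/Mail/feeds, e.g. (hypothetical names):
#   InOurTime
#   MoreOrLess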

for D in "${FEEDS[@]}"
do
    echo "Checking for $D" 1>&2
    DIR="$HOME/Mail/feeds/$D/new"
    [[ -d "$DIR" ]] || {
        echo "Not a directory '$DIR', skipping" 1>&2
        continue
    }

    # Check whether the maildir has any new entries; thanks to nullglob, the
    # loop body never runs when it's empty
    ANY=0
    for F in "$DIR"/*
    do
        ANY=1
    done
    [[ "$ANY" -eq 1 ]] || continue

    # Guess a destination by looking for a candidate whose normalised name
    # contains the normalised feed name
    GUESSTINATION=""
    while read -r POSSIBLE
    do
        NORM_POSS=$(basename "$POSSIBLE" | norm)
        NORM_D=$(echo "$D" | norm)
        if echo "$NORM_POSS" | grep -q -F "$NORM_D"
        # || echo "$NORM_D" | grep -q -F "$NORM_POSS"
        then
            [[ -z "$GUESSTINATION" ]] || {
                echo "'$D' could be '$GUESSTINATION' or '$POSSIBLE', aborting" 1>&2
                exit 1
            }
            GUESSTINATION=$(basename "$POSSIBLE")
        fi
    done < <(echo "$POSSIBILITIES")
    [[ -n "$GUESSTINATION" ]] || {
        echo "Couldn't guess a destination for '$D', skipping" 1>&2
        continue
    }
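    # For example, a feed called 'InOurTime' would match a directory called
    # 'In Our Time', since both normalise to 'inourtime'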

    echo "Guessing that '$D' should go in '$GUESSTINATION'" 1>&2
    for F in "$DIR"/*
    do
        URL=$(grep -F 'Link: http' < "$F" | grep -o 'http.*') || true
        [[ -n "$URL" ]] || {
            echo "No URL found in '$F', skipping" 1>&2
            continue
        }

        # inDir, get_bbc_podcast and markRead are helper scripts provided
        # elsewhere in warbo-utilities (the TO""DO split presumably keeps
        # this path out of literal TODO searches)
        mkdir -p "$HOME/Audio/TO""DO/$GUESSTINATION"
        if inDir "$HOME/Audio/TO""DO/$GUESSTINATION" get_bbc_podcast "$URL"
        then
            markRead "$F"
        fi
    done
done
echo "Done with BBC podcasts" 1>&2

function getFromSrc {
    # A hopefully generic podcast fetcher

    # Grab our in and out directories from the first two arguments, then shift
    # them off to leave an arbitrary number of grep patterns
    IN="$1"
    OUT="$2"
    shift
    shift
    echo "Looking for $OUT podcasts" 1>&2
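    # Each entry is assumed to be a message written by a feed reader, with
    # at least one http(s) URL somewhere in its body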
    # Look for entries in the given dir
    for F in "$HOME/Mail/feeds/$IN/new"/*
    do
        # Grab the first Web link; 'head' succeeds even when grep matches
        # nothing, so check for an empty result rather than relying on the
        # pipeline's exit status
        PAGE=$(grep -o 'http[s]*://[^ "]*' < "$F" | head -n1)
        [[ -n "$PAGE" ]] || continue
        echo "Found link $PAGE" 1>&2

        if echo "$PAGE" | grep -q -i '\.mp3$'
        then
            echo "Assuming URL '$PAGE' is the file rather than a Web page" 1>&2
            URL="$PAGE"
        else
            # Skip entries whose page we can't fetch (a bare failure here
            # would abort the whole script, due to set -e)
            URL=$(wget -q -O- "$PAGE") || continue

            # Narrow down the page source to a URL using the remaining
            # arguments as patterns for 'grep -o', applied one after the
            # other (then apply head)
            FOUND=1
            for PAT in "$@"
            do
                URL=$(echo "$URL" | grep -o "$PAT") || FOUND=0
            done
            [[ "$FOUND" -eq 1 ]] || continue
            URL=$(echo "$URL" | head -n1)
        fi
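        # For example (hypothetical patterns), passing '<enclosure [^>]*>'
        # and then 'http[^"]*\.mp3' would first isolate enclosure tags, then
        # pull an audio URL out of whichever tags matched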

        # Skip if we didn't get anything (e.g. not a podcast)
        [[ -n "$URL" ]] || continue
        echo "Found URL $URL" 1>&2

        # Create and enter the output dir
        mkdir -p "$HOME/Audio/TO""DO/$OUT"
        pushd "$HOME/Audio/TO""DO/$OUT" > /dev/null

        # Avoid hammering a site with too many requests
        sleep 2

        # Fetch the extracted file; use -c to resume previous attempts
        wget -c "$URL"

        # Remove any extra suffixes from the file name (might as well do all)
        for GOT in *.mp3*
        do
            NAME=$(basename "$GOT")

            # Skip those which are already .mp3
            echo "$NAME" | grep -q -i '\.mp3$' && continue

            TRIMMED=$(echo "$NAME" | grep -o '^.*\.mp3')

            echo "Renaming dodgy-looking '$NAME' to '$TRIMMED'" 1>&2
            mv -v "$NAME" "$TRIMMED"
        done
        popd > /dev/null

        markRead "$F"
    done
}
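
# A hypothetical example call, for a feed whose entries link pages that
# embed their audio directly:
#   getFromSrc 'SomeFeed' 'Some Show' 'https\?://[^"]*\.mp3'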

# Read each line of ~/.podcast_extractors into the array ENTRY, with
# tab-separated elements, skipping comment lines; each entry provides the
# arguments for one getFromSrc call
while IFS=$'\t' read -r -a ENTRY
do
    getFromSrc "${ENTRY[@]}"
done < <(grep -v '^ *#' < ~/.podcast_extractors)
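
# Each ~/.podcast_extractors line is assumed to hold tab-separated fields:
# a feed directory, an output directory, then zero or more grep patterns,
# e.g. (hypothetical):
#   SomeFeed	Some Show	https\?://[^"]*\.mp3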