warbo-utilities: 31484ac95cc319d78a3fab01d97791aa2834b241
#!/usr/bin/env bash
set -e
shopt -s nullglob

function norm {
    # Strip non-alphanumeric and convert to lowercase
    tr '[:upper:]' '[:lower:]' | tr -dc '[:alnum:]'
}
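# For example, 'echo "In Our Time" | norm' gives 'inourtime'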

# Build a newline-separated list of candidate destination directories; the
# final grep strips the blank line which the printf starts with (and, via
# set -e, aborts the script if there are no candidates at all)
POSSIBILITIES=""
for POSSIBLE in ~/Audio/Non-fiction/*
do
    POSSIBILITIES=$(printf '%s\n%s' "$POSSIBILITIES" "$POSSIBLE")
done
POSSIBILITIES=$(echo "$POSSIBILITIES" | grep '^.')

echo "Looking for BBC podcasts in ~/Mail/feeds" 1>&2

# Read the feed names to check, one per line, from ~/.podcasts
FEEDS=()
while read -r FEED
do
    FEEDS+=("$FEED")
done < "$HOME/.podcasts"
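# ~/.podcasts is assumed to hold one maildir name per line, each naming a
# directory under ~/Mail/feeds, e.g. (hypothetical names):
#   InOurTime
#   MoreOrLess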

for D in "${FEEDS[@]}"
do
    echo "Checking for $D" 1>&2
    DIR="$HOME/Mail/feeds/$D/new"
    [[ -d "$DIR" ]] || {
        echo "Not a directory '$DIR', skipping" 1>&2
        continue
    }

    # Check whether the maildir has any new entries; thanks to nullglob, the
    # loop body never runs when it's empty
    ANY=0
    for F in "$DIR"/*
    do
        ANY=1
    done
    [[ "$ANY" -eq 1 ]] || continue

    # Guess a destination by looking for a candidate whose normalised name
    # contains the normalised feed name
    GUESSTINATION=""
    while read -r POSSIBLE
    do
        NORM_POSS=$(basename "$POSSIBLE" | norm)
        NORM_D=$(echo "$D" | norm)
        if echo "$NORM_POSS" | grep -q -F "$NORM_D"
        # || echo "$NORM_D" | grep -q -F "$NORM_POSS"
        then
            [[ -z "$GUESSTINATION" ]] || {
                echo "'$D' could be '$GUESSTINATION' or '$POSSIBLE', aborting" 1>&2
                exit 1
            }
            GUESSTINATION=$(basename "$POSSIBLE")
        fi
    done < <(echo "$POSSIBILITIES")
    [[ -n "$GUESSTINATION" ]] || {
        echo "Couldn't guess a destination for '$D', skipping" 1>&2
        continue
    }
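    # For example, a feed called 'InOurTime' would match a directory called
    # 'In Our Time', since both normalise to 'inourtime'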

    echo "Guessing that '$D' should go in '$GUESSTINATION'" 1>&2
    for F in "$DIR"/*
    do
        URL=$(grep -F 'Link: http' < "$F" | grep -o 'http.*') || true
        [[ -n "$URL" ]] || {
            echo "No URL found in '$F', skipping" 1>&2
            continue
        }

        # inDir, get_bbc_podcast and markRead are helper scripts provided
        # elsewhere in warbo-utilities (the TO""DO split presumably keeps
        # this path out of literal TODO searches)
        mkdir -p "$HOME/Audio/TO""DO/$GUESSTINATION"
        if inDir "$HOME/Audio/TO""DO/$GUESSTINATION" get_bbc_podcast "$URL"
        then
            markRead "$F"
        fi
    done
done
echo "Done with BBC podcasts" 1>&2

function getFromSrc {
    # A hopefully generic podcast fetcher

    # Grab our in and out directories from the first two arguments, then shift
    # them off to leave an arbitrary number of grep patterns
    IN="$1"
    OUT="$2"
    shift
    shift
    echo "Looking for $OUT podcasts" 1>&2
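    # Each entry is assumed to be a message written by a feed reader, with
    # at least one http(s) URL somewhere in its body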
    # Look for entries in the given dir
    for F in "$HOME/Mail/feeds/$IN/new"/*
    do
        # Grab the first Web link; 'head' succeeds even when grep matches
        # nothing, so check for an empty result rather than relying on the
        # pipeline's exit status
        PAGE=$(grep -o 'http[s]*://[^ "]*' < "$F" | head -n1)
        [[ -n "$PAGE" ]] || continue
        echo "Found link $PAGE" 1>&2

        if echo "$PAGE" | grep -q -i '\.mp3$'
        then
            echo "Assuming URL '$PAGE' is the file rather than a Web page" 1>&2
            URL="$PAGE"
        else
            # Skip entries whose page we can't fetch (a bare failure here
            # would abort the whole script, due to set -e)
            URL=$(wget -q -O- "$PAGE") || continue

            # Narrow down the page source to a URL using the remaining
            # arguments as patterns for 'grep -o', applied one after the
            # other (then apply head)
            FOUND=1
            for PAT in "$@"
            do
                URL=$(echo "$URL" | grep -o "$PAT") || FOUND=0
            done
            [[ "$FOUND" -eq 1 ]] || continue
            URL=$(echo "$URL" | head -n1)
        fi
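        # For example (hypothetical patterns), passing '<enclosure [^>]*>'
        # and then 'http[^"]*\.mp3' would first isolate enclosure tags, then
        # pull an audio URL out of whichever tags matched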

        # Skip if we didn't get anything (e.g. not a podcast)
        [[ -n "$URL" ]] || continue
        echo "Found URL $URL" 1>&2

        # Create and enter the output dir
        mkdir -p "$HOME/Audio/TO""DO/$OUT"
        pushd "$HOME/Audio/TO""DO/$OUT" > /dev/null

        # Avoid hammering a site with too many requests
        sleep 2

        # Fetch the extracted file; use -c to resume previous attempts
        wget -c "$URL"

        # Remove any extra suffixes from the file name (might as well do all)
        for GOT in *.mp3*
        do
            NAME=$(basename "$GOT")

            # Skip those which are already .mp3
            echo "$NAME" | grep -q -i '\.mp3$' && continue

            TRIMMED=$(echo "$NAME" | grep -o '^.*\.mp3')

            echo "Renaming dodgy-looking '$NAME' to '$TRIMMED'" 1>&2
            mv -v "$NAME" "$TRIMMED"
        done
        popd > /dev/null

        markRead "$F"
    done
}
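
# A hypothetical example call, for a feed whose entries link pages that
# embed their audio directly:
#   getFromSrc 'SomeFeed' 'Some Show' 'https\?://[^"]*\.mp3'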

# Read each line of ~/.podcast_extractors into the array ENTRY, with
# tab-separated elements, skipping comment lines; each entry provides the
# arguments for one getFromSrc call
while IFS=$'\t' read -r -a ENTRY
do
    getFromSrc "${ENTRY[@]}"
done < <(grep -v '^ *#' < ~/.podcast_extractors)
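
# Each ~/.podcast_extractors line is assumed to hold tab-separated fields:
# a feed directory, an output directory, then zero or more grep patterns,
# e.g. (hypothetical):
#   SomeFeed	Some Show	https\?://[^"]*\.mp3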