html-inliner: c7f2bc60cb887176b304be278fecb0a06c83607c
1: #!/usr/bin/env python
2: try:
3: from BeautifulSoup import BeautifulSoup
4: except ImportError:
5: from bs4 import BeautifulSoup
6: import os
7: import sys
8: import urllib2
9: import urlparse
10:
def fixup(url):
    """Resolve a possibly-relative resource URL to an absolute one.

    Rules, applied in order:

    * Scheme-relative URLs (``//host/path``) are prefixed with ``http:``.
    * URLs that already contain ``://`` are returned unchanged.
    * Anything else is resolved against the ``BASE_URL`` environment
      variable: a ``file://`` base is joined by plain concatenation with
      a ``/`` separator, any other base goes through
      ``urlparse.urljoin``.

    Args:
        url: The URL string taken from an HTML attribute.

    Returns:
        The absolute URL, or ``None`` when ``url`` is relative and
        ``BASE_URL`` is unset or empty (a diagnostic is written to
        stderr in that case).
    """
    if url.startswith('//'):
        return 'http:' + url
    if '://' in url:
        return url

    base = os.getenv('BASE_URL')
    if not base:
        # Newline-terminate the diagnostic so it does not run into
        # whatever is written to stderr next.
        sys.stderr.write(
            'Found non-absolute URL "{0}", but no BASE_URL given\n'
            .format(url))
        return None

    if base.startswith('file://'):
        # file:// bases are concatenated rather than passed to urljoin.
        return base + '/' + url
    return urlparse.urljoin(base, url)
21:
def _inline(tag, attr, mime, label):
    """Replace ``tag[attr]`` with a base64 ``data:`` URI of its target.

    Args:
        tag: A parsed HTML element (e.g. a ``script`` or ``link`` tag).
        attr: Name of the attribute holding the resource URL.
        mime: MIME type to advertise in the generated ``data:`` URI.
        label: Human-readable description used in the stderr progress
            messages (e.g. ``'script src'``).

    Tags without the attribute, or whose attribute is already a
    ``data:`` URI, are left untouched.  Tags whose URL cannot be made
    absolute are skipped as well.
    """
    url = tag.get(attr)
    if url is None or url.startswith('data:'):
        return

    sys.stderr.write('Found {0} "{1}"\n'.format(label, url))
    url = fixup(url)
    if url is None:
        # fixup() has already written a diagnostic; skip this resource
        # instead of handing None to urlopen().
        return

    sys.stderr.write('Fetching {0} "{1}"\n'.format(label, url))
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # The 'base64' codec wraps its output with newlines; strip them so
    # the attribute holds one contiguous data URI.
    payload = data.encode('base64').replace('\n', '')
    tag[attr] = 'data:{0};base64,'.format(mime) + payload


# Read the document from stdin, inline every external script and
# stylesheet, then write the result to stdout.
html = BeautifulSoup(sys.stdin.read())

for script in html.findAll('script'):
    _inline(script, 'src', 'text/javascript', 'script src')

for css in html.findAll('link',
                        attrs={'rel': 'stylesheet', 'type': 'text/css'}):
    _inline(css, 'href', 'text/css', 'CSS href')

print(html.prettify())
Generated by git2html.