]>
git.frykholm.com Git - svtplaydump.git/blob - svtplaydump.py
2 # -*- coding: utf-8 -*-
4 # (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>
20 # 0.4 added mirror mode.
21 # 0.3 added apple streaming playlist parsing and decryption
22 # 0.2 added python 2.4 urlparse compatibility
25 from BeautifulSoup
import BeautifulSoup
26 from subprocess
import *
29 from Crypto
.Cipher
import AES
39 import urllib2
.urlparse
as urlparse
46 Try to scrape the site for video and download.
48 if not url
.startswith('http'):
49 url
= "http://www.svtplay.se" + url
51 page
= urllib2
.urlopen(url
).read()
52 soup
= BeautifulSoup(page
,convertEntities
=BeautifulSoup
.HTML_ENTITIES
)
53 video_player
= soup
.body('a',{'data-json-href':True})[0]
54 if video_player
.attrMap
['data-json-href'].startswith("/wd"):
55 flashvars
= json
.loads(urllib2
.urlopen("http://www.svt.se/%s"%video_player
.attrMap
['data-json-href']).read())
57 flashvars
= json
.loads(urllib2
.urlopen("http://www.svtplay.se/%s"%video_player
.attrMap
['data-json-href']+"?output=json").read())
58 video
['duration'] = video_player
.attrMap
.get('data-length',0)
59 video
['title'] = title
61 video
['title'] = soup
.find('meta',{'property':'og:title'}).attrMap
['content'].replace('|','_').replace('/','_')
62 if 'dynamicStreams' in flashvars
:
63 video
['url'] = flashvars
['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] +'.mp4'
64 filename
= video
['title']+".mp4"
65 print Popen(["rtmpdump",u
"-o"+filename
,"-r", url
], stdout
=PIPE
).communicate()[0]
66 if 'pathflv' in flashvars
:
67 rtmp
= flashvars
['pathflv'][0]
68 filename
= video
['title']+".flv"
69 print Popen(["mplayer","-dumpstream","-dumpfile",filename
, rtmp
], stdout
=PIPE
).communicate()[0]
70 if 'video' in flashvars
:
71 for reference
in flashvars
['video']['videoReferences']:
72 if reference
['url'].endswith("m3u8"):
73 video
['url']=reference
['url']
74 video
['filename'] = video
['title']+'.ts'
75 if 'statistics' in flashvars
:
76 video
['category'] = flashvars
['statistics']['category']
77 download_from_playlist(video
)
79 print "Could not find any streams"
83 def download_from_playlist(video
):
84 playlist
= parse_playlist(urllib2
.urlopen(video
['url']).read())
85 videourl
= sorted(playlist
, key
=lambda k
: int(k
['BANDWIDTH']))[-1]['url']
86 segments
, metadata
= parse_segment_playlist(urllib2
.urlopen(videourl
).read())
87 if "EXT-X-KEY" in metadata
:
88 key
= urllib2
.urlopen(metadata
["EXT-X-KEY"]['URI'].strip('"')).read()
92 with
open("%s"%video
['filename'],"w") as ofile
:
96 ufile
= urllib2
.urlopen(url
)
97 print "\r{} MB".format(size
/1024/1024),
100 iv
=struct
.pack("IIII",segment
,0,0,0)
101 decryptor
= AES
.new(key
, AES
.MODE_CBC
, iv
)
103 buf
= ufile
.read(1024)
106 buf
= decryptor
.decrypt(buf
)
114 def parse_playlist(playlist
):
115 if not playlist
.startswith("#EXTM3U"):
118 playlist
= playlist
.splitlines()[1:]
120 for (metadata_string
,url
) in zip(playlist
[0::2], playlist
[1::2]):
122 assert 'EXT-X-STREAM-INF' in metadata_string
.split(':')[0]
123 for item
in metadata_string
.split(':')[1].split(','):
125 md
.update([item
.split('='),])
130 def parse_segment_playlist(playlist
):
131 assert playlist
.startswith("#EXTM3U")
132 PATTERN
= re
.compile(r
'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
136 for row
in playlist
.splitlines():
143 if "EXT-X-KEY" in row
:
144 row
= row
.split(':',1)[1] #skip first part
145 parts
= PATTERN
.split(row
)[1:-1] #do magic re split and keep quotes
146 metadata
["EXT-X-KEY"] = dict([part
.split('=',1) for part
in parts
if '=' in part
]) #throw away the commas and make dict of the pairs
147 return(segments
, metadata
)
148 def parse_videolist():
149 page
= urllib2
.urlopen("http://www.svtplay.se/ajax/videos?antal=100").read()
150 soup
= BeautifulSoup(page
,convertEntities
=BeautifulSoup
.HTML_ENTITIES
)
152 for article
in soup
.findAll('article'):
153 meta
= dict(article
.attrs
)
155 video
['title'] = meta
['data-title']
156 video
['description'] = meta
['data-description']
157 video
['url'] = dict(article
.find('a').attrs
)['href']
161 if __name__
== "__main__":
162 parser
= argparse
.ArgumentParser()
163 group
= parser
.add_mutually_exclusive_group(required
=True)
164 group
.add_argument("-r", "--rss", help="Download all files in rss")
165 group
.add_argument("-u", "--url", help="Download video in url")
166 group
.add_argument("-m", "--mirror", help="Mirror all files", action
="store_true")
167 args
= parser
.parse_args()
169 d
= feedparser
.parse(args
.url
)
171 print("Downloading: %s"%e.title
)
172 filename
= main(e
.link
, e
.title
)
173 print Popen(["avconv","-i",filename
,"-vcodec","copy","-acodec","copy", filename
+'.mkv'], stdout
=PIPE
).communicate()[0]
174 #print(e.description)
176 for video
in parse_videolist():
177 video
['title'] = video
['title'].replace('/','_')
178 print video
['title']+'.mkv',
179 if os
.path
.exists(video
['title']+'.mkv'):
182 print("Downloading...")
183 ret
= main(video
['url'], video
['title'])
185 print Popen(["avconv","-i",video
['title']+'.ts',"-vcodec","copy","-acodec","copy", video
['title']+'.mkv'], stdout
=PIPE
).communicate()[0]
187 os
.unlink(video
['title']+'.ts')
189 import pdb
;pdb
.set_trace()
191 video
= main(args
.url
, None)
192 print("Downloaded {}".format(video
['title']))