# 90251dda991fb8acaf0587e95d5da692661dbbb0
# -*- coding: utf-8 -*-
#
# (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
#
# Changelog:
# 0.4 added mirror mode.
# 0.3 added apple streaming playlist parsing and decryption
# 0.2 added python 2.4 urlparse compatibility
import argparse
import os
import re
import struct
from subprocess import *

import requests
from bs4 import BeautifulSoup
from Crypto.Cipher import AES
def scrape_player_page(url, title):
    """
    Try to scrape the site for video and download.

    url   -- player page address; a value not starting with 'http' is
             treated as a path on http://www.svtplay.se.
    title -- base name for the output file. When empty/None the page's
             og:title meta tag is used, with '|' and '/' replaced by '_'.

    Returns the dict describing the video ('title', 'duration', 'url' and,
    for HLS streams, 'filename' plus optionally 'category'), or False when
    no downloadable stream was found on the page.
    """
    if not url.startswith('http'):
        url = "http://www.svtplay.se" + url
    video = {}
    soup = BeautifulSoup(requests.get(url).text)

    # The player is the first <a> carrying a data-json-href attribute; bail
    # out cleanly instead of raising IndexError/TypeError when it is absent.
    player_links = soup.body('a', {'data-json-href': True}) if soup.body else []
    if not player_links:
        print("Could not find any streams")
        return False
    video_player = player_links[0]

    json_href = video_player.attrs['data-json-href']
    if 'oppetarkiv.se' in url:
        json_url = "http://www.oppetarkiv.se/%s" % json_href + "?output=json"
    elif json_href.startswith("/wd"):
        json_url = "http://www.svt.se/%s" % json_href
    else:
        json_url = "http://www.svtplay.se/%s" % json_href + "?output=json"
    flashvars = requests.get(json_url).json()

    video['duration'] = video_player.attrs.get('data-length', 0)
    video['title'] = title
    if not title:
        video['title'] = soup.find('meta', {'property': 'og:title'}).attrs['content'].replace('|', '_').replace('/', '_')

    if 'dynamicStreams' in flashvars:
        video['url'] = flashvars['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] + '.mp4'
        filename = video['title'] + ".mp4"
        # Hand rtmpdump the extracted stream address, not the HTML page url.
        print(Popen(["rtmpdump", "-o" + filename, "-r", video['url']], stdout=PIPE).communicate()[0])
    if 'pathflv' in flashvars:
        rtmp = flashvars['pathflv'][0]
        filename = video['title'] + ".flv"
        # Record the stream so a successful flv dump is not reported as
        # "Could not find any streams" below.
        video['url'] = rtmp
        print(Popen(["mplayer", "-dumpstream", "-dumpfile", filename, rtmp], stdout=PIPE).communicate()[0])
    if 'video' in flashvars:
        for reference in flashvars['video']['videoReferences']:
            if 'm3u8' in reference['url']:
                video['url'] = reference['url']
                video['filename'] = video['title'] + '.ts'
                if 'statistics' in flashvars:
                    video['category'] = flashvars['statistics']['category']
        # 'filename' is only set for an m3u8 reference; without one there is
        # no playlist to download from.
        if 'filename' in video:
            download_from_playlist(video)
    if 'url' not in video:
        print("Could not find any streams")
        return False
    return video
def download_from_playlist(video):
    """
    Download the highest-bandwidth HLS variant of a video to disk.

    video -- dict with 'url' (master playlist address) and 'filename'
             (output path). Segments are appended to that file in playlist
             order and decrypted on the fly when the segment playlist
             announces an AES-128 key.

    Returns None. Nothing is written when the master playlist cannot be
    parsed or lists no variants.
    """
    from urllib.parse import urljoin

    playlist = parse_playlist(requests.get(video['url']).text)
    if not playlist:
        print("Could not parse playlist")
        return
    best = max(playlist, key=lambda k: int(k.get('BANDWIDTH', 0)))
    # urljoin leaves absolute URLs untouched and resolves relative ones.
    videourl = urljoin(video['url'], best['url'])
    segments, metadata = parse_segment_playlist(videourl)

    key_attrs = metadata.get("EXT-X-KEY") or {}
    key = None
    if key_attrs.get('URI') and key_attrs.get('METHOD', 'AES-128') != 'NONE':
        # The key is 16 raw bytes; .text would decode it into a str.
        key = requests.get(key_attrs['URI'].strip('"')).content

    with open("%s" % video['filename'], "wb") as ofile:
        size = 0
        for segment, url in enumerate(segments):
            decryptor = None
            if key is not None:
                decryptor = AES.new(key, AES.MODE_CBC, _segment_iv(key_attrs, segment))
            response = requests.get(url, stream=True)
            try:
                size += _copy_segment(response.raw, ofile, decryptor)
            finally:
                response.close()
            # '\r' plus end="" rewrites one progress line in place.
            print("\r{:.1f} MB".format(size / 1024 / 1024), end="", flush=True)
        print()


def _segment_iv(key_attrs, sequence):
    """Return the 16-byte CBC IV for the segment with the given sequence number."""
    explicit = key_attrs.get('IV')
    if explicit:
        digits = explicit.strip('"')
        if digits[:2].lower() == '0x':
            digits = digits[2:]
        return bytes.fromhex(digits.zfill(32))
    # Without an IV attribute HLS uses the media sequence number as a
    # big-endian 128-bit integer.
    # NOTE(review): assumes the playlist's media sequence starts at 0, since
    # parse_segment_playlist does not expose EXT-X-MEDIA-SEQUENCE.
    return struct.pack(">QQ", 0, sequence)


def _copy_segment(stream, ofile, decryptor):
    """Copy one segment from stream to ofile, decrypting if asked; return bytes written."""
    written = 0
    carry = b""  # ciphertext that does not yet fill a whole AES block
    held = b""   # last plaintext block, held back so padding can be removed
    while True:
        buf = stream.read(4096)
        if not buf:
            break
        if decryptor is not None:
            buf = carry + buf
            usable = len(buf) - len(buf) % 16
            buf, carry = buf[:usable], buf[usable:]
            if not buf:
                continue
            buf = held + decryptor.decrypt(buf)
            buf, held = buf[:-16], buf[-16:]
        ofile.write(buf)
        written += len(buf)
    if held:
        # Encrypted segments end with PKCS7 padding; strip it when well formed.
        pad = held[-1]
        if 1 <= pad <= 16 and held.endswith(bytes([pad]) * pad):
            held = held[:-pad]
        ofile.write(held)
        written += len(held)
    return written
def parse_playlist(playlist):
    """
    Parse the text of an HLS master playlist into its variant streams.

    playlist -- full playlist text.

    Returns a list with one dict per #EXT-X-STREAM-INF entry. Each dict maps
    the attribute names of that tag (e.g. 'BANDWIDTH') to their string
    values, quoted values keeping their quotes, plus 'url' holding the URI
    line that follows the tag. Returns an empty list when the text does not
    start with #EXTM3U or lists no variants.
    """
    if not playlist.startswith("#EXTM3U"):
        return []
    # NAME=value pairs; a quoted value may itself contain commas.
    attribute = re.compile(r'''([^=,\s]+)=("[^"]*"|[^,]*)''')
    items = []
    md = None  # attributes of a STREAM-INF tag still waiting for its URI
    for line in playlist.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith('#EXT-X-STREAM-INF'):
            md = dict(attribute.findall(line.partition(':')[2]))
        elif line.startswith('#'):
            # Any other tag or comment; it must not be mistaken for a URI.
            continue
        elif md is not None:
            md['url'] = line
            items.append(md)
            md = None
    return items
def parse_segment_playlist(playlisturl):
    """
    Fetch an HLS media playlist and list its segments.

    playlisturl -- address of the media (segment) playlist.

    Returns a tuple (segments, metadata): segments is the list of absolute
    segment URLs in playlist order; metadata holds, under "EXT-X-KEY", a dict
    of the attributes of the last #EXT-X-KEY tag seen (values keep their
    quotes), and is empty when the playlist has no such tag.

    Raises ValueError when the response does not start with #EXTM3U.
    """
    from urllib.parse import urljoin

    playlist = requests.get(playlisturl).text
    # Raise explicitly: an assert would be stripped when running with -O.
    if not playlist.startswith("#EXTM3U"):
        raise ValueError("Not an M3U8 playlist: {}".format(playlisturl))
    PATTERN = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
    segments = []
    metadata = {}
    next_is_url = False
    for row in playlist.splitlines():
        row = row.strip()
        if not row:
            continue
        if not row.startswith('#'):
            if next_is_url:
                # urljoin keeps absolute URLs and resolves relative paths.
                segments.append(urljoin(playlisturl, row))
                next_is_url = False
            continue
        # Tag lines: stay in "waiting for URI" state across tags that sit
        # between #EXTINF and the segment address.
        if 'EXTINF' in row:
            next_is_url = True
        if "EXT-X-KEY" in row:
            row = row.split(':', 1)[1]  # skip first part
            parts = PATTERN.split(row)[1:-1]  # do magic re split and keep quotes
            # throw away the commas and make dict of the pairs
            metadata["EXT-X-KEY"] = dict(part.split('=', 1) for part in parts if '=' in part)
    return (segments, metadata)
def parse_videolist():
    """
    Iterate over every video listed by the svtplay.se paged video index.

    Yields one dict per <article> element with the keys 'title',
    'description', 'url', 'thumb-url', 'num' (running counter) and 'total'
    (estimated number of videos: page count times page size).
    """
    # NOTE(review): page size and the counter's starting value are not
    # visible in the reviewed excerpt; 8 per page and 0-based numbering are
    # assumed - confirm against the site / original file.
    videos_per_page = 8
    video_num = 0
    # this call does not work for getting the pages, we use it for the page totals only
    soup = BeautifulSoup(requests.get("http://www.svtplay.se/ajax/videospager").text)
    pager = soup.find('a', {'data-currentpage': True})
    if pager is None:
        # No pager element means no page total; yield nothing instead of
        # failing with AttributeError.
        return
    page_tot = int(pager.attrs['data-lastpage'])
    for page_num in range(1, page_tot + 1):
        base_url = "http://www.svtplay.se/ajax/videos?sida={}".format(page_num)
        soup = BeautifulSoup(requests.get(base_url).text)
        for article in soup.findAll('article'):
            meta = dict(article.attrs)
            video = {}
            video['title'] = meta['data-title']
            video['description'] = meta['data-description']
            video['url'] = dict(article.find('a').attrs)['href']
            video['thumb-url'] = dict(article.find('img', {}).attrs)['src']
            video['num'] = video_num
            video['total'] = page_tot * videos_per_page
            video_num += 1
            yield video
def remux(video):
    """
    Remux a downloaded transport stream into a Matroska file with avconv.

    video -- dict whose 'filename' names the downloaded .ts file. Video and
             audio are stream-copied into <basename>.mkv and the .ts file is
             removed afterwards, but only when avconv exited successfully.
    """
    source = video['filename']
    # Strip only a trailing '.ts'; split('.ts')[0] would cut the name at the
    # first '.ts' occurring anywhere in it.
    basename = source[:-len('.ts')] if source.endswith('.ts') else source
    process = Popen(["avconv", "-i", source, "-vcodec", "copy", "-acodec", "copy", basename + '.mkv'], stdout=PIPE)
    print(process.communicate()[0])
    if process.returncode != 0:
        # Keep the download so a failed remux does not lose data.
        print("Remux failed, keeping {}".format(source))
        return
    os.unlink(source)
def main(argv=None):
    """
    Command line entry point.

    Exactly one of --rss, --url or --mirror selects what to download;
    --no_act only prints what would be done and --no_remux keeps the raw
    download instead of remuxing it into mkv.
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-r", "--rss", help="Download all files in rss")
    group.add_argument("-u", "--url", help="Download video in url")
    group.add_argument("-m", "--mirror", help="Mirror all files", action="store_true")
    parser.add_argument("-n", "--no_act", help="Just print what would be done, don't do any downloading.", action="store_true")
    parser.add_argument("--no_remux", help="Don't remux into mkv", action="store_true")
    args = parser.parse_args(argv)

    def finish(video):
        # remux() needs the 'filename' that only an HLS download provides.
        if video and video.get('filename') and not args.no_remux:
            remux(video)

    if args.rss:
        import feedparser  # only needed in rss mode

        d = feedparser.parse(args.rss)
        for e in d.entries:
            print(("Downloading: %s" % e.title))
            if args.no_act:
                continue
            finish(scrape_player_page(e.link, e.title))
    elif args.mirror:
        for video in parse_videolist():
            video['title'] = video['title'].replace('/', '_')
            print(video['title'] + '.mkv')
            print("{} of {}".format(video['num'], video['total']))
            if os.path.exists(video['title'] + '.mkv'):
                print("Skipping")
                continue
            print("Downloading...")
            if args.no_act:
                continue
            finish(scrape_player_page(video['url'], video['title']))
    else:
        if args.no_act:
            print("Would download {}".format(args.url))
            return
        video = scrape_player_page(args.url, None)
        finish(video)
        if video:
            print(("Downloaded {}".format(args.url)))


if __name__ == "__main__":
    main()