#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>
#
# Changelog:
# 0.4 added mirror mode.
# 0.3 added apple streaming playlist parsing and decryption
# 0.2 added python 2.4 urlparse compatibility

from BeautifulSoup import BeautifulSoup
from subprocess import *
from Crypto.Cipher import AES
import argparse
import feedparser
import json
import os
import re
import struct
import urllib2
import urlparse
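
# Runtime notes: this is a Python 2 script. Third-party modules imported above
# are BeautifulSoup 3 (the convertEntities API), PyCrypto (Crypto.Cipher.AES)
# and feedparser; the download paths below additionally shell out to rtmpdump,
# mplayer and avconv.
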
def scrape_player_page(url, title):
    """
    Try to scrape the site for video and download.
    """
    if not url.startswith('http'):
        url = "http://www.svtplay.se" + url
    video = {}
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    video_player = soup.body('a', {'data-json-href': True})[0]
    if video_player.attrMap['data-json-href'].startswith("/wd"):
        flashvars = json.loads(urllib2.urlopen("http://www.svt.se/%s" % video_player.attrMap['data-json-href']).read())
    else:
        flashvars = json.loads(urllib2.urlopen("http://www.svtplay.se/%s" % video_player.attrMap['data-json-href'] + "?output=json").read())
    video['duration'] = video_player.attrMap.get('data-length', 0)
    video['title'] = title
    if not title:
        # Fall back to the og:title meta tag, sanitised for use as a filename.
        video['title'] = soup.find('meta', {'property': 'og:title'}).attrMap['content'].replace('|', '_').replace('/', '_')
    if 'dynamicStreams' in flashvars:
        video['url'] = flashvars['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] + '.mp4'
        filename = video['title'] + ".mp4"
        print Popen(["rtmpdump", u"-o" + filename, "-r", url], stdout=PIPE).communicate()[0]
    if 'pathflv' in flashvars:
        rtmp = flashvars['pathflv'][0]
        filename = video['title'] + ".flv"
        print Popen(["mplayer", "-dumpstream", "-dumpfile", filename, rtmp], stdout=PIPE).communicate()[0]
    if 'video' in flashvars:
        for reference in flashvars['video']['videoReferences']:
            if reference['url'].endswith("m3u8"):
                video['url'] = reference['url']
        video['filename'] = video['title'] + '.ts'
        if 'statistics' in flashvars:
            video['category'] = flashvars['statistics']['category']
        download_from_playlist(video)
        return video['filename']  # callers in __main__ expect the downloaded filename back
    else:
        print "Could not find any streams"
def download_from_playlist(video):
    playlist = parse_playlist(urllib2.urlopen(video['url']).read())
    # Pick the highest-bandwidth variant from the master playlist.
    videourl = sorted(playlist, key=lambda k: int(k['BANDWIDTH']))[-1]['url']
    segments, metadata = parse_segment_playlist(urllib2.urlopen(videourl).read())
    if "EXT-X-KEY" in metadata:
        key = urllib2.urlopen(metadata["EXT-X-KEY"]['URI'].strip('"')).read()
        decrypt = True
    else:
        decrypt = False
    with open("%s" % video['filename'], "w") as ofile:
        segment = 0
        size = 0
        for url in segments:
            ufile = urllib2.urlopen(url)
            print "\r{} MB".format(size / 1024 / 1024),
            if decrypt:
                # One AES-128-CBC cipher per segment, IV derived from the
                # running segment counter.
                iv = struct.pack("IIII", segment, 0, 0, 0)
                decryptor = AES.new(key, AES.MODE_CBC, iv)
            while True:
                buf = ufile.read(1024)
                if not buf:
                    break
                if decrypt:
                    buf = decryptor.decrypt(buf)
                ofile.write(buf)
                size += len(buf)
            segment += 1
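
# Decryption note: when the media playlist carries EXT-X-KEY, each .ts segment
# is AES-128-CBC encrypted. The loop above builds the IV from the running
# segment counter with struct.pack("IIII", segment, 0, 0, 0); for reference,
# the HLS spec defines the default IV as the 128-bit big-endian media sequence
# number, i.e. struct.pack(">IIII", 0, 0, 0, sequence_number).
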
def parse_playlist(playlist):
    if not playlist.startswith("#EXTM3U"):
        raise ValueError("Not an m3u8 playlist: %r" % playlist[:64])
    playlist = playlist.splitlines()[1:]
    items = []
    # The master playlist alternates metadata lines and variant URLs.
    for (metadata_string, url) in zip(playlist[0::2], playlist[1::2]):
        md = dict()
        assert 'EXT-X-STREAM-INF' in metadata_string.split(':')[0]
        for item in metadata_string.split(':')[1].split(','):
            if '=' in item:
                md.update([item.split('='), ])
        md['url'] = url
        items.append(md)
    return items
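
# Sketch of the master-playlist input parse_playlist() expects and the list of
# dicts it returns; the attribute values are made up:
#
#   #EXTM3U
#   #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1000000
#   http://example.com/hls/index_1000.m3u8
#   #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2500000
#   http://example.com/hls/index_2500.m3u8
#
# -> [{'PROGRAM-ID': '1', 'BANDWIDTH': '1000000', 'url': 'http://example.com/hls/index_1000.m3u8'},
#     {'PROGRAM-ID': '1', 'BANDWIDTH': '2500000', 'url': 'http://example.com/hls/index_2500.m3u8'}]
#
# download_from_playlist() then sorts on int(BANDWIDTH) and takes the last,
# i.e. highest-bandwidth, variant.
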
def parse_segment_playlist(playlist):
    assert playlist.startswith("#EXTM3U")
    PATTERN = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
    segments = []
    metadata = {}
    for row in playlist.splitlines():
        if row and not row.startswith('#'):
            # Any non-tag line is a media segment URI.
            segments.append(row)
        if "EXT-X-KEY" in row:
            row = row.split(':', 1)[1]  # skip first part
            parts = PATTERN.split(row)[1:-1]  # do magic re split and keep quotes
            metadata["EXT-X-KEY"] = dict([part.split('=', 1) for part in parts if '=' in part])  # throw away the commas and make dict of the pairs
    return (segments, metadata)
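
# Sketch of the media playlist parse_segment_playlist() consumes; the URIs are
# made up:
#
#   #EXTM3U
#   #EXT-X-KEY:METHOD=AES-128,URI="https://example.com/key.bin"
#   #EXTINF:10,
#   segment_00001.ts
#   #EXTINF:10,
#   segment_00002.ts
#
# -> (['segment_00001.ts', 'segment_00002.ts'],
#     {'EXT-X-KEY': {'METHOD': 'AES-128', 'URI': '"https://example.com/key.bin"'}})
#
# The quotes around URI survive the regex split, which is why
# download_from_playlist() strips them with .strip('"') before fetching the key.
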
def parse_videolist():
    page_num = 1
    video_num = 0
    videos_per_page = 8  # assumed page size of the /ajax/videos listing
    page = urllib2.urlopen("http://www.svtplay.se/ajax/videospager").read()  # this call does not work for getting the pages, we use it for the page totals only
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    page_tot = int(soup.find('a', {'data-currentpage': True}).attrMap['data-lastpage'])
    while page_num <= page_tot:
        base_url = "http://www.svtplay.se/ajax/videos?sida={}".format(page_num)
        page = urllib2.urlopen(base_url).read()
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        for article in soup.findAll('article'):
            meta = dict(article.attrs)
            video = {}
            video['title'] = meta['data-title']
            video['description'] = meta['data-description']
            video['url'] = dict(article.find('a').attrs)['href']
            video['thumb-url'] = dict(article.find('img', {}).attrs)['src']
            video['num'] = video_num
            video['total'] = page_tot * videos_per_page
            video_num += 1
            yield video
        page_num += 1
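
# parse_videolist() is a generator; mirror mode below consumes it as
#
#   for video in parse_videolist():
#       ...  # dict with 'title', 'description', 'url', 'thumb-url',
#            # 'num' and 'total'
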
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-r", "--rss", help="Download all files in rss")
    group.add_argument("-u", "--url", help="Download video in url")
    group.add_argument("-m", "--mirror", help="Mirror all files", action="store_true")
    parser.add_argument("-n", "--no_act", help="Just print what would be done, don't do any downloading.", action="store_true")
    args = parser.parse_args()
    if args.rss:
        d = feedparser.parse(args.rss)
        for e in d.entries:
            print("Downloading: %s" % e.title)
            if args.no_act:
                continue
            filename = scrape_player_page(e.link, e.title)
            print Popen(["avconv", "-i", filename, "-vcodec", "copy", "-acodec", "copy", filename + '.mkv'], stdout=PIPE).communicate()[0]
            # print(e.description)
    elif args.mirror:
        for video in parse_videolist():
            video['title'] = video['title'].replace('/', '_')
            print video['title'] + '.mkv',
            print u"{} of {}".format(video['num'], video['total'])
            if os.path.exists(video['title'] + '.mkv'):
                print "Already downloaded, skipping."
                continue
            print("Downloading...")
            if args.no_act:
                continue
            ret = scrape_player_page(video['url'], video['title'])
            print Popen(["avconv", "-i", video['title'] + '.ts', "-vcodec", "copy", "-acodec", "copy", video['title'] + '.mkv'], stdout=PIPE).communicate()[0]
            try:
                os.unlink(video['title'] + '.ts')
            except OSError:
                # Debugging hook kept from the original; the try/except
                # placement around it is an assumption.
                import pdb; pdb.set_trace()
    else:
        if not args.no_act:
            video = scrape_player_page(args.url, None)
        print(u"Downloaded {}".format(args.url))
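
# Example invocations (sketch; the URLs and paths are placeholders):
#
#   python2 svtplaydump.py --url /video/12345          # single programme
#   python2 svtplaydump.py --rss http://example.com/feed.rss
#   python2 svtplaydump.py --mirror --no_act           # list what mirror mode would fetch
#
# Remuxing to .mkv expects an avconv binary on PATH; the RTMP/FLV fallbacks in
# scrape_player_page() shell out to rtmpdump and mplayer respectively.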