2 # -*- coding: utf-8 -*- 
   4 #   (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com> 
   6 #   This program is free software: you can redistribute it and/or modify 
   7 #   it under the terms of the GNU General Public License as published by 
   8 #   the Free Software Foundation, either version 3 of the License, or 
   9 #   (at your option) any later version. 
  11 #   This program is distributed in the hope that it will be useful, 
  12 #   but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13 #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14 #   GNU General Public License for more details. 
  16 #   You should have received a copy of the GNU General Public License 
  17 #   along with this program.  If not, see <http://www.gnu.org/licenses/> 
  20 # 0.4 added mirror mode. 
  21 # 0.3 added apple streaming playlist parsing and decryption 
  22 # 0.2 added python 2.4 urlparse compatibility 
  25 from bs4 
import BeautifulSoup
, Doctype
 
  26 from subprocess 
import * 
  28 from Crypto
.Cipher 
import AES
 
  36     def __init__(self
, *args
, **kwargs
): 
  37         self
.update(dict(*args
, **kwargs
))  # use the free update to set keys 
  39     def __setattr__(self
, name
, value
): 
  40         return self
.__setitem
__(name
,value
) 
  42     def __getattr__(self
, name
): 
  43         return self
.__getitem
__(name
) 
  45     def is_downloaded(self
): 
  46         raise("NotImplemented") 
  48 def scrape_player_page(video
): 
  50     Try to scrape the site for video and download.  
  52     if not video
['url'].startswith('http'): 
  53         video
['url'] = "http://www.svtplay.se" + video
['url'] 
  54     soup 
= BeautifulSoup(requests
.get(video
['url']).text
) 
  55     video_player 
= soup
.body('a',{'data-json-href':True}
)[0] 
  56     if 'oppetarkiv.se' in video
['url']: 
  57         flashvars 
= requests
.get("http://www.oppetarkiv.se/%s"%video_player
.attrs
['data-json-href']+"?output=json").json() 
  59         if video_player
.attrs
['data-json-href'].startswith("/wd"): 
  60             flashvars 
= requests
.get("http://www.svt.se/%s"%video_player
.attrs
['data-json-href']).json() 
  62             flashvars 
= requests
.get("http://www.svtplay.se/%s"%video_player
.attrs
['data-json-href']+"?output=json").json() 
  63     video
['duration'] = video_player
.attrs
.get('data-length',0) 
  64     if not video
['title']: 
  65         video
['title'] = soup
.find('meta',{'property':'og:title'}
).attrs
['content'].replace('|','_').replace('/','_') 
  66     if not 'genre' in video
: 
  67         if soup
.find(text
='Kategori:'): 
  68             video
['genre'] = soup
.find(text
='Kategori:').parent
.parent
.a
.text
 
  70             video
['genre'] = 'Ingen Genre'  
  71     if 'dynamicStreams' in flashvars
: 
  72         video
['url'] = flashvars
['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] +'.mp4' 
  73         filename 
= video
['title']+".mp4" 
  74         print(Popen(["rtmpdump","-o"+filename
,"-r", url
], stdout
=PIPE
).communicate()[0]) 
  75     if 'pathflv' in flashvars
: 
  76         rtmp 
= flashvars
['pathflv'][0] 
  77         filename 
= video
['title']+".flv" 
  78         print(Popen(["mplayer","-dumpstream","-dumpfile",filename
, rtmp
], stdout
=PIPE
).communicate()[0]) 
  79     if 'video' in flashvars
: 
  80         for reference 
in flashvars
['video']['videoReferences']: 
  81             if 'm3u8' in reference
['url']: 
  82                 video
['url']=reference
['url'] 
  83                 video
['filename'] = video
['title']+'.ts' 
  84                 if 'statistics' in flashvars
: 
  85                     video
['category'] = flashvars
['statistics']['category'] 
  86         download_from_playlist(video
) 
  87     if not 'url' in video
: 
  88         print("Could not find any streams") 
  92 def download_from_playlist(video
): 
    # Download the stream described by the playlist at video['url'] into the
    # file named video['filename'].
    #
    # Visible steps: fetch and parse the playlist with parse_playlist(), pick
    # the entry with the largest integer BANDWIDTH (ascending sort, last
    # element), resolve a relative entry against the directory part of
    # video['url'], and obtain (segments, metadata) from
    # parse_segment_playlist().  When the metadata has an "EXT-X-KEY" entry
    # the key is fetched from its URI attribute (surrounding double quotes
    # stripped).  Segment data is read from the raw response stream in
    # 4096-byte chunks and, on the decrypt path, passed through an AES-CBC
    # decryptor whose IV is struct.pack("IIII", segment, 0, 0, 0).  A
    # socket.error while reading prints "Error reading, skipping file".
    # Finally, if video has a 'thumb-url', the raw response stream for it is
    # stored in video['thumb'].
    #
    # NOTE(review): the loop headers, the `size`/`segment` bookkeeping and the
    # write to `ofile` are not visible in this view - confirm against the
    # complete file before changing behaviour.
    # NOTE(review): the key is taken from `.text` (a str); AES.new presumably
    # needs bytes on Python 3 - TODO confirm whether `.content` is required.
  93     playlist 
= parse_playlist(requests
.get(video
['url']).text
) 
  96     videourl 
= sorted(playlist
, key
=lambda k
: int(k
['BANDWIDTH']))[-1]['url'] 
  97     if not videourl
.startswith('http'): #if relative path 
  98         videourl 
= "{}/{}".format(os
.path
.dirname(video
['url']), videourl
)  
  99     segments
, metadata 
= parse_segment_playlist(videourl
) 
 100     if "EXT-X-KEY" in metadata
: 
 101         key 
= requests
.get(metadata
["EXT-X-KEY"]['URI'].strip('"')).text
 
 105     with open("%s"%video
['filename'],"wb") as ofile
: 
 109             ufile 
= requests
.get(url
, stream
=True).raw
 
 110             print("\r{0:.2f} MB".format(size
/1024/1024),end
="") 
 113                 iv
=struct
.pack("IIII",segment
,0,0,0) 
 114                 decryptor 
= AES
.new(key
, AES
.MODE_CBC
, iv
) 
 117                     buf 
= ufile
.read(4096) 
 118                 except socket
.error 
as e
: 
 119                     print("Error reading, skipping file") 
 125                     buf 
= decryptor
.decrypt(buf
) 
 130     if 'thumb-url' in video
: 
 131         video
['thumb'] = requests
.get(video
['thumb-url'],stream
=True).raw
 
 133 def parse_playlist(playlist
): 
    # Parse the text of a playlist that lists stream variants.
    #
    # Visible steps: check that the text starts with "#EXTM3U" (the body of
    # that branch is not visible here), split it into lines, drop leading
    # lines until the first one containing 'EXT-X-STREAM-INF', then walk the
    # remaining lines two at a time as (metadata line, url line).  For a
    # metadata line the text between the first and second ':' is split on ','
    # and each item is split on '=' and fed to md.update() as a key/value pair.
    #
    # The leading-line loop indexes playlist[0] on every pass, so a playlist
    # with no 'EXT-X-STREAM-INF' line ends in IndexError once the list is empty.
    # Splitting on ',' also cuts through quoted attribute values that contain
    # commas.
    #
    # NOTE(review): the creation of `md`, the result container and the return
    # statement are not visible in this view - confirm against the full file.
 134     if not playlist
.startswith("#EXTM3U"): 
 137     playlist 
= playlist
.splitlines() 
 138     while not 'EXT-X-STREAM-INF' in playlist
[0]: 
 139         playlist 
= playlist
[1:] 
 141     for (metadata_string
,url
) in zip(playlist
[0::2], playlist
[1::2]): 
 143         if not 'EXT-X-STREAM-INF' in metadata_string
.split(':')[0]: 
 145         for item 
in metadata_string
.split(':')[1].split(','): 
 147                 md
.update([item
.split('='),])  
 152 def parse_segment_playlist(playlisturl
): 
    # Fetch the playlist at `playlisturl` and return the tuple
    # (segments, metadata).
    #
    # Visible steps: download the text and assert that it starts with
    # "#EXTM3U" (note that assert statements are removed under `python -O`).
    # Rows that do not start with 'http' are made absolute by joining them to
    # the directory part of `playlisturl`.  A row containing "EXT-X-KEY" has
    # everything up to the first ':' removed and is then split with PATTERN,
    # which matches runs of characters that are neither commas nor quotes,
    # or whole quoted strings, so commas inside quotes do not split a value.
    # The resulting key=value parts become the dict stored in
    # metadata["EXT-X-KEY"]; values keep their quotes.
    #
    # NOTE(review): the initialisation of `segments`/`metadata` and the
    # statements that decide which rows are segment URLs are not visible in
    # this view - confirm against the full file.
 153     playlist 
= requests
.get(playlisturl
).text
 
 154     assert playlist
.startswith("#EXTM3U") 
 155     PATTERN 
= re
.compile(r
'''((?:[^,"']|"[^"]*"|'[^']*')+)''') 
 159     for row 
in playlist
.splitlines(): 
 161             if not row
.startswith('http'): #if relative path 
 162                 row 
= "{}/{}".format(os
.path
.dirname(playlisturl
), row
)  
 168         if "EXT-X-KEY" in row
: 
 169              row 
= row
.split(':',1)[1] #skip first part 
 170              parts 
= PATTERN
.split(row
)[1:-1] #do magic re split and keep quotes 
 171              metadata
["EXT-X-KEY"] = dict([part
.split('=',1) for part 
in parts 
if '=' in part
]) #throw away the commas and make dict of the pairs 
 172     return(segments
, metadata
) 
 174 def parse_videolist(): 
    # Walk the paged video listing on svtplay.se and build one `video`
    # mapping per <article> element.
    #
    # Visible steps: fetch /ajax/videospager only to read the last page
    # number from the 'data-lastpage' attribute, then request
    # /ajax/videos?sida=<page_num> while page_num <= page_tot.  For every
    # <article> the mapping gets 'title' and 'description' from the article's
    # data-title / data-description attributes, 'url' from the first <a>
    # href, 'thumb-url' from the first <img> src, 'num' from `video_num` and
    # 'total' as page_tot * videos_per_page.
    #
    # NOTE(review): the initialisation of `page_num`, `video_num`,
    # `videos_per_page` and `video`, the counter updates and the statement
    # that hands each video to the caller are not visible in this view; the
    # __main__ block iterates over the result, so it is presumably a
    # generator - confirm against the full file.
 176     soup 
= BeautifulSoup(requests
.get("http://www.svtplay.se/ajax/videospager").text
)#this call does not work for getting the pages, we use it for the page totals only 
 177     page_tot 
= int(soup
.find('a',{'data-currentpage':True}
).attrs
['data-lastpage']) 
 180     while(page_num 
<= page_tot
): 
 181         base_url 
= "http://www.svtplay.se/ajax/videos?sida={}".format(page_num
) 
 182         soup 
= BeautifulSoup(requests
.get(base_url
).text
) 
 183         for article 
in soup
.findAll('article'): 
 184             meta 
= dict(article
.attrs
) 
 186             video
['title'] = meta
['data-title'] 
 187             video
['description'] = meta
['data-description'] 
 188             video
['url'] = dict(article
.find('a').attrs
)['href'] 
 189             video
['thumb-url'] = dict(article
.find('img',{}).attrs
)['src'] 
 190             video
['num'] = video_num
 
 191             video
['total'] = page_tot 
* videos_per_page
 
 196 def remux(video
, xml
=None): 
    # Remux the downloaded file video['filename'] into a Matroska file with
    # the external `mkvmerge` program.
    #
    # Visible steps: `basename` is the part of the filename before the first
    # '.ts' (the split cuts at the first occurrence anywhere in the name, not
    # only at the end).  The output path is either <genre>/<basename>.mkv,
    # creating the genre directory when it does not exist, or
    # <basename>.mkv.  The command starts as
    # mkvmerge -o <path> --title <title>; a tag file <basename>.xml is added
    # with --global-tags, and the bytes read from video['thumb'] are written
    # to 'thumbnail.jpg' and attached as an image/jpeg attachment.  The input
    # filename is appended last, the command is run and its stdout printed.
    # The final loop visits the input file, the xml file and the thumbnail.
    #
    # NOTE(review): the conditions selecting each branch, what is written to
    # the xml file, and the body of the final loop (presumably cleanup of the
    # temporary files) are not visible in this view - confirm against the
    # full file.
 197     basename 
= video
['filename'].split('.ts')[0] 
 199         if not os
.path
.exists(video
['genre']): 
 200             os
.mkdir(video
['genre']) 
 201         video
['path'] = os
.path
.join(video
['genre'],basename
+'.mkv') 
 203         video
['path'] = basename
+'.mkv' 
 204     command 
= ["mkvmerge","-o",video
['path'], '--title',video
['title']] 
 207         with open(basename
+'.xml','w') as f
: 
 209             command
.extend(['--global-tags',basename
+'.xml'])            
 211         with open('thumbnail.jpg','wb') as f
: #FIXME use title instead for many downloaders 
 212             f
.write(video
['thumb'].read()) 
 213             command
.extend(['--attachment-description', "Thumbnail", 
 214                  '--attachment-mime-type', 'image/jpeg', 
 215                  '--attach-file', 'thumbnail.jpg']) 
 216     command
.append(video
['filename']) 
 217     print(Popen(command
, stdout
=PIPE
).communicate()[0]) 
 218     for fname 
in (video
['filename'], basename
+'.xml','thumbnail.jpg'): 
 224 def mkv_metadata(video
): 
    # Build the tag document for `video` with BeautifulSoup's xml feature.
    #
    # Visible steps: create an empty document, append the doctype
    # 'Tags SYSTEM "matroskatags.dtd"', and create <Tags>, <Tag>, <Targets>
    # and <TargetTypeValue> elements.  `keep` names the video fields of
    # interest ('title', 'description', 'url', 'genre').  For a field `key`,
    # a <Simple> element is built with a <Name> holding key.upper() and a
    # <String> holding video[key].
    #
    # NOTE(review): how the elements are nested, the value given to
    # TargetTypeValue, the loop over the fields and the return value are not
    # visible in this view - confirm against the full file.
 225     root 
= BeautifulSoup(features
='xml') 
 226     root
.append(Doctype('Tags SYSTEM "matroskatags.dtd"')) 
 227     tags 
= root
.new_tag("Tags") 
 228     tag 
= root
.new_tag("Tag") 
 231     keep 
= ('title','description', 'url','genre') 
 232     targets 
= root
.new_tag("Targets") 
 233     ttv 
= root
.new_tag("TargetTypeValue") 
 240         simple 
= root
.new_tag('Simple') 
 241         name 
= root
.new_tag('Name') 
 242         name
.string
=key
.upper() 
 244         sstring 
= root
.new_tag('String') 
 245         sstring
.string
=video
[key
] 
 246         simple
.append(sstring
) 
 250 if __name__ 
== "__main__": 
    # Command-line entry point.
    #
    # Exactly one of -r/--rss, -u/--url or -m/--mirror must be given (they
    # form a required mutually exclusive group); -n/--no_act and --no_remux
    # are optional flags.
    #
    # Visible behaviour per mode:
    #   rss    - parse the feed with feedparser, print "Downloading: <title>"
    #            and call scrape_player_page() with the entry title and link.
    #   mirror - work with a '.seen' directory; for every video from
    #            parse_videolist() replace '/' in the title, print progress,
    #            test for a marker file '.seen/<title>', create that marker,
    #            then call scrape_player_page() and mkv_metadata().
    #   url    - call scrape_player_page() with only the url, optionally
    #            remux, and print "Downloaded <url>".
    #
    # NOTE(review): in the visible lines the marker file is created before
    # scrape_player_page() runs, so a failed download would still count as
    # seen - confirm against the lines not visible here.
    # NOTE(review): the remux() call near the end uses `e.title`, but `e` is
    # only bound by the rss loop, and the dict passed has no 'filename' key
    # although remux() reads video['filename'] - passing `video` looks like
    # the intent; confirm.
    # NOTE(review): the branch conditions and several statements of this
    # block are not visible in this view.
 251     parser 
= argparse
.ArgumentParser() 
 252     group 
= parser
.add_mutually_exclusive_group(required
=True) 
 253     group
.add_argument("-r", "--rss", help="Download all files in rss") 
 254     group
.add_argument("-u", "--url", help="Download video in url") 
 255     group
.add_argument("-m", "--mirror", help="Mirror all files", action
="store_true") 
 256     parser
.add_argument("-n", "--no_act", help="Just print what would be done, don't do any downloading.", action
="store_true") 
 257     parser
.add_argument("--no_remux", help="Don't remux into mkv", action
="store_true") 
 259     args 
= parser
.parse_args() 
 262         d 
= feedparser
.parse(args
.rss
) 
 264             print(("Downloading: %s"%e.title
)) 
 267             video 
= scrape_player_page({'title':e.title,'url':e.link}
) 
 271         #print(e.description) 
 273         if not os
.path
.exists('.seen'): 
 275         for video 
in parse_videolist(): 
 276             video
['title'] = video
['title'].replace('/','_') 
 277             print(video
['title']+'.mkv') 
 278             print("{} of {}".format(video
['num'], video
['total'])) 
 280             if os
.path
.exists(os
.path
.join('.seen',video
['title'])): 
 283             print("Downloading...") 
 286             open(os
.path
.join('.seen',video
['title']),'w').close() #touch 
 287             video 
= scrape_player_page(video
) 
 290             xml 
= mkv_metadata(video
) 
 295             video 
= scrape_player_page({'url':args.url}
) 
 296         if not args
.no_remux
: 
 297             remux({'title':e.title}
) 
 298         print(("Downloaded {}".format(args
.url
)))