[svtplaydump.git] / svtplaydump.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#   (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com>
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#   
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#   
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>
#
# Changelog:
# 0.3 added apple streaming playlist parsing and decryption
# 0.2 added python 2.4 urlparse compatibility
# 0.1 initial release

from BeautifulSoup import BeautifulSoup
from subprocess import *
import re
import json
from Crypto.Cipher import AES
import struct
try:
    import urlparse
except ImportError:
    pass
import urllib2
try:
    import urllib2.urlparse as urlparse
except ImportError:
    pass
import sys

def main(argv=None):
    """Download the video referenced by an SVT Play / svt.se page URL.

    argv -- argument list in sys.argv layout (argv[1] is the page URL);
            defaults to sys.argv.

    The player configuration ("flashvars") is fetched as JSON, either from
    the embed page of the /video/<id> found in the URL or, when the URL has
    no such id, from the widget endpoint using the svt_article_id found in
    the page body.  Every stream kind present in the configuration is then
    downloaded: 'dynamicStreams' through rtmpdump, 'pathflv' through mplayer
    and 'video' (an m3u8 playlist) through download_from_playlist().

    Returns None on success, 2 when no URL was given and 1 when no video id
    or no downloadable stream could be found.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Without this check the IndexError raised by argv[1] would be taken
        # for "no /video/ id in the URL" and raised again by the fallback.
        sys.stderr.write("Usage: %s <url>\n" % (argv[0] if argv else "svtplaydump.py"))
        return 2
    page_url = argv[1]

    try:
        videoid = re.findall("/video/(.*)[/]*", page_url)[0]
        soup = BeautifulSoup(urllib2.urlopen("http://www.svtplay.se/video/%s/?type=embed"%videoid).read())
        # [5:] drops the first five characters of the param value before JSON
        # parsing -- presumably a "json=" prefix; TODO confirm against the site.
        flashvars = json.loads(soup.find("param", {"name":"flashvars",'value':True})['value'][5:])
    except IndexError:
        # No /video/<id> in the URL: look for an article id in the page itself.
        page = urllib2.urlopen(page_url).read()
        article_ids = re.findall("svt_article_id=(.*)[&]*", page)
        if not article_ids:
            sys.stderr.write("Could not find a video id for %s\n" % page_url)
            return 1
        videoid = article_ids[0]
        flashvars = json.loads(urllib2.urlopen("http://www.svt.se/wd?widgetId=248134&sectionId=1024&articleId=%s&position=0&format=json&type=embed&contextSectionId=1024"%videoid).read())

    try:
        title = flashvars['statistics']['title']
    except (KeyError, TypeError):
        # The title is only used to name the output file, so a missing or
        # oddly shaped 'statistics' entry is not fatal.
        title = "unnamed"

    # Track success across all three branches; an "else" on the last "if"
    # would report failure even after an rtmpdump/mplayer download.
    found_stream = False
    if 'dynamicStreams' in flashvars:
        stream_url = flashvars['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] +'.mp4'
        filename = title+".mp4"
        print(Popen(["rtmpdump",u"-o"+filename,"-r", stream_url], stdout=PIPE).communicate()[0])
        found_stream = True
    if 'pathflv' in flashvars:
        rtmp = flashvars['pathflv'][0]
        filename = title+".flv"
        print(Popen(["mplayer","-dumpstream","-dumpfile",filename, rtmp], stdout=PIPE).communicate()[0])
        found_stream = True
    if 'video' in flashvars:
        playlist_url = None
        for reference in flashvars['video']['videoReferences']:
            # When several references end in m3u8 the last one wins.
            if reference['url'].endswith("m3u8"):
                playlist_url = reference['url']
        if playlist_url is not None:
            download_from_playlist(playlist_url, title+'.ts')
            found_stream = True

    if not found_stream:
        print("Could not find any streams")
        return 1
    return None

def _read_url(url):
    """Return the whole response body of url, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def download_from_playlist(url, title):
    """Download the highest-bandwidth variant of an HLS playlist to one file.

    url   -- address of the master (variant) playlist.
    title -- path of the output file; it is created or truncated.

    The variant with the largest BANDWIDTH attribute is selected, its
    segments are fetched in playlist order and appended to the output file.
    When the segment playlist carries an EXT-X-KEY tag whose METHOD is not
    NONE, the key is fetched from its URI and every segment is decrypted
    with AES in CBC mode.  Relative playlist, segment and key URIs are
    resolved against the playlist that mentions them.

    Raises whatever urllib2, parse_playlist() and parse_segment_playlist()
    raise; IndexError when the master playlist lists no variants.
    """
    import binascii  # only needed to decode an explicit IV attribute

    # A multiple of the 16-byte AES block size, which CBC decryption
    # requires of every buffer handed to decrypt().
    chunk_size = 64 * 1024

    playlist = parse_playlist(_read_url(url))
    best_variant = sorted(playlist, key=lambda k: int(k['BANDWIDTH']))[-1]
    videourl = urlparse.urljoin(url, best_variant['url'])
    segments, metadata = parse_segment_playlist(_read_url(videourl))

    key_attrs = metadata.get("EXT-X-KEY")
    decrypt = key_attrs is not None and key_attrs.get('METHOD') != 'NONE'
    key = None
    explicit_iv = None
    if decrypt:
        key = _read_url(urlparse.urljoin(videourl, key_attrs['URI'].strip('"')))
        iv_attr = key_attrs.get('IV')
        if iv_attr:
            # The IV attribute is a hexadecimal number with a 0x/0X prefix;
            # pad it on the left to the full 128 bits.
            explicit_iv = binascii.unhexlify(iv_attr.strip('"')[2:].zfill(32))

    # Binary mode: the payload is video data, not text.
    with open("%s"%title,"wb") as ofile:
        # NOTE(review): numbering starts at 0, i.e. this assumes the playlist's
        # media sequence starts at 0 -- EXT-X-MEDIA-SEQUENCE is not parsed.
        for sequence, segment_url in enumerate(segments):
            segment_url = urlparse.urljoin(videourl, segment_url)
            print("Downloading: %s"%(segment_url))
            decryptor = None
            if decrypt:
                if explicit_iv is not None:
                    iv = explicit_iv
                else:
                    # Without an IV attribute the IV is the segment's sequence
                    # number as a big-endian 128-bit integer (zero-padded on
                    # the left).
                    iv = struct.pack(">QQ", 0, sequence)
                decryptor = AES.new(key, AES.MODE_CBC, iv)
            ufile = urllib2.urlopen(segment_url)
            try:
                while True:
                    buf = ufile.read(chunk_size)
                    if not buf:
                        break
                    if decryptor is not None:
                        # NOTE(review): the decrypted bytes are written as-is;
                        # block padding at the end of a segment is not stripped.
                        buf = decryptor.decrypt(buf)
                    ofile.write(buf)
            finally:
                ufile.close()

def parse_playlist(playlist):
    """Parse an HLS master (variant) playlist into a list of stream dicts.

    playlist -- the playlist text; it must start with "#EXTM3U".

    Returns one dict per #EXT-X-STREAM-INF entry, in playlist order.  Each
    dict maps the tag's attribute names to their values as strings (quoted
    values keep their surrounding quotes) and has the entry's URI under the
    key 'url'.  Blank lines and all other tags or comments are ignored.

    Raises ValueError when the text does not start with "#EXTM3U".
    """
    if not playlist.startswith("#EXTM3U"):
        raise ValueError("not an M3U playlist: missing #EXTM3U header")
    # NAME=value pairs; a double-quoted value may itself contain commas.
    attribute = re.compile(r'''\s*([^=,\s]+)\s*=\s*("[^"]*"|[^,]*)''')
    items = []
    pending = None  # attributes of a STREAM-INF tag still waiting for its URI
    for row in playlist.splitlines()[1:]:
        row = row.strip()
        if not row:
            continue
        if row.startswith('#EXT-X-STREAM-INF'):
            # Split on the first colon only, attribute values may contain colons.
            attributes = row.split(':', 1)[1] if ':' in row else ''
            pending = dict((name, value.strip())
                           for name, value in attribute.findall(attributes))
        elif row.startswith('#'):
            continue
        elif pending is not None:
            pending['url'] = row
            items.append(pending)
            pending = None
    return items

def parse_segment_playlist(playlist):
    """Parse an HLS media (segment) playlist.

    playlist -- the playlist text; it must start with "#EXTM3U".

    Returns a tuple (segments, metadata).  segments is the list of segment
    URIs in playlist order: the first non-blank, non-tag line following each
    #EXTINF tag.  metadata is a dict that holds, under "EXT-X-KEY", the
    attribute dict of the last #EXT-X-KEY tag seen (quoted values keep their
    quotes); it is empty when the playlist has no such tag.

    Raises ValueError when the text does not start with "#EXTM3U".
    """
    if not playlist.startswith("#EXTM3U"):
        raise ValueError("not an M3U playlist: missing #EXTM3U header")
    # One attribute: a run of anything but commas and quotes, where quoted
    # stretches are taken whole so commas inside quotes do not split.
    PATTERN = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
    segments = []
    metadata = {}
    expect_uri = False
    for row in playlist.splitlines():
        row = row.strip()
        if not row:
            continue
        if row.startswith('#'):
            if row.startswith('#EXTINF'):
                expect_uri = True
            elif row.startswith('#EXT-X-KEY:'):
                attributes = row.split(':', 1)[1]  # drop the tag name
                pairs = [part.split('=', 1)
                         for part in PATTERN.findall(attributes) if '=' in part]
                metadata["EXT-X-KEY"] = dict((name.strip(), value)
                                             for name, value in pairs)
            # Tags and comments are never segment URIs, even right after #EXTINF.
            continue
        if expect_uri:
            segments.append(row)
            expect_uri = False
    return (segments, metadata)

# Script entry point: run main() with the process arguments and hand its
# return value to the interpreter as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Commit	Line	Data
ca2553c7	1	#!/usr/bin/env python
56181f0a	2	# -- coding: utf-8 --
ca2553c7 MF	3	#
	4	# (C) Copyright 2010 Mikael Frykholm <mikael@frykholm.com>
	5	#
	6	# This program is free software: you can redistribute it and/or modify
	7	# it under the terms of the GNU General Public License as published by
	8	# the Free Software Foundation, either version 3 of the License, or
	9	# (at your option) any later version.
	10	#
	11	# This program is distributed in the hope that it will be useful,
	12	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	# GNU General Public License for more details.
	15	#
	16	# You should have received a copy of the GNU General Public License
	17	# along with this program. If not, see <http://www.gnu.org/licenses/>
	18	#
	19	# Changelog:
56181f0a	20	# 0.3 added apple streaming playlist parsing and decryption
ca2553c7 MF	21	# 0.2 added python 2.4 urlparse compatibility
	22	# 0.1 initial release
	23
	24	from BeautifulSoup import BeautifulSoup
	25	from subprocess import *
89a00fa0 MF	26	import re
89a00fa0 MF	27	import json
56181f0a MF	28	from Crypto.Cipher import AES
56181f0a MF	29	import struct
ca2553c7 MF	30	try:
	31	import urlparse
	32	except ImportError:
	33	pass
	34	import urllib2
	35	try:
	36	import urllib2.urlparse as urlparse
	37	except ImportError:
	38	pass
	39	import sys
	40
	41	def main(argv=None):
	42	if argv is None:
	43	argv=sys.argv
56181f0a MF	44	try:
	45	videoid = re.findall("/video/(.)[/]",argv[1])[0]
	46	soup = BeautifulSoup(urllib2.urlopen("http://www.svtplay.se/video/%s/?type=embed"%videoid).read())
	47	flashvars = json.loads(soup.find("param", {"name":"flashvars",'value':True})['value'][5:])
	48	except(IndexError):
	49	page = urllib2.urlopen(argv[1]).read()
	50	videoid = re.findall("svt_article_id=(.)[&]",page)[0]
	51	flashvars = json.loads(urllib2.urlopen("http://www.svt.se/wd?widgetId=248134&sectionId=1024&articleId=%s&position=0&format=json&type=embed&contextSectionId=1024"%videoid).read())
ca2553c7	52	try:
89a00fa0	53	title = flashvars['statistics']['title']
ca2553c7 MF	54	except:
	55	title = "unnamed"
	56	if 'dynamicStreams' in flashvars:
	57	url = flashvars['dynamicStreams'][0].split('url:')[1].split('.mp4,')[0] +'.mp4'
	58	filename = title+".mp4"
	59	print Popen(["rtmpdump",u"-o"+filename,"-r", url], stdout=PIPE).communicate()[0]
	60	if 'pathflv' in flashvars:
	61	rtmp = flashvars['pathflv'][0]
	62	filename = title+".flv"
	63	print Popen(["mplayer","-dumpstream","-dumpfile",filename, rtmp], stdout=PIPE).communicate()[0]
89a00fa0	64	if 'video' in flashvars:
56181f0a MF	65	for reference in flashvars['video']['videoReferences']:
	66	if reference['url'].endswith("m3u8"):
	67	url=reference['url']
	68	download_from_playlist(url, title+'.ts')
ca2553c7 MF	69	else:
	70	print "Could not find any streams"
	71	return
	72
56181f0a MF	73	def download_from_playlist(url, title):
	74	playlist = parse_playlist(urllib2.urlopen(url).read())
	75	videourl = sorted(playlist, key=lambda k: int(k['BANDWIDTH']))[-1]['url']
	76	segments, metadata = parse_segment_playlist(urllib2.urlopen(videourl).read())
	77	if "EXT-X-KEY" in metadata:
	78	key = urllib2.urlopen(metadata["EXT-X-KEY"]['URI'].strip('"')).read()
	79	decrypt=True
	80	else:
	81	decrypt=False
	82	with open("%s"%title,"w") as ofile:
	83	segment=0
	84	for url in segments:
	85	print "Downloading: %s"%(url)
	86	ufile = urllib2.urlopen(url)
	87	if decrypt:
	88	iv=struct.pack("IIII",segment,0,0,0)
	89	decryptor = AES.new(key, AES.MODE_CBC, iv)
	90	while(True):
	91	buf = ufile.read(1024)
	92	if buf:
	93	if decrypt:
	94	buf = decryptor.decrypt(buf)
	95	ofile.write(buf)
	96	else:
	97	ufile.close()
	98	break
	99	segment += 1
	100
	101	def parse_playlist(playlist):
	102	assert playlist.startswith("#EXTM3U")
	103	playlist = playlist.splitlines()[1:]
	104	items=[]
	105	for (metadata_string,url) in zip(playlist[0::2], playlist[1::2]):
	106	md = dict()
	107	assert 'EXT-X-STREAM-INF' in metadata_string.split(':')[0]
	108	for item in metadata_string.split(':')[1].split(','):
	109	if '=' in item:
	110	md.update([item.split('='),])
	111	md['url']=url
	112	items.append(md)
	113	return items
	114
	115	def parse_segment_playlist(playlist):
	116	assert playlist.startswith("#EXTM3U")
	117	PATTERN = re.compile(r'''((?:[^,"']\|"[^"]"\|'[^']')+)''')
	118	segments = []
	119	next_is_url=False
	120	metadata = {}
	121	for row in playlist.splitlines():
	122	if next_is_url:
	123	segments.append(row)
	124	next_is_url=False
	125	continue
	126	if 'EXTINF' in row:
	127	next_is_url=True
	128	if "EXT-X-KEY" in row:
	129	row = row.split(':',1)[1] #skip first part
	130	parts = PATTERN.split(row)[1:-1] #do magic re split and keep quoting
	131	metadata["EXT-X-KEY"] = dict([part.split('=',1) for part in parts if '=' in part]) #throw away the commas and make dict of the pairs
	132	return(segments, metadata)
	133
ca2553c7 MF	134	if __name__ == "__main__":
ca2553c7 MF	135	sys.exit(main())