-def main(url):
- page = urllib2.urlopen(url).read()
- soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
- videoid = re.findall("svt_article_id=(.*)[&]*",page)[0]
- flashvars = json.loads(urllib2.urlopen("http://www.svt.se/wd?widgetId=248134&sectionId=1024&articleId=%s&position=0&format=json&type=embed&contextSectionId=1024"%videoid).read())
- try:
- title = soup.find('meta',{'property':'og:title'}).attrMap['content']
- except:
- title = "unnamed"
+def scrape_player_page(url, title):
+ """
+ Try to scrape the site for video and download.
+ """
+ if not url.startswith('http'):
+ url = "http://www.svtplay.se" + url
+ video = {}
+ soup = BeautifulSoup(requests.get(url).text)
+ video_player = soup.body('a',{'data-json-href':True})[0]
+ if 'oppetarkiv.se' in url:
+ flashvars = requests.get("http://www.oppetarkiv.se/%s"%video_player.attrs['data-json-href']+"?output=json").json()
+ else:
+ if video_player.attrs['data-json-href'].startswith("/wd"):
+ flashvars = requests.get("http://www.svt.se/%s"%video_player.attrs['data-json-href']).json()
+ else:
+ flashvars = requests.get("http://www.svtplay.se/%s"%video_player.attrs['data-json-href']+"?output=json").json()
+ video['duration'] = video_player.attrs.get('data-length',0)
+ video['title'] = title
+ if not title:
+ video['title'] = soup.find('meta',{'property':'og:title'}).attrs['content'].replace('|','_').replace('/','_')