def _fetch_soup(url):
    """Download *url* and return its body parsed as a BeautifulSoup tree."""
    from contextlib import closing

    # urllib2 responses are not context managers on Python 2, so wrap the
    # handle in closing() to release the connection as soon as the body has
    # been read instead of leaking it until garbage collection.
    with closing(urllib2.urlopen(url)) as response:
        markup = response.read()
    return BeautifulSoup(markup, convertEntities=BeautifulSoup.HTML_ENTITIES)


def parse_videolist():
    """Yield one dict per video listed on svtplay.se, walking every page.

    The pager endpoint is requested once to learn the number of the last
    page; each listing page is then downloaded lazily, only when the consumer
    of this generator asks for more videos.

    Each yielded dict has the keys:
        'title'       -- the article's ``data-title`` attribute
        'description' -- the article's ``data-description`` attribute
        'url'         -- ``href`` of the first ``<a>`` inside the article
        'thumb-url'   -- ``src`` of the first ``<img>`` inside the article
        'num'         -- zero-based running index across all pages
        'total'       -- page count multiplied by ``videos_per_page``

    NOTE(review): 'total' assumes every page holds exactly 8 videos, so it is
    presumably an upper bound rather than an exact count -- confirm against
    the site.

    Errors are not handled here: a failed request, a missing pager link or a
    missing attribute/tag propagates as the underlying exception.
    """
    videos_per_page = 8

    # This call does not work for getting the pages themselves; it is used
    # for the page total only.
    pager_soup = _fetch_soup("http://www.svtplay.se/ajax/videospager")
    pager_link = pager_soup.find('a', {'data-currentpage': True})
    page_tot = int(pager_link.attrMap['data-lastpage'])

    # Constant for the whole run, so compute it once rather than per video.
    total = page_tot * videos_per_page

    video_num = 0
    for page_num in range(1, page_tot + 1):
        page_url = "http://www.svtplay.se/ajax/videos?sida={}".format(page_num)
        soup = _fetch_soup(page_url)
        for article in soup.findAll('article'):
            meta = dict(article.attrs)
            video = {}
            video['title'] = meta['data-title']
            video['description'] = meta['data-description']
            video['url'] = dict(article.find('a').attrs)['href']
            video['thumb-url'] = dict(article.find('img').attrs)['src']
            video['num'] = video_num
            video['total'] = total
            video_num += 1
            yield video