- page = urllib2.urlopen("http://www.svtplay.se/ajax/videos?antal=100").read()
- soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
- videos = []
- for article in soup.findAll('article'):
- meta = dict(article.attrs)
- video = {}
- video['title'] = meta['data-title']
- video['description'] = meta['data-description']
- video['url'] = dict(article.find('a').attrs)['href']
- videos.append(video)
- return videos
+ page_num = 1
+ soup = BeautifulSoup(requests.get("http://www.svtplay.se/ajax/videospager").text)#this call does not work for getting the pages, we use it for the page totals only
+ page_tot = int(soup.find('a',{'data-currentpage':True}).attrs['data-lastpage'])
+ videos_per_page = 8
+ video_num = 0
+ while(page_num <= page_tot):
+ base_url = "http://www.svtplay.se/ajax/videos?sida={}".format(page_num)
+ soup = BeautifulSoup(requests.get(base_url).text)
+ for article in soup.findAll('article'):
+ meta = dict(article.attrs)
+ video = {}
+ video['title'] = meta['data-title']
+ video['description'] = meta['data-description']
+ video['url'] = dict(article.find('a').attrs)['href']
+ video['thumb-url'] = dict(article.find('img',{}).attrs)['src']
+ video['num'] = video_num
+ video['total'] = page_tot * videos_per_page
+ video_num += 1
+ yield video
+ page_num += 1
+