|
MythTV
0.26-pre
|
00001 #!/usr/bin/env python 00002 # -*- coding: UTF-8 -*- 00003 # ---------------------- 00004 # Name: thewb_api - Simple-to-use Python interface to The WB RSS feeds (http://www.thewb.com/) 00005 # Python Script 00006 # Author: R.D. Vaughan 00007 # Purpose: This python script is intended to perform a variety of utility functions to search and 00008 # access text metadata, video and image URLs from The WB. 00009 # 00010 # License:Creative Commons GNU GPL v2 00011 # (http://creativecommons.org/licenses/GPL/2.0/) 00012 #------------------------------------- 00013 __title__ ="thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)" 00014 __author__="R.D. Vaughan" 00015 __purpose__=''' 00016 This python script is intended to perform a variety of utility functions to search and access text 00017 meta data, video and image URLs from thewb. These routines process RSS feeds provided by The WB 00018 (http://www.thewb.com/). The specific "The WB" RSS feeds that are processed are controled through 00019 a user XML preference file usually found at "~/.mythtv/MythNetvision/userGrabberPrefs/thewb.xml" 00020 ''' 00021 00022 __version__="v0.1.3" 00023 # 0.1.0 Initial development 00024 # 0.1.1 Changed the logger to only output to stderr rather than a file 00025 # 0.1.2 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path 00026 # 0.1.3 Fixes to accommodate changes to TheWB web site.
import os, struct, sys, re, time, datetime, urllib
import logging
from socket import gethostname, gethostbyname
from threading import Thread
from copy import deepcopy

from thewb_exceptions import (TheWBUrlError, TheWBHttpError, TheWBRssError, TheWBVideoNotFound, TheWBConfigFileError, TheWBUrlDownloadError)


class OutStreamEncoder(object):
    """Wrap an output stream so that unicode strings are encoded before being written.

    Every attribute other than ``write`` is delegated to the wrapped stream.
    """
    def __init__(self, outstream, encoding=None):
        """outstream: the stream object to wrap
        encoding:  name of the encoding applied to unicode strings. When omitted the file
                   system encoding is used, falling back to UTF-8.
        """
        self.out = outstream
        # On Python 2 sys.getfilesystemencoding() may return None, which unicode.encode()
        # rejects with a TypeError, so fall back to UTF-8 in that case.
        self.encoding = encoding or sys.getfilesystemencoding() or 'utf-8'

    def write(self, obj):
        """Write obj to the wrapped stream, encoding unicode strings with the configured encoding.

        IOError raised by the underlying stream (e.g. a closed pipe) is deliberately ignored.
        """
        if isinstance(obj, unicode):
            obj = obj.encode(self.encoding)
        try:
            self.out.write(obj)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        return getattr(self.out, attr)

sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')


try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
# The dotted version string is built with a trailing '.' which the following statement strips.
version = ''
for digit in etree.LIBXML_VERSION:
    version+=str(digit)+'.'
00079 version = version[:-1] 00080 if version < '2.7.2': 00081 sys.stderr.write(u''' 00082 ! Error - The installed version of the "lxml" python library "libxml" version is too old. 00083 At least "libxml" version 2.7.2 must be installed. Your version is (%s). 00084 ''' % version) 00085 sys.exit(1) 00086 00087 00088 def can_int(x): 00089 """Takes a string, checks if it is numeric. 00090 >>> _can_int("2") 00091 True 00092 >>> _can_int("A test") 00093 False 00094 """ 00095 if x == None: 00096 return False 00097 try: 00098 int(x) 00099 except ValueError: 00100 return False 00101 else: 00102 return True 00103 # end _can_int 00104 00105 00106 class Videos(object): 00107 """Main interface to http://www.thewb.com/ 00108 This is done to support a common naming framework for all python Netvision plugins no matter their site 00109 target. 00110 00111 Supports search methods 00112 The apikey is a not required to access http://www.thewb.com/ 00113 """ 00114 def __init__(self, 00115 apikey, 00116 mythtv = True, 00117 interactive = False, 00118 select_first = False, 00119 debug = False, 00120 custom_ui = None, 00121 language = None, 00122 search_all_languages = False, 00123 ): 00124 """apikey (str/unicode): 00125 Specify the target site API key. Applications need their own key in some cases 00126 00127 mythtv (True/False): 00128 When True, the returned meta data is being returned has the key and values massaged to match MythTV 00129 When False, the returned meta data is being returned matches what target site returned 00130 00131 interactive (True/False): (This option is not supported by all target site apis) 00132 When True, uses built-in console UI is used to select the correct show. 00133 When False, the first search result is used. 00134 00135 select_first (True/False): (This option is not supported currently implemented in any grabbers) 00136 Automatically selects the first series search result (rather 00137 than showing the user a list of more than one series). 
00138 Is overridden by interactive = False, or specifying a custom_ui 00139 00140 debug (True/False): 00141 shows verbose debugging information 00142 00143 custom_ui (xx_ui.BaseUI subclass): (This option is not supported currently implemented in any grabbers) 00144 A callable subclass of interactive class (overrides interactive option) 00145 00146 language (2 character language abbreviation): (This option is not supported by all target site apis) 00147 The language of the returned data. Is also the language search 00148 uses. Default is "en" (English). For full list, run.. 00149 00150 search_all_languages (True/False): (This option is not supported by all target site apis) 00151 By default, a Netvision grabber will only search in the language specified using 00152 the language option. When this is True, it will search for the 00153 show in any language 00154 00155 """ 00156 self.config = {} 00157 00158 if apikey is not None: 00159 self.config['apikey'] = apikey 00160 else: 00161 pass # TheWB does not require an apikey 00162 00163 self.config['debug_enabled'] = debug # show debugging messages 00164 self.common = common 00165 self.common.debug = debug # Set the common function debug level 00166 00167 self.log_name = u'TheWB_Grabber' 00168 self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name) 00169 self.logger = self.common.logger # Setups the logger (self.log.debug() etc) 00170 00171 self.config['custom_ui'] = custom_ui 00172 00173 self.config['interactive'] = interactive 00174 00175 self.config['select_first'] = select_first 00176 00177 self.config['search_all_languages'] = search_all_languages 00178 00179 self.error_messages = {'TheWBUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n", 'TheWBHttpError': u"! Error: An HTTP communications error with The WB was raised (%s)\n", 'TheWBRssError': u"! Error: Invalid RSS meta data\nwas received from The WB error (%s). Skipping item.\n", 'TheWBVideoNotFound': u"! 
Error: Video search with The WB did not return any results (%s)\n", 'TheWBConfigFileError': u"! Error: thewb_config.xml file missing\nit should be located in and named as (%s).\n", 'TheWBUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n", } 00180 00181 # Channel details and search results 00182 self.channel = {'channel_title': u'The WB', 'channel_link': u'http://www.thewb.com/', 'channel_description': u"Watch full episodes of your favorite shows on The WB.com, like Friends, The O.C., Veronica Mars, Pushing Daisies, Smallville, Buffy The Vampire Slayer, One Tree Hill and Gilmore Girls.", 'channel_numresults': 0, 'channel_returned': 1, u'channel_startindex': 0} 00183 00184 00185 # Season and Episode detection regex patterns 00186 self.s_e_Patterns = [ 00187 # Season 3: Ep. 13 (01:04:30) 00188 re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE), 00189 # Season 3: Ep. 13 (04:30) 00190 re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE), 00191 # Season 3: Ep. 13 00192 re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE), 00193 # Ep. 13 (01:04:30) 00194 re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE), 00195 # Ep. 13 (04:30) 00196 re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE), 00197 # Ep. 
13 00198 re.compile(u'''Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE), 00199 ] 00200 00201 self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/thewb.png' 00202 # end __init__() 00203 00204 ########################################################################################################### 00205 # 00206 # Start - Utility functions 00207 # 00208 ########################################################################################################### 00209 00210 def getSeasonEpisode(self, title): 00211 ''' Check is there is any season or episode number information in an item's title 00212 return array of season and/or episode numbers plus any duration in minutes and seconds 00213 return array with None values 00214 ''' 00215 s_e = [] 00216 for index in range(len(self.s_e_Patterns)): 00217 match = self.s_e_Patterns[index].match(title) 00218 if not match: 00219 continue 00220 return match.groups() 00221 return s_e 00222 # end getSeasonEpisode() 00223 00224 def getTheWBConfig(self): 00225 ''' Read the MNV The WB grabber "thewb_config.xml" configuration file 00226 return nothing 00227 ''' 00228 # Read the grabber thewb_config.xml configuration file 00229 url = u'file://%s/nv_python_libs/configs/XML/thewb_config.xml' % (baseProcessingDir, ) 00230 if not os.path.isfile(url[7:]): 00231 raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], )) 00232 00233 if self.config['debug_enabled']: 00234 print url 00235 print 00236 try: 00237 self.thewb_config = etree.parse(url) 00238 except Exception, e: 00239 raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) 00240 return 00241 # end getTheWBConfig() 00242 00243 00244 def getUserPreferences(self): 00245 '''Read the thewb_config.xml and user preference thewb.xml file. 00246 If the thewb.xml file does not exist then create it. 00247 If the thewb.xml file is too old then update it. 
00248 return nothing 00249 ''' 00250 # Get thewb_config.xml 00251 self.getTheWBConfig() 00252 00253 # Check if the thewb.xml file exists 00254 userPreferenceFile = self.thewb_config.find('userPreferenceFile').text 00255 if userPreferenceFile[0] == '~': 00256 self.thewb_config.find('userPreferenceFile').text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:]) 00257 if os.path.isfile(self.thewb_config.find('userPreferenceFile').text): 00258 # Read the grabber thewb_config.xml configuration file 00259 url = u'file://%s' % (self.thewb_config.find('userPreferenceFile').text, ) 00260 if self.config['debug_enabled']: 00261 print url 00262 print 00263 try: 00264 self.userPrefs = etree.parse(url) 00265 except Exception, e: 00266 raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) 00267 # Check if the thewb.xml file is too old 00268 nextUpdateSecs = int(self.userPrefs.find('updateDuration').text)*86400 # seconds in a day 00269 nextUpdate = time.localtime(os.path.getmtime(self.thewb_config.find('userPreferenceFile').text)+nextUpdateSecs) 00270 now = time.localtime() 00271 if nextUpdate > now: 00272 return 00273 create = False 00274 else: 00275 create = True 00276 00277 # If required create/update the thewb.xml file 00278 self.updateTheWB(create) 00279 return 00280 # end getUserPreferences() 00281 00282 def updateTheWB(self, create=False): 00283 ''' Create or update the thewb.xml user preferences file 00284 return nothing 00285 ''' 00286 # Read the default user preferences file 00287 url = u'file://%s/nv_python_libs/configs/XML/defaultUserPrefs/thewb.xml' % (baseProcessingDir, ) 00288 if not os.path.isfile(url[7:]): 00289 raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], )) 00290 00291 if self.config['debug_enabled']: 00292 print 'updateTheWB url(%s)' % url 00293 print 00294 try: 00295 userTheWB = etree.parse(url) 00296 except Exception, e: 00297 raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, 
errormsg)) 00298 00299 # Get the current show links from the TheWB web site 00300 linksTree = self.common.getUrlData(self.thewb_config.find('treeviewUrls')) 00301 00302 if self.config['debug_enabled']: 00303 print "create(%s)" % create 00304 print "linksTree:" 00305 sys.stdout.write(etree.tostring(linksTree, encoding='UTF-8', pretty_print=True)) 00306 print 00307 00308 # Check that at least several show directories were returned 00309 if not create: 00310 if not len(linksTree.xpath('//results//a')) > 10: 00311 return self.userPrefs 00312 00313 # Assemble the feeds and formats 00314 root = etree.XML(u'<xml></xml>') 00315 for directory in linksTree.xpath('//results'): 00316 tmpDirectory = etree.SubElement(root, u'showDirectories') 00317 tmpDirectory.attrib['name'] = directory.find('name').text 00318 for show in directory.xpath('.//a'): 00319 showName = show.text 00320 # Skip any DVD references as they are not on-line videos 00321 if showName.lower().find('dvd') != -1 or show.attrib['href'].lower().find('dvd') != -1: 00322 continue 00323 tmpShow = etree.XML(u'<url></url>') 00324 tmpShow.attrib['enabled'] = u'true' 00325 tmpShow.attrib['name'] = self.common.massageText(showName.strip()) 00326 tmpShow.text = self.common.ampReplace(show.attrib['href'].replace(u'/shows/', u'').replace(u'/', u'').strip()) 00327 tmpDirectory.append(tmpShow) 00328 00329 if self.config['debug_enabled']: 00330 print "Before any merging userTheWB:" 00331 sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True)) 00332 print 00333 00334 # If there was an existing thewb.xml file then add any relevant user settings to 00335 # this new thewb.xml 00336 if not create: 00337 userTheWB.find('updateDuration').text = self.userPrefs.find('updateDuration').text 00338 if self.userPrefs.find('showDirectories').get('globalmax'): 00339 root.find('showDirectories').attrib['globalmax'] = self.userPrefs.find('showDirectories').attrib['globalmax'] 00340 for rss in 
self.userPrefs.xpath("//url[@enabled='false']"): 00341 elements = root.xpath("//url[text()=$URL]", URL=rss.text.strip()) 00342 if len(elements): 00343 elements[0].attrib['enabled'] = u'false' 00344 if rss.get('max'): 00345 elements[0].attrib['max'] = rss.attrib['max'] 00346 00347 if self.config['debug_enabled']: 00348 print "After any merging userTheWB:" 00349 sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True)) 00350 print 00351 00352 # Save the thewb.xml file 00353 prefDir = self.thewb_config.find('userPreferenceFile').text.replace(u'/thewb.xml', u'') 00354 if not os.path.isdir(prefDir): 00355 os.makedirs(prefDir) 00356 fd = open(self.thewb_config.find('userPreferenceFile').text, 'w') 00357 fd.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True)[:-len(u'</userTheWB>')-1]+u''.join(etree.tostring(element, encoding='UTF-8', pretty_print=True) for element in root.xpath('/xml/*'))+u'</userTheWB>') 00358 fd.close() 00359 00360 # Input the refreshed user preference data 00361 try: 00362 self.userPrefs = etree.parse(self.thewb_config.find('userPreferenceFile').text) 00363 except Exception, e: 00364 raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg)) 00365 return 00366 # end updateTheWB() 00367 00368 ########################################################################################################### 00369 # 00370 # End of Utility functions 00371 # 00372 ########################################################################################################### 00373 00374 00375 def searchTitle(self, title, pagenumber, pagelen, ignoreError=False): 00376 '''Key word video search of the TheWB web site 00377 return an array of matching item elements 00378 return 00379 ''' 00380 orgURL = self.thewb_config.find('searchURLS').xpath(".//href")[0].text 00381 00382 try: 00383 searchVar = u'?q=%s' % (urllib.quote(title.encode("utf-8")).replace(u' ', u'+')) 00384 except UnicodeDecodeError: 00385 searchVar = 
u'?q=%s' % (urllib.quote(title).replace(u' ', u'+')) 00386 url = self.thewb_config.find('searchURLS').xpath(".//href")[0].text+searchVar 00387 00388 if self.config['debug_enabled']: 00389 print "Search url(%s)" % url 00390 print 00391 00392 self.thewb_config.find('searchURLS').xpath(".//href")[0].text = url 00393 00394 # Perform a search 00395 try: 00396 resultTree = self.common.getUrlData(self.thewb_config.find('searchURLS'), pageFilter=None) 00397 except Exception, errormsg: 00398 self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL 00399 raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg)) 00400 00401 self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL 00402 00403 if self.config['debug_enabled']: 00404 print "resultTree count(%s)" % len(resultTree) 00405 print etree.tostring(resultTree, encoding='UTF-8', pretty_print=True) 00406 print 00407 00408 if resultTree is None: 00409 if ignoreError: 00410 return [None, None] 00411 raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title) 00412 00413 searchResults = resultTree.xpath('//result/div') 00414 if not len(searchResults): 00415 if ignoreError: 00416 return [None, None] 00417 raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title) 00418 00419 # Set the number of search results returned 00420 self.channel['channel_numresults'] = len(searchResults) 00421 00422 # TheWB search results fo not have a pubDate so use the current data time 00423 # e.g. 
"Sun, 06 Jan 2008 21:44:36 GMT" 00424 pubDate = datetime.datetime.now().strftime(self.common.pubDateFormat) 00425 00426 # Translate the search results into MNV RSS item format 00427 thumbNailFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//img') 00428 textFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p') 00429 titleFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@title') 00430 descFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p[@class="overlay_extra overlay_spacer_top"]/text()') 00431 linkFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@href') 00432 itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces) 00433 itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces) 00434 itemDict = {} 00435 for result in searchResults: 00436 if linkFilter(result) != None: # Make sure that this result actually has a video 00437 thewbItem = etree.XML(self.common.mnvItem) 00438 # These videos are only viewable in the US so add a country indicator 00439 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = u'us' 00440 # Extract and massage data 00441 thumbNail = self.common.ampReplace(thumbNailFilter(result)[0].attrib['src']) 00442 title = titleFilter(result)[0].strip() 00443 link = u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, result.attrib['id'].replace(u'video_', u'')) 00444 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true' 00445 descriptionElement = textFilter(result)[0] 00446 description = u'' 00447 tmptitle = None 00448 seasonNum = None 00449 episodeNum = None 00450 for e in descriptionElement.xpath('./*'): 00451 try: 00452 eText = unicode(e.tail, 'UTF-8').strip() 00453 except: 00454 continue 00455 if eText.startswith(u'Season ') or eText.startswith(u'EP'): 00456 sed = 
self.getSeasonEpisode(eText) 00457 if not len(sed): 00458 continue 00459 infoList = u'S%02dE%02d' % (int(sed[0]), int(sed[1])) 00460 seasonNum = u'%d' % int(sed[0]) 00461 episodeNum = u'%d' % int(sed[1]) 00462 if len(sed) == 5: 00463 videoSeconds = int(sed[2])*3600+int(sed[3])*60+int(sed[4]) 00464 itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds) 00465 elif len(sed) == 4: 00466 videoSeconds = int(sed[2])*60+int(sed[3]) 00467 itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds) 00468 00469 index = title.find(u':') 00470 if index != -1: 00471 tmptitle = u'%s: %s %s' % (title[:index].strip(), infoList, title[index+1:].strip()) 00472 else: 00473 tmptitle = u'%s: %s' % (title.strip(), infoList) 00474 if tmptitle: 00475 title = tmptitle 00476 title = self.common.massageText(title.strip()) 00477 description = self.common.massageText(descFilter(result)[0].strip()) 00478 00479 # Insert data into a new item element 00480 thewbItem.find('title').text = title 00481 thewbItem.find('author').text = "The WB.com" 00482 thewbItem.find('pubDate').text = pubDate 00483 thewbItem.find('description').text = description 00484 thewbItem.find('link').text = link 00485 itemThumbNail(thewbItem)[0].attrib['url'] = thumbNail 00486 itemDwnLink(thewbItem)[0].attrib['url'] = link 00487 if seasonNum: 00488 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = seasonNum 00489 if episodeNum: 00490 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = episodeNum 00491 itemDict[title.lower()] = thewbItem 00492 00493 if not len(itemDict.keys()): 00494 if ignoreError: 00495 return [None, None] 00496 raise TheWBVideoNotFound(u"No TheWB Video matches found for search value (%s)" % title) 00497 00498 return [itemDict, resultTree.xpath('//pageInfo')[0].text] 00499 # end searchTitle() 00500 00501 00502 def searchForVideos(self, title, pagenumber): 00503 """Common 
name for a video search. Used to interface with MythTV plugin NetVision 00504 """ 00505 # Get thewb_config.xml 00506 self.getTheWBConfig() 00507 00508 if self.config['debug_enabled']: 00509 print "self.thewb_config:" 00510 sys.stdout.write(etree.tostring(self.thewb_config, encoding='UTF-8', pretty_print=True)) 00511 print 00512 00513 # Easier for debugging 00514 # print self.searchTitle(title, pagenumber, self.page_limit) 00515 # print 00516 # sys.exit() 00517 00518 try: 00519 data = self.searchTitle(title, pagenumber, self.page_limit) 00520 except TheWBVideoNotFound, msg: 00521 sys.stderr.write(u"%s\n" % msg) 00522 sys.exit(0) 00523 except TheWBUrlError, msg: 00524 sys.stderr.write(u'%s\n' % msg) 00525 sys.exit(1) 00526 except TheWBHttpError, msg: 00527 sys.stderr.write(self.error_messages['TheWBHttpError'] % msg) 00528 sys.exit(1) 00529 except TheWBRssError, msg: 00530 sys.stderr.write(self.error_messages['TheWBRssError'] % msg) 00531 sys.exit(1) 00532 except Exception, e: 00533 sys.stderr.write(u"! 
Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e)) 00534 sys.exit(1) 00535 00536 # Create RSS element tree 00537 rssTree = etree.XML(self.common.mnvRSS+u'</rss>') 00538 00539 # Set the paging values 00540 itemCount = len(data[0].keys()) 00541 if data[1] == 'true': 00542 self.channel['channel_returned'] = itemCount 00543 self.channel['channel_startindex'] = itemCount 00544 self.channel['channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1) 00545 else: 00546 self.channel['channel_returned'] = itemCount 00547 self.channel['channel_startindex'] = self.channel['channel_returned'] 00548 self.channel['channel_numresults'] = self.channel['channel_returned'] 00549 00550 # Add the Channel element tree 00551 channelTree = self.common.mnvChannelElement(self.channel) 00552 rssTree.append(channelTree) 00553 00554 lastKey = None 00555 00556 for key in sorted(data[0].keys()): 00557 if lastKey != key: 00558 channelTree.append(data[0][key]) 00559 lastKey = key 00560 00561 # Output the MNV search results 00562 sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n') 00563 sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True)) 00564 sys.exit(0) 00565 # end searchForVideos() 00566 00567 def displayTreeView(self): 00568 '''Gather the The WB feeds then get a max page of videos meta data in each of them 00569 Display the results and exit 00570 ''' 00571 # Get the user preferences that specify which shows and formats they want to be in the treeview 00572 try: 00573 self.getUserPreferences() 00574 except Exception, e: 00575 sys.stderr.write(u'%s\n' % e) 00576 sys.exit(1) 00577 00578 # Verify that there is at least one RSS feed that user wants to download 00579 showFeeds = self.userPrefs.xpath("//showDirectories//url[@enabled='true']") 00580 totalFeeds = self.userPrefs.xpath("//url[@enabled='true']") 00581 00582 if self.config['debug_enabled']: 00583 print "self.userPrefs show count(%s) total feed count(%s):" % 
(len(showFeeds), len(totalFeeds)) 00584 sys.stdout.write(etree.tostring(self.userPrefs, encoding='UTF-8', pretty_print=True)) 00585 print 00586 00587 if not len(totalFeeds): 00588 sys.stderr.write(u'There are no show or treeviewURLS elements "enabled" in your "thewb.xml" user preferences\nfile (%s)\n' % self.thewb_config.find('userPreferenceFile').text) 00589 sys.exit(1) 00590 00591 # Massage channel icon 00592 self.channel_icon = self.common.ampReplace(self.channel_icon) 00593 00594 # Create RSS element tree 00595 rssTree = etree.XML(self.common.mnvRSS+u'</rss>') 00596 00597 # Add the Channel element tree 00598 channelTree = self.common.mnvChannelElement(self.channel) 00599 rssTree.append(channelTree) 00600 00601 # Process any user specified searches 00602 showItems = {} 00603 if len(showFeeds) != None: 00604 for searchDetails in showFeeds: 00605 try: 00606 data = self.searchTitle(searchDetails.text.strip(), 1, self.page_limit, ignoreError=True) 00607 if data[0] == None: 00608 continue 00609 except TheWBVideoNotFound, msg: 00610 sys.stderr.write(u"%s\n" % msg) 00611 continue 00612 except TheWBUrlError, msg: 00613 sys.stderr.write(u'%s\n' % msg) 00614 continue 00615 except TheWBHttpError, msg: 00616 sys.stderr.write(self.error_messages['TheWBHttpError'] % msg) 00617 continue 00618 except TheWBRssError, msg: 00619 sys.stderr.write(self.error_messages['TheWBRssError'] % msg) 00620 continue 00621 except Exception, e: 00622 sys.stderr.write(u"! 
Error: Unknown error during a Video search (%s)\nError(%s)\n" % (searchDetails.text.strip(), e)) 00623 continue 00624 data.append(searchDetails.attrib['name']) 00625 showItems[self.common.massageText(searchDetails.text.strip())] = data 00626 continue 00627 00628 if self.config['debug_enabled']: 00629 print "After searches count(%s):" % len(showItems) 00630 for key in showItems.keys(): 00631 print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])) 00632 print 00633 00634 # Filter out any items that are not specifically for the show 00635 for showNameKey in showItems.keys(): 00636 tmpList = {} 00637 for key in showItems[showNameKey][0].keys(): 00638 tmpLink = showItems[showNameKey][0][key].find('link').text.replace(self.thewb_config.find('searchURLS').xpath(".//href")[0].text, u'') 00639 if tmpLink.startswith(showNameKey): 00640 tmpList[key] = showItems[showNameKey][0][key] 00641 showItems[showNameKey][0] = tmpList 00642 00643 if self.config['debug_enabled']: 00644 print "After search filter of non-show items count(%s):" % len(showItems) 00645 for key in showItems.keys(): 00646 print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0])) 00647 print 00648 00649 # Create a structure of feeds that concurrently have videos 00650 rssData = etree.XML(u'<xml></xml>') 00651 rssFeedsUrl = u'http://www.thewb.com/shows/feed/' 00652 for feedType in self.userPrefs.findall('showDirectories'): 00653 for rssFeed in self.userPrefs.xpath("//showDirectories/url[@enabled='true']"): 00654 link = rssFeedsUrl+rssFeed.text 00655 urlName = rssFeed.attrib.get('name') 00656 if urlName: 00657 uniqueName = u'%s;%s' % (urlName, link) 00658 else: 00659 uniqueName = u'RSS;%s' % (link) 00660 url = etree.XML(u'<url></url>') 00661 etree.SubElement(url, "name").text = uniqueName 00662 etree.SubElement(url, "href").text = link 00663 etree.SubElement(url, "filter").text = u"//channel/title" 00664 etree.SubElement(url, "filter").text = 
u"//item" 00665 etree.SubElement(url, "parserType").text = u'xml' 00666 rssData.append(url) 00667 00668 if self.config['debug_enabled']: 00669 print "rssData:" 00670 sys.stdout.write(etree.tostring(rssData, encoding='UTF-8', pretty_print=True)) 00671 print 00672 00673 # Get the RSS Feed data 00674 self.channelLanguage = u'en' 00675 self.itemAuthor = u'The WB.com' 00676 self.itemFilter = etree.XPath('.//item', namespaces=self.common.namespaces) 00677 self.titleFilter = etree.XPath('.//title', namespaces=self.common.namespaces) 00678 self.linkFilter = etree.XPath('.//link', namespaces=self.common.namespaces) 00679 self.descFilter1 = etree.XPath('.//description', namespaces=self.common.namespaces) 00680 self.descFilter2 = etree.XPath("//text()") 00681 self.pubdateFilter = etree.XPath('.//pubDate', namespaces=self.common.namespaces) 00682 self.thumbNailFilter = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces) 00683 self.itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces) 00684 self.itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces) 00685 self.rssName = etree.XPath('title', namespaces=self.common.namespaces) 00686 self.feedFilter = etree.XPath('//url[text()=$url]') 00687 self.HTMLparser = etree.HTMLParser() 00688 if rssData.find('url') != None: 00689 try: 00690 resultTree = self.common.getUrlData(rssData) 00691 except Exception, errormsg: 00692 raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg)) 00693 00694 if self.config['debug_enabled']: 00695 print "resultTree:" 00696 sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True)) 00697 print 00698 00699 # Process each directory of the user preferences that have an enabled rss feed 00700 for result in resultTree.findall('results'): 00701 names = result.find('name').text.split(u';') 00702 names[0] = self.common.massageText(names[0]) 00703 if names[0] == 'RSS': 00704 names[0] = 
self.common.massageText(self.rssName(result.find('result'))[0].text.strip()) 00705 urlName = names[0] 00706 else: 00707 urlName = result.find('url').text.replace(rssFeedsUrl, u'').strip() 00708 00709 urlMax = None 00710 url = self.feedFilter(self.userPrefs, url=names[1]) 00711 if len(url): 00712 if url[0].attrib.get('max'): 00713 try: 00714 urlMax = int(url[0].attrib.get('max')) 00715 except: 00716 pass 00717 elif url[0].getparent().attrib.get('globalmax'): 00718 try: 00719 urlMax = int(url[0].getparent().attrib.get('globalmax')) 00720 except: 00721 pass 00722 if urlMax == 0: 00723 urlMax = None 00724 if self.config['debug_enabled']: 00725 print "Results: #Items(%s) for (%s)" % (len(self.itemFilter(result)), names) 00726 print 00727 self.createItems(showItems, result, urlName, names[0], urlMax=urlMax) 00728 continue 00729 00730 # Add all the shows and rss items to the channel 00731 for key in sorted(showItems.keys()): 00732 if not len(showItems[key][0]): 00733 continue 00734 # Create a new directory and/or subdirectory if required 00735 directoryElement = etree.SubElement(channelTree, u'directory') 00736 directoryElement.attrib['name'] = showItems[key][2] 00737 directoryElement.attrib['thumbnail'] = self.channel_icon 00738 00739 if self.config['debug_enabled']: 00740 print "Results: #Items(%s) for (%s)" % (len(showItems[key][0]), showItems[key][2]) 00741 print 00742 00743 # Copy all the items into the MNV RSS directory 00744 for itemKey in sorted(showItems[key][0].keys()): 00745 directoryElement.append(showItems[key][0][itemKey]) 00746 00747 if self.config['debug_enabled']: 00748 print "Final results: #Items(%s)" % len(rssTree.xpath('//item')) 00749 print 00750 00751 # Check that there was at least some items 00752 if len(rssTree.xpath('//item')): 00753 # Output the MNV search results 00754 sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n') 00755 sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True)) 00756 00757 sys.exit(0) 00758 
# end displayTreeView()

def createItems(self, showItems, result, urlName, showName, urlMax=None):
    '''Create MNV compliant RSS items from the results of a RSS feed show search and merge them
    into the show item dictionary. Items that share a (lower cased) title with an item already
    stored for the show replace it, as the RSS feed item data provides better results than the
    Web search.

    showItems - dictionary keyed by urlName; each value is a list of
                [dictionary of items keyed by lower cased title, None, showName]
    result    - element tree node holding the RSS feed items for one show
    urlName   - key under which the show is stored in showItems
    showName  - display name of the show, stored when the show entry is first created
    urlMax    - maximum number of feed items to process; None or 0 means no limit

    return nothing as the show item dictionary will have all the results
    '''
    mnvNamespace = "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}"

    # Initialize the show if it has not already had a search result
    if urlName not in showItems:
        showItems[urlName] = [{}, None, showName]

    # Convert each RSS item into a MNV item
    count = 0
    for thewbItem in self.itemFilter(result):
        newItem = etree.XML(self.common.mnvItem)
        # These videos are only viewable in the US so add a country indicator
        etree.SubElement(newItem, mnvNamespace + "country").text = u'us'

        # The video code is the last path component of the feed item's link
        tmpLink = self.linkFilter(thewbItem)[0].text.strip()
        link = self.common.ampReplace(
            u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s'
            % (baseProcessingDir, tmpLink[tmpLink.rfind(u'/')+1:]))
        etree.SubElement(newItem, mnvNamespace + "customhtml").text = 'true'

        # Convert the pubDate '2010-05-02T11:23:25-07:00' to a MNV pubdate format
        pubdate = self.pubdateFilter(thewbItem)
        if len(pubdate):
            # Drop the trailing UTC offset ('-07:00') before parsing
            pubdate = time.strptime(pubdate[0].text[:-6], '%Y-%m-%dT%H:%M:%S')
            pubdate = time.strftime(self.common.pubDateFormat, pubdate)
        else:
            pubdate = datetime.datetime.now().strftime(self.common.pubDateFormat)

        title = self.common.massageText(self.titleFilter(thewbItem)[0].text.strip())
        tmptitle = None
        descList = self.descFilter2(
            etree.parse(StringIO(self.descFilter1(thewbItem)[0].text), self.HTMLparser))
        description = None
        seasonNum = None
        episodeNum = None
        for eText in descList:
            if eText == '\n\t':
                continue
            eText = eText.strip().encode('UTF-8')
            # The first usable text node is the description; later nodes carry metadata
            if not description:
                description = eText
                continue
            try:
                if eText.startswith(u'Season: ') or eText.startswith(u'EP: '):
                    s_e = eText.replace(u'Season:', u'').replace(u', Episode:', u'').replace(u'EP:', u'').strip().split(u' ')
                    if len(s_e) == 1 and can_int(s_e[0].strip()):
                        eText = u'Ep(%02d)' % int(s_e[0].strip())
                        episodeNum = s_e[0].strip()
                    elif len(s_e) == 2 and can_int(s_e[0].strip()) and can_int(s_e[1].strip()):
                        eText = u'S%02dE%02d' % (int(s_e[0].strip()), int(s_e[1].strip()))
                        seasonNum = s_e[0].strip()
                        episodeNum = s_e[1].strip()
                    # Insert the season/episode text in front of the first dash of the title
                    title = title.replace(u'-', u'–')
                    index = title.find(u'–')
                    if index != -1:
                        tmptitle = u'%s: %s %s' % (title[:index].strip(), eText.strip(), title[index:].strip())
                    else:
                        tmptitle = u'%s %s' % (title, eText.strip())
                    continue
                elif eText.startswith(u'Running Time: '):
                    # Running time is 'SS', 'MM:SS' or 'HH:MM:SS'; anything longer is ignored
                    videoDuration = eText.replace(u'Running Time: ', u'').strip().split(u':')
                    if len(videoDuration) > 3:
                        continue
                    try:
                        videoSeconds = 0
                        for part in videoDuration:
                            videoSeconds = videoSeconds * 60 + int(part)
                    except ValueError:
                        # A non-numeric running time is not fatal, just skip the duration
                        continue
                    if videoSeconds:
                        self.itemDwnLink(newItem)[0].attrib['duration'] = unicode(videoSeconds)
            except UnicodeDecodeError:
                # The UTF-8 encoded text could not be compared with the unicode markers
                continue

        if tmptitle:
            title = tmptitle
        title = self.common.massageText(title.strip())
        # An item without any usable description text gets an empty description
        description = self.common.massageText((description or u'').strip())

        # Insert data into a new item element
        newItem.find('title').text = title
        newItem.find('author').text = self.itemAuthor
        newItem.find('pubDate').text = pubdate
        newItem.find('description').text = description
        newItem.find('link').text = link
        self.itemDwnLink(newItem)[0].attrib['url'] = link
        try:
            self.itemThumbNail(newItem)[0].attrib['url'] = self.common.ampReplace(self.itemThumbNail(thewbItem)[0].attrib['url'])
        except IndexError:
            # The feed item has no thumbnail
            pass
        self.itemDwnLink(newItem)[0].attrib['lang'] = self.channelLanguage
        if seasonNum:
            etree.SubElement(newItem, mnvNamespace + "season").text = seasonNum
        if episodeNum:
            etree.SubElement(newItem, mnvNamespace + "episode").text = episodeNum

        # Merge RSS results with search results and override any duplicates with the RSS item
        showItems[urlName][0][title.lower()] = newItem

        # Stop as soon as the maximum number of items has been processed
        if urlMax:
            count += 1
            if count >= urlMax:
                break
    return
# end createItems()
# end Videos() class
1.7.6.1