MythTV  0.26-pre
thewb_api.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: UTF-8 -*-
00003 # ----------------------
00004 # Name: thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)
00005 # Python Script
00006 # Author:   R.D. Vaughan
00007 # Purpose:  This python script is intended to perform a variety of utility functions to search and
00008 #           access text metadata, video and image URLs from The WB.
00009 #
00010 # License:Creative Commons GNU GPL v2
00011 # (http://creativecommons.org/licenses/GPL/2.0/)
00012 #-------------------------------------
00013 __title__ ="thewb_api - Simple-to-use Python interface to the The WB RSS feeds (http://www.thewb.com/)"
00014 __author__="R.D. Vaughan"
00015 __purpose__='''
00016 This python script is intended to perform a variety of utility functions to search and access text
00017 meta data, video and image URLs from thewb. These routines process RSS feeds provided by The WB
00018 (http://www.thewb.com/). The specific "The WB" RSS feeds that are processed are controled through
00019 a user XML preference file usually found at "~/.mythtv/MythNetvision/userGrabberPrefs/thewb.xml"
00020 '''
00021 
00022 __version__="v0.1.3"
00023 # 0.1.0 Initial development
00024 # 0.1.1 Changed the logger to only output to stderr rather than a file
00025 # 0.1.2 Removed the need for python MythTV bindings and added "%SHAREDIR%" to icon directory path
00026 # 0.1.3 Fixes to accommodate changes to TheWB web site.
00027 
00028 import os, struct, sys, re, time, datetime, urllib
00029 import logging
00030 from socket import gethostname, gethostbyname
00031 from threading import Thread
00032 from copy import deepcopy
00033 
00034 from thewb_exceptions import (TheWBUrlError, TheWBHttpError, TheWBRssError, TheWBVideoNotFound, TheWBConfigFileError, TheWBUrlDownloadError)
00035 
class OutStreamEncoder(object):
    """File-like wrapper that encodes unicode text before handing it to a stream"""
    def __init__(self, outstream, encoding=None):
        self.out = outstream
        # Fall back to the file system encoding when no encoding was requested
        self.encoding = encoding or sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is a unicode string.
        An IOError raised by the wrapped stream's write is ignored.
        """
        if isinstance(obj, unicode):
            payload = obj.encode(self.encoding)
        else:
            payload = obj
        try:
            self.out.write(payload)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Anything not defined on the wrapper is looked up on the wrapped stream"""
        return getattr(self.out, attr)
# Replace the process-wide standard streams with wrappers so that unicode strings
# written to stdout/stderr are encoded as UTF-8 before reaching the real streams.
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')
00063 
00064 
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the libxml2 library used by lxml is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# etree.LIBXML_VERSION is a tuple of integers. The dotted string is kept only for the
# error message; the comparison itself must be numeric, because comparing strings
# ranks e.g. '2.10.0' below '2.7.2' and would reject a perfectly good library.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00086 
00087 
def can_int(x):
    """Report whether x can be converted to an integer with int().

    Returns False for None and for any value int() rejects, including values
    of an unsupported type (e.g. a list), instead of letting the error escape.

    >>> can_int("2")
    True
    >>> can_int("A test")
    False
    >>> can_int(None)
    False
    """
    if x is None:
        return False
    try:
        int(x)
    except (ValueError, TypeError):
        # ValueError: text that is not a number; TypeError: a type int() cannot convert
        return False
    return True
# end can_int
00104 
00105 
00106 class Videos(object):
00107     """Main interface to http://www.thewb.com/
00108     This is done to support a common naming framework for all python Netvision plugins no matter their site
00109     target.
00110 
00111     Supports search methods
00112     The apikey is not required to access http://www.thewb.com/
00113     """
00114     def __init__(self,
00115                 apikey,
00116                 mythtv = True,
00117                 interactive = False,
00118                 select_first = False,
00119                 debug = False,
00120                 custom_ui = None,
00121                 language = None,
00122                 search_all_languages = False,
00123                 ):
00124         """apikey (str/unicode):
00125             Specify the target site API key. Applications need their own key in some cases
00126 
00127         mythtv (True/False):
00128             When True, the returned meta data is being returned has the key and values massaged to match MythTV
00129             When False, the returned meta data  is being returned matches what target site returned
00130 
00131         interactive (True/False): (This option is not supported by all target site apis)
00132             When True, uses built-in console UI is used to select the correct show.
00133             When False, the first search result is used.
00134 
00135         select_first (True/False): (This option is not supported currently implemented in any grabbers)
00136             Automatically selects the first series search result (rather
00137             than showing the user a list of more than one series).
00138             Is overridden by interactive = False, or specifying a custom_ui
00139 
00140         debug (True/False):
00141              shows verbose debugging information
00142 
00143         custom_ui (xx_ui.BaseUI subclass): (This option is not supported currently implemented in any grabbers)
00144             A callable subclass of interactive class (overrides interactive option)
00145 
00146         language (2 character language abbreviation): (This option is not supported by all target site apis)
00147             The language of the returned data. Is also the language search
00148             uses. Default is "en" (English). For full list, run..
00149 
00150         search_all_languages (True/False): (This option is not supported by all target site apis)
00151             By default, a Netvision grabber will only search in the language specified using
00152             the language option. When this is True, it will search for the
00153             show in any language
00154 
00155         """
00156         self.config = {}
00157 
00158         if apikey is not None:
00159             self.config['apikey'] = apikey
00160         else:
00161             pass    # TheWB does not require an apikey
00162 
00163         self.config['debug_enabled'] = debug # show debugging messages
00164         self.common = common
00165         self.common.debug = debug   # Set the common function debug level
00166 
00167         self.log_name = u'TheWB_Grabber'
00168         self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
00169         self.logger = self.common.logger # Setups the logger (self.log.debug() etc)
00170 
00171         self.config['custom_ui'] = custom_ui
00172 
00173         self.config['interactive'] = interactive
00174 
00175         self.config['select_first'] = select_first
00176 
00177         self.config['search_all_languages'] = search_all_languages
00178 
00179         self.error_messages = {'TheWBUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n", 'TheWBHttpError': u"! Error: An HTTP communications error with The WB was raised (%s)\n", 'TheWBRssError': u"! Error: Invalid RSS meta data\nwas received from The WB error (%s). Skipping item.\n", 'TheWBVideoNotFound': u"! Error: Video search with The WB did not return any results (%s)\n", 'TheWBConfigFileError': u"! Error: thewb_config.xml file missing\nit should be located in and named as (%s).\n", 'TheWBUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n", }
00180 
00181         # Channel details and search results
00182         self.channel = {'channel_title': u'The WB', 'channel_link': u'http://www.thewb.com/', 'channel_description': u"Watch full episodes of your favorite shows on The WB.com, like Friends, The O.C., Veronica Mars, Pushing Daisies, Smallville, Buffy The Vampire Slayer, One Tree Hill and Gilmore Girls.", 'channel_numresults': 0, 'channel_returned': 1, u'channel_startindex': 0}
00183 
00184 
00185         # Season and Episode detection regex patterns
00186         self.s_e_Patterns = [
00187             # Season 3: Ep. 13 (01:04:30)
00188             re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
00189             # Season 3: Ep. 13 (04:30)
00190             re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
00191             # Season 3: Ep. 13
00192             re.compile(u'''Season\\ (?P<seasno>[0-9]+)\\:\\ Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
00193             # Ep. 13 (01:04:30)
00194             re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<hours>[0-9]+)\\:(?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
00195             # Ep. 13 (04:30)
00196             re.compile(u'''Ep.\\ (?P<epno>[0-9]+)\\ \\((?P<minutes>[0-9]+)\\:(?P<seconds>[0-9]+).*$''', re.UNICODE),
00197             # Ep. 13
00198             re.compile(u'''Ep.\\ (?P<epno>[0-9]+).*$''', re.UNICODE),
00199             ]
00200 
00201         self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/thewb.png'
00202     # end __init__()
00203 
00204 ###########################################################################################################
00205 #
00206 # Start - Utility functions
00207 #
00208 ###########################################################################################################
00209 
00210     def getSeasonEpisode(self, title):
00211         ''' Check is there is any season or episode number information in an item's title
00212         return array of season and/or episode numbers plus any duration in minutes and seconds
00213         return array with None values
00214         '''
00215         s_e = []
00216         for index in range(len(self.s_e_Patterns)):
00217             match = self.s_e_Patterns[index].match(title)
00218             if not match:
00219                 continue
00220             return match.groups()
00221         return s_e
00222     # end getSeasonEpisode()
00223 
00224     def getTheWBConfig(self):
00225         ''' Read the MNV The WB grabber "thewb_config.xml" configuration file
00226         return nothing
00227         '''
00228         # Read the grabber thewb_config.xml configuration file
00229         url = u'file://%s/nv_python_libs/configs/XML/thewb_config.xml' % (baseProcessingDir, )
00230         if not os.path.isfile(url[7:]):
00231             raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], ))
00232 
00233         if self.config['debug_enabled']:
00234             print url
00235             print
00236         try:
00237             self.thewb_config = etree.parse(url)
00238         except Exception, e:
00239             raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
00240         return
00241     # end getTheWBConfig()
00242 
00243 
00244     def getUserPreferences(self):
00245         '''Read the thewb_config.xml and user preference thewb.xml file.
00246         If the thewb.xml file does not exist then create it.
00247         If the thewb.xml file is too old then update it.
00248         return nothing
00249         '''
00250         # Get thewb_config.xml
00251         self.getTheWBConfig()
00252 
00253         # Check if the thewb.xml file exists
00254         userPreferenceFile = self.thewb_config.find('userPreferenceFile').text
00255         if userPreferenceFile[0] == '~':
00256              self.thewb_config.find('userPreferenceFile').text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:])
00257         if os.path.isfile(self.thewb_config.find('userPreferenceFile').text):
00258             # Read the grabber thewb_config.xml configuration file
00259             url = u'file://%s' % (self.thewb_config.find('userPreferenceFile').text, )
00260             if self.config['debug_enabled']:
00261                 print url
00262                 print
00263             try:
00264                 self.userPrefs = etree.parse(url)
00265             except Exception, e:
00266                 raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
00267             # Check if the thewb.xml file is too old
00268             nextUpdateSecs = int(self.userPrefs.find('updateDuration').text)*86400 # seconds in a day
00269             nextUpdate = time.localtime(os.path.getmtime(self.thewb_config.find('userPreferenceFile').text)+nextUpdateSecs)
00270             now = time.localtime()
00271             if nextUpdate > now:
00272                 return
00273             create = False
00274         else:
00275             create = True
00276 
00277         # If required create/update the thewb.xml file
00278         self.updateTheWB(create)
00279         return
00280     # end getUserPreferences()
00281 
00282     def updateTheWB(self, create=False):
00283         ''' Create or update the thewb.xml user preferences file
00284         return nothing
00285         '''
00286         # Read the default user preferences file
00287         url = u'file://%s/nv_python_libs/configs/XML/defaultUserPrefs/thewb.xml' % (baseProcessingDir, )
00288         if not os.path.isfile(url[7:]):
00289             raise TheWBConfigFileError(self.error_messages['TheWBConfigFileError'] % (url[7:], ))
00290 
00291         if self.config['debug_enabled']:
00292             print 'updateTheWB url(%s)' % url
00293             print
00294         try:
00295             userTheWB = etree.parse(url)
00296         except Exception, e:
00297             raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
00298 
00299         # Get the current show links from the TheWB web site
00300         linksTree = self.common.getUrlData(self.thewb_config.find('treeviewUrls'))
00301 
00302         if self.config['debug_enabled']:
00303             print "create(%s)" % create
00304             print "linksTree:"
00305             sys.stdout.write(etree.tostring(linksTree, encoding='UTF-8', pretty_print=True))
00306             print
00307 
00308         # Check that at least several show directories were returned
00309         if not create:
00310             if not len(linksTree.xpath('//results//a')) > 10:
00311                 return self.userPrefs
00312 
00313         # Assemble the feeds and formats
00314         root = etree.XML(u'<xml></xml>')
00315         for directory in linksTree.xpath('//results'):
00316             tmpDirectory = etree.SubElement(root, u'showDirectories')
00317             tmpDirectory.attrib['name'] = directory.find('name').text
00318             for show in directory.xpath('.//a'):
00319                 showName = show.text
00320                 # Skip any DVD references as they are not on-line videos
00321                 if showName.lower().find('dvd') != -1 or show.attrib['href'].lower().find('dvd') != -1:
00322                     continue
00323                 tmpShow = etree.XML(u'<url></url>')
00324                 tmpShow.attrib['enabled'] = u'true'
00325                 tmpShow.attrib['name'] = self.common.massageText(showName.strip())
00326                 tmpShow.text = self.common.ampReplace(show.attrib['href'].replace(u'/shows/', u'').replace(u'/', u'').strip())
00327                 tmpDirectory.append(tmpShow)
00328 
00329         if self.config['debug_enabled']:
00330             print "Before any merging userTheWB:"
00331             sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
00332             print
00333 
00334         # If there was an existing thewb.xml file then add any relevant user settings to
00335         # this new thewb.xml
00336         if not create:
00337             userTheWB.find('updateDuration').text = self.userPrefs.find('updateDuration').text
00338             if self.userPrefs.find('showDirectories').get('globalmax'):
00339                 root.find('showDirectories').attrib['globalmax'] = self.userPrefs.find('showDirectories').attrib['globalmax']
00340             for rss in self.userPrefs.xpath("//url[@enabled='false']"):
00341                 elements = root.xpath("//url[text()=$URL]", URL=rss.text.strip())
00342                 if len(elements):
00343                     elements[0].attrib['enabled'] = u'false'
00344                     if rss.get('max'):
00345                         elements[0].attrib['max'] = rss.attrib['max']
00346 
00347         if self.config['debug_enabled']:
00348             print "After any merging userTheWB:"
00349             sys.stdout.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True))
00350             print
00351 
00352         # Save the thewb.xml file
00353         prefDir = self.thewb_config.find('userPreferenceFile').text.replace(u'/thewb.xml', u'')
00354         if not os.path.isdir(prefDir):
00355             os.makedirs(prefDir)
00356         fd = open(self.thewb_config.find('userPreferenceFile').text, 'w')
00357         fd.write(etree.tostring(userTheWB, encoding='UTF-8', pretty_print=True)[:-len(u'</userTheWB>')-1]+u''.join(etree.tostring(element, encoding='UTF-8', pretty_print=True) for element in root.xpath('/xml/*'))+u'</userTheWB>')
00358         fd.close()
00359 
00360         # Input the refreshed user preference data
00361         try:
00362             self.userPrefs = etree.parse(self.thewb_config.find('userPreferenceFile').text)
00363         except Exception, e:
00364             raise TheWBUrlError(self.error_messages['TheWBUrlError'] % (url, errormsg))
00365         return
00366     # end updateTheWB()
00367 
00368 ###########################################################################################################
00369 #
00370 # End of Utility functions
00371 #
00372 ###########################################################################################################
00373 
00374 
00375     def searchTitle(self, title, pagenumber, pagelen, ignoreError=False):
00376         '''Key word video search of the TheWB web site
00377         return an array of matching item elements
00378         return
00379         '''
00380         orgURL = self.thewb_config.find('searchURLS').xpath(".//href")[0].text
00381 
00382         try:
00383             searchVar = u'?q=%s' % (urllib.quote(title.encode("utf-8")).replace(u' ', u'+'))
00384         except UnicodeDecodeError:
00385             searchVar = u'?q=%s' % (urllib.quote(title).replace(u' ', u'+'))
00386         url = self.thewb_config.find('searchURLS').xpath(".//href")[0].text+searchVar
00387 
00388         if self.config['debug_enabled']:
00389             print "Search url(%s)" % url
00390             print
00391 
00392         self.thewb_config.find('searchURLS').xpath(".//href")[0].text = url
00393 
00394         # Perform a search
00395         try:
00396             resultTree = self.common.getUrlData(self.thewb_config.find('searchURLS'), pageFilter=None)
00397         except Exception, errormsg:
00398             self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL
00399             raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg))
00400 
00401         self.thewb_config.find('searchURLS').xpath(".//href")[0].text = orgURL
00402 
00403         if self.config['debug_enabled']:
00404             print "resultTree count(%s)" % len(resultTree)
00405             print etree.tostring(resultTree, encoding='UTF-8', pretty_print=True)
00406             print
00407 
00408         if resultTree is None:
00409             if ignoreError:
00410                 return [None, None]
00411             raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title)
00412 
00413         searchResults = resultTree.xpath('//result/div')
00414         if not len(searchResults):
00415             if ignoreError:
00416                 return [None, None]
00417             raise TheWBVideoNotFound(u"No TheWB.com Video matches found for search value (%s)" % title)
00418 
00419         # Set the number of search results returned
00420         self.channel['channel_numresults'] = len(searchResults)
00421 
00422         # TheWB search results fo not have a pubDate so use the current data time
00423         # e.g. "Sun, 06 Jan 2008 21:44:36 GMT"
00424         pubDate = datetime.datetime.now().strftime(self.common.pubDateFormat)
00425 
00426         # Translate the search results into MNV RSS item format
00427         thumbNailFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//img')
00428         textFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p')
00429         titleFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@title')
00430         descFilter = etree.XPath('.//div[@class="overlay-bg-middle"]/p[@class="overlay_extra overlay_spacer_top"]/text()')
00431         linkFilter = etree.XPath('.//div[@class="overlay_thumb_area"]//a[@title!=""]/@href')
00432         itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
00433         itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
00434         itemDict = {}
00435         for result in searchResults:
00436             if linkFilter(result) != None:   # Make sure that this result actually has a video
00437                 thewbItem = etree.XML(self.common.mnvItem)
00438                 # These videos are only viewable in the US so add a country indicator
00439                 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = u'us'
00440                 # Extract and massage data
00441                 thumbNail = self.common.ampReplace(thumbNailFilter(result)[0].attrib['src'])
00442                 title = titleFilter(result)[0].strip()
00443                 link = u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, result.attrib['id'].replace(u'video_', u''))
00444                 etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true'
00445                 descriptionElement = textFilter(result)[0]
00446                 description = u''
00447                 tmptitle = None
00448                 seasonNum = None
00449                 episodeNum = None
00450                 for e in descriptionElement.xpath('./*'):
00451                     try:
00452                         eText = unicode(e.tail, 'UTF-8').strip()
00453                     except:
00454                         continue
00455                     if eText.startswith(u'Season ') or eText.startswith(u'EP'):
00456                         sed = self.getSeasonEpisode(eText)
00457                         if not len(sed):
00458                             continue
00459                         infoList =  u'S%02dE%02d' % (int(sed[0]), int(sed[1]))
00460                         seasonNum = u'%d' % int(sed[0])
00461                         episodeNum = u'%d' % int(sed[1])
00462                         if len(sed) == 5:
00463                             videoSeconds = int(sed[2])*3600+int(sed[3])*60+int(sed[4])
00464                             itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds)
00465                         elif len(sed) == 4:
00466                             videoSeconds = int(sed[2])*60+int(sed[3])
00467                             itemDwnLink(thewbItem)[0].attrib['duration'] = unicode(videoSeconds)
00468 
00469                         index = title.find(u':')
00470                         if index != -1:
00471                             tmptitle = u'%s: %s %s' % (title[:index].strip(), infoList, title[index+1:].strip())
00472                         else:
00473                             tmptitle = u'%s: %s' % (title.strip(), infoList)
00474                 if tmptitle:
00475                     title = tmptitle
00476                 title = self.common.massageText(title.strip())
00477                 description = self.common.massageText(descFilter(result)[0].strip())
00478 
00479                 # Insert data into a new item element
00480                 thewbItem.find('title').text = title
00481                 thewbItem.find('author').text = "The WB.com"
00482                 thewbItem.find('pubDate').text = pubDate
00483                 thewbItem.find('description').text = description
00484                 thewbItem.find('link').text = link
00485                 itemThumbNail(thewbItem)[0].attrib['url'] = thumbNail
00486                 itemDwnLink(thewbItem)[0].attrib['url'] = link
00487                 if seasonNum:
00488                     etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = seasonNum
00489                 if episodeNum:
00490                     etree.SubElement(thewbItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = episodeNum
00491                 itemDict[title.lower()] = thewbItem
00492 
00493         if not len(itemDict.keys()):
00494             if ignoreError:
00495                 return [None, None]
00496             raise TheWBVideoNotFound(u"No TheWB Video matches found for search value (%s)" % title)
00497 
00498         return [itemDict, resultTree.xpath('//pageInfo')[0].text]
00499         # end searchTitle()
00500 
00501 
00502     def searchForVideos(self, title, pagenumber):
00503         """Common name for a video search. Used to interface with MythTV plugin NetVision
00504         """
00505         # Get thewb_config.xml
00506         self.getTheWBConfig()
00507 
00508         if self.config['debug_enabled']:
00509             print "self.thewb_config:"
00510             sys.stdout.write(etree.tostring(self.thewb_config, encoding='UTF-8', pretty_print=True))
00511             print
00512 
00513         # Easier for debugging
00514 #        print self.searchTitle(title, pagenumber, self.page_limit)
00515 #        print
00516 #        sys.exit()
00517 
00518         try:
00519             data = self.searchTitle(title, pagenumber, self.page_limit)
00520         except TheWBVideoNotFound, msg:
00521             sys.stderr.write(u"%s\n" % msg)
00522             sys.exit(0)
00523         except TheWBUrlError, msg:
00524             sys.stderr.write(u'%s\n' % msg)
00525             sys.exit(1)
00526         except TheWBHttpError, msg:
00527             sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
00528             sys.exit(1)
00529         except TheWBRssError, msg:
00530             sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
00531             sys.exit(1)
00532         except Exception, e:
00533             sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
00534             sys.exit(1)
00535 
00536         # Create RSS element tree
00537         rssTree = etree.XML(self.common.mnvRSS+u'</rss>')
00538 
00539         # Set the paging values
00540         itemCount = len(data[0].keys())
00541         if data[1] == 'true':
00542             self.channel['channel_returned'] = itemCount
00543             self.channel['channel_startindex'] = itemCount
00544             self.channel['channel_numresults'] = itemCount+(self.page_limit*(int(pagenumber)-1)+1)
00545         else:
00546             self.channel['channel_returned'] = itemCount
00547             self.channel['channel_startindex'] = self.channel['channel_returned']
00548             self.channel['channel_numresults'] = self.channel['channel_returned']
00549 
00550         # Add the Channel element tree
00551         channelTree = self.common.mnvChannelElement(self.channel)
00552         rssTree.append(channelTree)
00553 
00554         lastKey = None
00555 
00556         for key in sorted(data[0].keys()):
00557             if lastKey != key:
00558                 channelTree.append(data[0][key])
00559                 lastKey = key
00560 
00561         # Output the MNV search results
00562         sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
00563         sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
00564         sys.exit(0)
00565     # end searchForVideos()
00566 
00567     def displayTreeView(self):
00568         '''Gather the The WB feeds then get a max page of videos meta data in each of them
00569         Display the results and exit
00570         '''
00571         # Get the user preferences that specify which shows and formats they want to be in the treeview
00572         try:
00573             self.getUserPreferences()
00574         except Exception, e:
00575             sys.stderr.write(u'%s\n' % e)
00576             sys.exit(1)
00577 
00578         # Verify that there is at least one RSS feed that user wants to download
00579         showFeeds = self.userPrefs.xpath("//showDirectories//url[@enabled='true']")
00580         totalFeeds = self.userPrefs.xpath("//url[@enabled='true']")
00581 
00582         if self.config['debug_enabled']:
00583             print "self.userPrefs show count(%s) total feed count(%s):" % (len(showFeeds), len(totalFeeds))
00584             sys.stdout.write(etree.tostring(self.userPrefs, encoding='UTF-8', pretty_print=True))
00585             print
00586 
00587         if not len(totalFeeds):
00588             sys.stderr.write(u'There are no show or treeviewURLS elements "enabled" in your "thewb.xml" user preferences\nfile (%s)\n' % self.thewb_config.find('userPreferenceFile').text)
00589             sys.exit(1)
00590 
00591         # Massage channel icon
00592         self.channel_icon = self.common.ampReplace(self.channel_icon)
00593 
00594         # Create RSS element tree
00595         rssTree = etree.XML(self.common.mnvRSS+u'</rss>')
00596 
00597         # Add the Channel element tree
00598         channelTree = self.common.mnvChannelElement(self.channel)
00599         rssTree.append(channelTree)
00600 
00601         # Process any user specified searches
00602         showItems = {}
00603         if len(showFeeds) != None:
00604             for searchDetails in showFeeds:
00605                 try:
00606                     data = self.searchTitle(searchDetails.text.strip(), 1, self.page_limit, ignoreError=True)
00607                     if data[0] == None:
00608                         continue
00609                 except TheWBVideoNotFound, msg:
00610                     sys.stderr.write(u"%s\n" % msg)
00611                     continue
00612                 except TheWBUrlError, msg:
00613                     sys.stderr.write(u'%s\n' % msg)
00614                     continue
00615                 except TheWBHttpError, msg:
00616                     sys.stderr.write(self.error_messages['TheWBHttpError'] % msg)
00617                     continue
00618                 except TheWBRssError, msg:
00619                     sys.stderr.write(self.error_messages['TheWBRssError'] % msg)
00620                     continue
00621                 except Exception, e:
00622                     sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (searchDetails.text.strip(), e))
00623                     continue
00624                 data.append(searchDetails.attrib['name'])
00625                 showItems[self.common.massageText(searchDetails.text.strip())] = data
00626                 continue
00627 
00628         if self.config['debug_enabled']:
00629             print "After searches count(%s):" % len(showItems)
00630             for key in showItems.keys():
00631                 print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0]))
00632             print
00633 
00634         # Filter out any items that are not specifically for the show
00635         for showNameKey in showItems.keys():
00636             tmpList = {}
00637             for key in showItems[showNameKey][0].keys():
00638                 tmpLink = showItems[showNameKey][0][key].find('link').text.replace(self.thewb_config.find('searchURLS').xpath(".//href")[0].text, u'')
00639                 if tmpLink.startswith(showNameKey):
00640                     tmpList[key] = showItems[showNameKey][0][key]
00641             showItems[showNameKey][0] = tmpList
00642 
00643         if self.config['debug_enabled']:
00644             print "After search filter of non-show items count(%s):" % len(showItems)
00645             for key in showItems.keys():
00646                 print "Show(%s) name(%s) item count(%s)" % (key, showItems[key][2], len(showItems[key][0]))
00647             print
00648 
00649         # Create a structure of feeds that concurrently have videos
00650         rssData = etree.XML(u'<xml></xml>')
00651         rssFeedsUrl = u'http://www.thewb.com/shows/feed/'
00652         for feedType in self.userPrefs.findall('showDirectories'):
00653             for rssFeed in self.userPrefs.xpath("//showDirectories/url[@enabled='true']"):
00654                 link = rssFeedsUrl+rssFeed.text
00655                 urlName = rssFeed.attrib.get('name')
00656                 if urlName:
00657                      uniqueName = u'%s;%s' % (urlName, link)
00658                 else:
00659                     uniqueName = u'RSS;%s' % (link)
00660                 url = etree.XML(u'<url></url>')
00661                 etree.SubElement(url, "name").text = uniqueName
00662                 etree.SubElement(url, "href").text = link
00663                 etree.SubElement(url, "filter").text = u"//channel/title"
00664                 etree.SubElement(url, "filter").text = u"//item"
00665                 etree.SubElement(url, "parserType").text = u'xml'
00666                 rssData.append(url)
00667 
00668         if self.config['debug_enabled']:
00669             print "rssData:"
00670             sys.stdout.write(etree.tostring(rssData, encoding='UTF-8', pretty_print=True))
00671             print
00672 
00673         # Get the RSS Feed data
00674         self.channelLanguage = u'en'
00675         self.itemAuthor = u'The WB.com'
00676         self.itemFilter = etree.XPath('.//item', namespaces=self.common.namespaces)
00677         self.titleFilter = etree.XPath('.//title', namespaces=self.common.namespaces)
00678         self.linkFilter = etree.XPath('.//link', namespaces=self.common.namespaces)
00679         self.descFilter1 = etree.XPath('.//description', namespaces=self.common.namespaces)
00680         self.descFilter2 = etree.XPath("//text()")
00681         self.pubdateFilter = etree.XPath('.//pubDate', namespaces=self.common.namespaces)
00682         self.thumbNailFilter = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
00683         self.itemThumbNail = etree.XPath('.//media:thumbnail', namespaces=self.common.namespaces)
00684         self.itemDwnLink = etree.XPath('.//media:content', namespaces=self.common.namespaces)
00685         self.rssName = etree.XPath('title', namespaces=self.common.namespaces)
00686         self.feedFilter = etree.XPath('//url[text()=$url]')
00687         self.HTMLparser = etree.HTMLParser()
00688         if rssData.find('url') != None:
00689             try:
00690                 resultTree = self.common.getUrlData(rssData)
00691             except Exception, errormsg:
00692                 raise TheWBUrlDownloadError(self.error_messages['TheWBUrlDownloadError'] % (errormsg))
00693 
00694             if self.config['debug_enabled']:
00695                 print "resultTree:"
00696                 sys.stdout.write(etree.tostring(resultTree, encoding='UTF-8', pretty_print=True))
00697                 print
00698 
00699             # Process each directory of the user preferences that have an enabled rss feed
00700             for result in resultTree.findall('results'):
00701                 names = result.find('name').text.split(u';')
00702                 names[0] = self.common.massageText(names[0])
00703                 if names[0] == 'RSS':
00704                     names[0] = self.common.massageText(self.rssName(result.find('result'))[0].text.strip())
00705                     urlName = names[0]
00706                 else:
00707                     urlName = result.find('url').text.replace(rssFeedsUrl, u'').strip()
00708 
00709                 urlMax = None
00710                 url = self.feedFilter(self.userPrefs, url=names[1])
00711                 if len(url):
00712                     if url[0].attrib.get('max'):
00713                         try:
00714                             urlMax = int(url[0].attrib.get('max'))
00715                         except:
00716                             pass
00717                     elif url[0].getparent().attrib.get('globalmax'):
00718                         try:
00719                             urlMax = int(url[0].getparent().attrib.get('globalmax'))
00720                         except:
00721                             pass
00722                     if urlMax == 0:
00723                         urlMax = None
00724                 if self.config['debug_enabled']:
00725                     print "Results: #Items(%s) for (%s)" % (len(self.itemFilter(result)), names)
00726                     print
00727                 self.createItems(showItems, result, urlName, names[0], urlMax=urlMax)
00728                 continue
00729 
00730         # Add all the shows and rss items to the channel
00731         for key in sorted(showItems.keys()):
00732             if not len(showItems[key][0]):
00733                 continue
00734             # Create a new directory and/or subdirectory if required
00735             directoryElement = etree.SubElement(channelTree, u'directory')
00736             directoryElement.attrib['name'] = showItems[key][2]
00737             directoryElement.attrib['thumbnail'] = self.channel_icon
00738 
00739             if self.config['debug_enabled']:
00740                 print "Results: #Items(%s) for (%s)" % (len(showItems[key][0]), showItems[key][2])
00741                 print
00742 
00743             # Copy all the items into the MNV RSS directory
00744             for itemKey in sorted(showItems[key][0].keys()):
00745                 directoryElement.append(showItems[key][0][itemKey])
00746 
00747         if self.config['debug_enabled']:
00748             print "Final results: #Items(%s)" % len(rssTree.xpath('//item'))
00749             print
00750 
00751         # Check that there was at least some items
00752         if len(rssTree.xpath('//item')):
00753             # Output the MNV search results
00754             sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
00755             sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
00756 
00757         sys.exit(0)
00758     # end displayTreeView()
00759 
00760     def createItems(self, showItems, result, urlName, showName, urlMax=None):
00761         '''Create a dictionary of MNV compliant RSS items from the results of a RSS feed show search.
00762         Also merge with any items that were found by using the Web search. Identical items use the RSS
00763         feed item data over the search item as RSS provides better results.
00764         return nothing as the show item dictionary will have all the results
00765         '''
00766         # Initalize show if it has not already had a search result
00767         if not urlName in showItems.keys():
00768             showItems[urlName] = [{}, None, showName]
00769 
00770         # Convert each RSS item into a MNV item
00771         count = 0
00772         for thewbItem in self.itemFilter(result):
00773             newItem = etree.XML(self.common.mnvItem)
00774             # These videos are only viewable in the US so add a country indicator
00775             etree.SubElement(newItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}country").text = u'us'
00776             # Extract and massage data
00777             tmpLink = self.linkFilter(thewbItem)[0].text.strip()
00778             link = self.common.ampReplace(u'file://%s/nv_python_libs/configs/HTML/thewb.html?videocode=%s' % (baseProcessingDir, tmpLink[tmpLink.rfind(u'/')+1:]))
00779             etree.SubElement(newItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}customhtml").text = 'true'
00780             # Convert the pubDate '2010-05-02T11:23:25-07:00' to a MNV pubdate format
00781             pubdate = self.pubdateFilter(thewbItem)
00782             if len(pubdate):
00783                 pubdate = pubdate[0].text[:-6]
00784                 pubdate = time.strptime(pubdate, '%Y-%m-%dT%H:%M:%S')
00785                 pubdate = time.strftime(self.common.pubDateFormat, pubdate)
00786             else:
00787                 pubdate = datetime.datetime.now().strftime(self.common.pubDateFormat)
00788             title = self.common.massageText(self.titleFilter(thewbItem)[0].text.strip())
00789             tmptitle = None
00790             descList = self.descFilter2(etree.parse(StringIO(self.descFilter1(thewbItem)[0].text), self.HTMLparser))
00791             description = None
00792             seasonNum = None
00793             episodeNum = None
00794             for eText in descList:
00795                 if eText == '\n\t':
00796                     continue
00797                 eText = eText.strip().encode('UTF-8')
00798                 if not description:
00799                     description = eText
00800                     continue
00801                 try:
00802                     if eText.startswith(u'Season: ') or eText.startswith(u'EP: '):
00803                         s_e = eText.replace(u'Season:',u'').replace(u', Episode:',u'').replace(u'EP:',u'').strip().split(u' ')
00804                         if len(s_e) == 1 and can_int(s_e[0].strip()):
00805                             eText = u'Ep(%02d)' % int(s_e[0].strip())
00806                             episodeNum = s_e[0].strip()
00807                         elif len(s_e) == 2 and can_int(s_e[0].strip()) and can_int(s_e[1].strip()):
00808                             eText = u'S%02dE%02d' % (int(s_e[0].strip()), int(s_e[1].strip()))
00809                             seasonNum = s_e[0].strip()
00810                             episodeNum = s_e[1].strip()
00811                         title = title.replace(u'-', u'–')
00812                         index = title.find(u'–')
00813                         if index != -1:
00814                             tmptitle = u'%s: %s %s' % (title[:index].strip(), eText.strip(), title[index:].strip())
00815                         else:
00816                             tmptitle = u'%s %s' % (title, eText.strip())
00817                         continue
00818                     elif eText.startswith(u'Running Time: '):
00819                         videoDuration = eText.replace(u'Running Time: ', u'').strip().split(u':')
00820                         if not len(videoDuration):
00821                             continue
00822                         videoSeconds = False
00823                         try:
00824                             if len(videoDuration) == 1:
00825                                 videoSeconds = int(videoDuration[0])
00826                             elif len(videoDuration) == 2:
00827                                 videoSeconds = int(videoDuration[0])*60+int(videoDuration[1])
00828                             elif len(videoDuration) == 3:
00829                                 videoSeconds = int(videoDuration[0])*3600+int(videoDuration[1])*60+int(videoDuration[2])
00830                             if videoSeconds:
00831                                 self.itemDwnLink(newItem)[0].attrib['duration'] = unicode(videoSeconds)
00832                         except:
00833                             pass
00834                 except UnicodeDecodeError:
00835                     continue
00836 
00837             if tmptitle:
00838                 title = tmptitle
00839             title = self.common.massageText(title.strip())
00840             description = self.common.massageText(description.strip())
00841             # Insert data into a new item element
00842             newItem.find('title').text = title
00843             newItem.find('author').text = self.itemAuthor
00844             newItem.find('pubDate').text = pubdate
00845             newItem.find('description').text = description
00846             newItem.find('link').text = link
00847             self.itemDwnLink(newItem)[0].attrib['url'] = link
00848             try:
00849                 self.itemThumbNail(newItem)[0].attrib['url'] = self.common.ampReplace(self.itemThumbNail(thewbItem)[0].attrib['url'])
00850             except IndexError:
00851                 pass
00852             self.itemDwnLink(newItem)[0].attrib['lang'] = self.channelLanguage
00853             if seasonNum:
00854                 etree.SubElement(newItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}season").text = seasonNum
00855             if episodeNum:
00856                 etree.SubElement(newItem, "{http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format}episode").text = episodeNum
00857             # Merge RSS results with search results and override any duplicates with the RSS item
00858             showItems[urlName][0][title.lower()] = newItem
00859             if urlMax: # Check of the maximum items to processes has been met
00860                 count+=1
00861                 if count > urlMax:
00862                     break
00863         return
00864     # end createItems()
00865 # end Videos() class
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends