MythTV  0.26-pre
tedtalks_api.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 # -*- coding: UTF-8 -*-
00003 # ----------------------
00004 # Name: tedtalks_api - Simple-to-use Python interface to the TedTalks RSS feeds
00005 #                       (http://www.ted.com)
00006 # Python Script
00007 # Author:   R.D. Vaughan
00008 # Purpose:  This python script is intended to perform a variety of utility functions to
00009 #           search and access text metadata, video and image URLs from TedTalks Web site.
00010 #
00011 # License:Creative Commons GNU GPL v2
00012 # (http://creativecommons.org/licenses/GPL/2.0/)
00013 #-------------------------------------
# Module-level metadata strings describing this grabber library.
__title__ ="tedtalks_api - Simple-to-use Python interface to the TedTalks videos (http://www.ted.com)"
__author__="R.D. Vaughan"
# Free-form description of what the module is for and where the user preference file lives.
__purpose__='''
This python script is intended to perform a variety of utility functions to search and access text
meta data, video and image URLs from the TedTalks Web site. These routines process videos
provided by TedTalks (http://www.ted.com). The specific TedTalks RSS feeds that are processed are controled through a user XML preference file usually found at
"~/.mythtv/MythNetvision/userGrabberPrefs/tedtalks.xml"
'''

# Version of this module; the change history follows.
__version__="v0.1.0"
# 0.1.0 Initial development
00025 
00026 import os, struct, sys, re, time, datetime, shutil, urllib
00027 from string import capitalize
00028 import logging
00029 from threading import Thread
00030 from copy import deepcopy
00031 from operator import itemgetter, attrgetter
00032 
00033 from tedtalks_exceptions import (TedTalksUrlError, TedTalksHttpError, TedTalksRssError, TedTalksVideoNotFound, TedTalksConfigFileError, TedTalksUrlDownloadError)
00034 
class OutStreamEncoder(object):
    """Stream proxy that encodes unicode text before handing it to the wrapped stream.

    Only ``write`` is intercepted; every other attribute is looked up on the wrapped stream.
    """
    def __init__(self, outstream, encoding=None):
        """Keep the wrapped stream and the encoding to apply.

        When no encoding is supplied the file system encoding is used.
        """
        self.out = outstream
        self.encoding = encoding if encoding else sys.getfilesystemencoding()

    def write(self, obj):
        """Write obj to the wrapped stream, encoding it first when it is a unicode string.

        An IOError raised while writing is ignored.
        """
        try:
            if isinstance(obj, unicode):
                obj = obj.encode(self.encoding)
            self.out.write(obj)
        except IOError:
            pass

    def __getattr__(self, attr):
        """Forward any attribute not found on this wrapper to the wrapped stream"""
        return getattr(self.out, attr)
# Wrap the standard streams so that unicode output is always written UTF-8 encoded
sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')


# Both libraries are required; report the failure and stop when either cannot be imported
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
#
# etree.LIBXML_VERSION is a tuple of integers, so it is compared numerically. Comparing the
# dotted string form is lexicographic and wrongly treats e.g. "2.10.3" as older than "2.7.2".
# The dotted string is still built because it is shown in the error message.
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00085 
# Used for debugging
#import nv_python_libs.mashups.mashups_api as target
# Import the python mashups support classes; report the failure and stop when that is not possible
try:
    import nv_python_libs.mashups.mashups_api as mashups_api
except Exception as e:
    sys.stderr.write('''
The subdirectory "nv_python_libs/mashups" containing the modules mashups_api and
mashups_exceptions.py (v0.1.0 or greater),
They should have been included with the distribution of tedtalks.py.
Error(%s)
''' % e)
    sys.exit(1)
# Compare the numeric components of the version rather than the raw strings. A string
# comparison is lexicographic ("0.10.0" sorts before "0.2.0") and any leading letter such
# as "v" makes the string sort after every digit, so the check could never fail.
if tuple(int(part) for part in re.findall(r'\d+', mashups_api.__version__)) < (0, 1, 0):
    sys.stderr.write("\n! Error: Your current installed mashups_api.py version is (%s)\nYou must at least have version (0.1.0) or higher.\n" % mashups_api.__version__)
    sys.exit(1)
00103 
00104 
00105 class Videos(object):
00106     """Main interface to http://www.ted.com
00107     This is done to support a common naming framework for all python Netvision plugins no matter their
00108     site target.
00109 
00110     Supports search methods
00111     The apikey is not required to access http://www.ted.com
00112     """
00113     def __init__(self,
00114                 apikey,
00115                 mythtv = True,
00116                 interactive = False,
00117                 select_first = False,
00118                 debug = False,
00119                 custom_ui = None,
00120                 language = None,
00121                 search_all_languages = False,
00122                 ):
00123         """apikey (str/unicode):
00124             Specify the target site API key. Applications need their own key in some cases
00125 
00126         mythtv (True/False):
00127             When True, the returned meta data is being returned has the key and values massaged to match MythTV
00128             When False, the returned meta data  is being returned matches what target site returned
00129 
00130         interactive (True/False): (This option is not supported by all target site apis)
00131             When True, uses built-in console UI is used to select the correct show.
00132             When False, the first search result is used.
00133 
00134         select_first (True/False): (This option is not supported currently implemented in any grabbers)
00135             Automatically selects the first series search result (rather
00136             than showing the user a list of more than one series).
00137             Is overridden by interactive = False, or specifying a custom_ui
00138 
00139         debug (True/False):
00140              shows verbose debugging information
00141 
00142         custom_ui (xx_ui.BaseUI subclass): (This option is not supported currently implemented in any grabbers)
00143             A callable subclass of interactive class (overrides interactive option)
00144 
00145         language (2 character language abbreviation): (This option is not supported by all target site apis)
00146             The language of the returned data. Is also the language search
00147             uses. Default is "en" (English). For full list, run..
00148 
00149         search_all_languages (True/False): (This option is not supported by all target site apis)
00150             By default, a Netvision grabber will only search in the language specified using
00151             the language option. When this is True, it will search for the
00152             show in any language
00153 
00154         """
00155         self.config = {}
00156 
00157         if apikey is not None:
00158             self.config['apikey'] = apikey
00159         else:
00160             pass    # TedTalks does not require an apikey
00161 
00162         self.config['debug_enabled'] = debug # show debugging messages
00163         self.common = common
00164         self.common.debug = debug   # Set the common function debug level
00165 
00166         self.log_name = u'TedTalks_Grabber'
00167         self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
00168         self.logger = self.common.logger # Setups the logger (self.log.debug() etc)
00169 
00170         self.config['custom_ui'] = custom_ui
00171 
00172         self.config['interactive'] = interactive
00173 
00174         self.config['select_first'] = select_first
00175 
00176         self.config['search_all_languages'] = search_all_languages
00177 
00178         self.error_messages = {'TedTalksUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n", 'TedTalksHttpError': u"! Error: An HTTP communications error with the TedTalks was raised (%s)\n", 'TedTalksRssError': u"! Error: Invalid RSS meta data\nwas received from the TedTalks error (%s). Skipping item.\n", 'TedTalksVideoNotFound': u"! Error: Video search with the TedTalks did not return any results (%s)\n", 'TedTalksConfigFileError': u"! Error: tedtalks_config.xml file missing\nit should be located in and named as (%s).\n", 'TedTalksUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n", }
00179 
00180         # Channel details and search results
00181         self.channel = {'channel_title': u'TedTalks', 'channel_link': u'http://www.ted.com', 'channel_description': u"TED is a small nonprofit devoted to Ideas Worth Spreading.", 'channel_numresults': 0, 'channel_returned': 1, u'channel_startindex': 0}
00182 
00183         self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/tedtalks.png'
00184 
00185         self.config[u'image_extentions'] = ["png", "jpg", "bmp"] # Acceptable image extentions
00186 
00187         # Initialize Mashups api variables
00188         mashups_api.common = self.common
00189         self.mashups_api = mashups_api.Videos(u'')
00190         self.mashups_api.channel = self.channel
00191         if language:
00192             self.mashups_api.config['language'] = self.config['language']
00193         self.mashups_api.config['debug_enabled'] = self.config['debug_enabled']
00194         self.mashups_api.getUserPreferences = self.getUserPreferences
00195     # end __init__()
00196 
00197 ###########################################################################################################
00198 #
00199 # Start - Utility functions
00200 #
00201 ###########################################################################################################
00202 
00203     def getTedTalksConfig(self):
00204         ''' Read the MNV TedTalks grabber "tedtalks_config.xml" configuration file
00205         return nothing
00206         '''
00207         # Read the grabber tedtalks_config.xml configuration file
00208         url = u'file://%s/nv_python_libs/configs/XML/tedtalks_config.xml' % (baseProcessingDir, )
00209         if not os.path.isfile(url[7:]):
00210             raise TedTalksConfigFileError(self.error_messages['TedTalksConfigFileError'] % (url[7:], ))
00211 
00212         if self.config['debug_enabled']:
00213             print url
00214             print
00215         try:
00216             self.tedtalks_config = etree.parse(url)
00217         except Exception, errormsg:
00218             raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
00219         return
00220     # end getTedTalksConfig()
00221 
00222 
00223     def getUserPreferences(self):
00224         '''Read the tedtalks_config.xml and user preference tedtalks.xml file.
00225         If the tedtalks.xml file does not exist then create it.
00226         If the tedtalks.xml file is too old then update it.
00227         return nothing
00228         '''
00229         # Get tedtalks_config.xml
00230         self.getTedTalksConfig()
00231 
00232         # Check if the tedtalks.xml file exists
00233         userPreferenceFile = self.tedtalks_config.find('userPreferenceFile').text
00234         if userPreferenceFile[0] == '~':
00235              self.tedtalks_config.find('userPreferenceFile').text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:])
00236         if os.path.isfile(self.tedtalks_config.find('userPreferenceFile').text):
00237             # Read the grabber tedtalks_config.xml configuration file
00238             url = u'file://%s' % (self.tedtalks_config.find('userPreferenceFile').text, )
00239             if self.config['debug_enabled']:
00240                 print url
00241                 print
00242             try:
00243                 self.userPrefs = etree.parse(url)
00244             except Exception, errormsg:
00245                 raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
00246             create = False
00247         else:
00248             create = True
00249 
00250         # If required create/update the tedtalks.xml file
00251         self.updateTedTalks(create)
00252         return
00253     # end getUserPreferences()
00254 
00255     def updateTedTalks(self, create=False):
00256         ''' Create or update the tedtalks.xml user preferences file
00257         return nothing
00258         '''
00259         userDefaultFile = u'%s/nv_python_libs/configs/XML/defaultUserPrefs/tedtalks.xml' % (baseProcessingDir, )
00260         if os.path.isfile(userDefaultFile):
00261             # Read the default tedtalks.xml user preferences file
00262             url = u'file://%s' % (userDefaultFile, )
00263             if self.config['debug_enabled']:
00264                 print url
00265                 print
00266             try:
00267                 userTedTalks = etree.parse(url)
00268             except Exception, e:
00269                 raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, e))
00270         else:
00271             raise Exception(u'!Error: The default TedTalk file is missing (%s)', userDefaultFile)
00272 
00273         # If there was an existing tedtalks.xml file then add any relevant user settings
00274         # to this new tedtalks.xml
00275         if not create:
00276             for showElement in self.userPrefs.xpath("//sourceURL"):
00277                 showName = showElement.getparent().attrib['name']
00278                 sourceName = showElement.attrib['name']
00279                 elements = userTedTalks.xpath("//sourceURL[@name=$showName]", showName=showName,)
00280                 if len(elements):
00281                     elements[0].attrib['enabled'] = showElement.attrib['enabled']
00282                     elements[0].attrib['parameter'] = showElement.attrib['parameter']
00283 
00284         if self.config['debug_enabled']:
00285             print "After any merging userTedTalks:"
00286             sys.stdout.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
00287             print
00288 
00289         # Save the tedtalks.xml file
00290         prefDir = self.tedtalks_config.find('userPreferenceFile').text.replace(u'/tedtalks.xml', u'')
00291         if not os.path.isdir(prefDir):
00292             os.makedirs(prefDir)
00293         fd = open(self.tedtalks_config.find('userPreferenceFile').text, 'w')
00294         fd.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
00295         fd.close()
00296 
00297         # Read the refreshed user config file
00298         try:
00299             self.userPrefs = etree.parse(self.tedtalks_config.find('userPreferenceFile').text)
00300             self.mashups_api.userPrefs = self.userPrefs
00301         except Exception, errormsg:
00302             raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
00303         return
00304     # end updateTedTalks()
00305 
00306 ###########################################################################################################
00307 #
00308 # End of Utility functions
00309 #
00310 ###########################################################################################################
00311 
00312     def searchTitle(self, title, pagenumber, pagelen):
00313         '''Key word video search of the TedTalks web site
00314         return an array of matching item elements
00315         return
00316         '''
00317         searchVar = self.tedtalks_config.find('searchURLS').xpath(".//href")[0].text
00318         try:
00319             searchVar = searchVar.replace(u'SEARCHTERM', urllib.quote_plus(title.encode("utf-8")))
00320             searchVar = searchVar.replace(u'PAGENUM', unicode(pagenumber))
00321         except UnicodeDecodeError:
00322             searchVar = u'?q=%s' % ()
00323             searchVar = searchVar.replace(u'SEARCHTERM', urllib.quote_plus(title))
00324             searchVar = searchVar.replace(u'PAGENUM', unicode(pagenumber))
00325         url = searchVar
00326 
00327         if self.config['debug_enabled']:
00328             print url
00329             print
00330 
00331         self.tedtalks_config.find('searchURLS').xpath(".//href")[0].text = url
00332 
00333         # Globally add all the xpath extentions to the "mythtv" namespace allowing access within the
00334         # XSLT stylesheets
00335         self.common.buildFunctionDict()
00336         mnvXpath = etree.FunctionNamespace('http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format')
00337         mnvXpath.prefix = 'mnvXpath'
00338         for key in self.common.functionDict.keys():
00339             mnvXpath[key] = common.functionDict[key]
00340 
00341         # Add the parameter element from the User preferences file
00342         paraMeter = self.userPrefs.find('search').xpath("//search//sourceURL[@enabled='true']/@parameter")
00343         if not len(paraMeter):
00344             raise Exception(u'TedTalks User preferences file "tedtalks.xml" does not have an enabled search with a "parameter" attribute.')
00345         etree.SubElement(self.tedtalks_config.find('searchURLS').xpath(".//url")[0], "parameter").text = paraMeter[0]
00346 
00347         # Perform a search
00348         try:
00349             resultTree = self.common.getUrlData(self.tedtalks_config.find('searchURLS'))
00350         except Exception, errormsg:
00351             raise TedTalksUrlDownloadError(self.error_messages['TedTalksUrlDownloadError'] % (errormsg))
00352 
00353         if resultTree is None:
00354             raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)
00355 
00356         searchResults = resultTree.xpath('//result//item')
00357         if not len(searchResults):
00358             raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)
00359 
00360         return searchResults
00361         # end searchTitle()
00362 
00363 
00364     def searchForVideos(self, title, pagenumber):
00365         """Common name for a video search. Used to interface with MythTV plugin NetVision
00366         """
00367         # Get tedtalks_config.xml
00368         self.getUserPreferences()
00369 
00370         if self.config['debug_enabled']:
00371             print "self.tedtalks_config:"
00372             sys.stdout.write(etree.tostring(self.tedtalks_config, encoding='UTF-8', pretty_print=True))
00373             print
00374 
00375         # Easier for debugging
00376 #        print self.searchTitle(title, pagenumber, self.page_limit)
00377 #        print
00378 #        sys.exit()
00379 
00380         try:
00381             data = self.searchTitle(title, pagenumber, self.page_limit)
00382         except TedTalksVideoNotFound, msg:
00383             sys.stderr.write(u"%s\n" % msg)
00384             sys.exit(0)
00385         except TedTalksUrlError, msg:
00386             sys.stderr.write(u'%s\n' % msg)
00387             sys.exit(1)
00388         except TedTalksHttpError, msg:
00389             sys.stderr.write(self.error_messages['TedTalksHttpError'] % msg)
00390             sys.exit(1)
00391         except TedTalksRssError, msg:
00392             sys.stderr.write(self.error_messages['TedTalksRssError'] % msg)
00393             sys.exit(1)
00394         except Exception, e:
00395             sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
00396             sys.exit(1)
00397 
00398         # Create RSS element tree
00399         rssTree = etree.XML(self.common.mnvRSS+u'</rss>')
00400 
00401         # Set the paging values
00402         if len(data) == self.page_limit:
00403             self.channel['channel_returned'] = len(data)
00404             self.channel['channel_startindex'] = len(data)+(self.page_limit*(int(pagenumber)-1))
00405             self.channel['channel_numresults'] = len(data)+(self.page_limit*(int(pagenumber)-1)+1)
00406         else:
00407             self.channel['channel_returned'] = len(data)+(self.page_limit*(int(pagenumber)-1))
00408             self.channel['channel_startindex'] = len(data)
00409             self.channel['channel_numresults'] = len(data)
00410 
00411         # Add the Channel element tree
00412         channelTree = self.common.mnvChannelElement(self.channel)
00413         rssTree.append(channelTree)
00414 
00415         for item in data:
00416             channelTree.append(item)
00417 
00418         # Output the MNV search results
00419         sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
00420         sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
00421         sys.exit(0)
00422     # end searchForVideos()
00423 
00424     def displayTreeView(self):
00425         '''Gather all videos for each TedTalks show
00426         Display the results and exit
00427         '''
00428         self.mashups_api.page_limit = self.page_limit
00429         self.mashups_api.grabber_title = self.grabber_title
00430         self.mashups_api.mashup_title = self.mashup_title
00431         self.mashups_api.channel_icon = self.channel_icon
00432         self.mashups_api.mashup_title = u'tedtalks'
00433 
00434         # Easier for debugging
00435 #        self.mashups_api.displayTreeView()
00436 #        print
00437 #        sys.exit(1)
00438 
00439         try:
00440             self.mashups_api.Search = False
00441             self.mashups_api.displayTreeView()
00442         except Exception, e:
00443             sys.stderr.write(u"! Error: During a TedTalks Video treeview\nError(%s)\n" % (e))
00444             sys.exit(1)
00445 
00446         sys.exit(0)
00447     # end displayTreeView()
00448 # end Videos() class
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends