|
MythTV
0.26-pre
|
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# ----------------------
# Name: tedtalks_api - Simple-to-use Python interface to the TedTalks RSS feeds
#                       (http://www.ted.com)
# Python Script
# Author:   R.D. Vaughan
# Purpose:  This python script is intended to perform a variety of utility functions to
#           search and access text metadata, video and image URLs from TedTalks Web site.
#
# License:Creative Commons GNU GPL v2
# (http://creativecommons.org/licenses/GPL/2.0/)
#-------------------------------------
__title__ ="tedtalks_api - Simple-to-use Python interface to the TedTalks videos (http://www.ted.com)"
__author__="R.D. Vaughan"
__purpose__='''
 This python script is intended to perform a variety of utility functions to search and access text
meta data, video and image URLs from the TedTalks Web site. These routines process videos
provided by TedTalks (http://www.ted.com). The specific TedTalks RSS feeds that are processed are controled through a user XML preference file usually found at
"~/.mythtv/MythNetvision/userGrabberPrefs/tedtalks.xml"
'''

__version__="v0.1.0"
# 0.1.0 Initial development

import os, struct, sys, re, time, datetime, shutil, urllib
from string import capitalize
import logging
from threading import Thread
from copy import deepcopy
from operator import itemgetter, attrgetter

from tedtalks_exceptions import (TedTalksUrlError, TedTalksHttpError, TedTalksRssError, TedTalksVideoNotFound, TedTalksConfigFileError, TedTalksUrlDownloadError)

# NOTE(review): the names "common" and "baseProcessingDir" used by the Videos class are not
# defined anywhere in this module; presumably the calling grabber script assigns them as
# module attributes before instantiating Videos - confirm against the caller.


class OutStreamEncoder(object):
    """Wraps a stream with an encoder"""
    def __init__(self, outstream, encoding=None):
        """outstream: the stream object to wrap
        encoding: codec name used for unicode objects; when empty or None the file system
                  encoding reported by sys.getfilesystemencoding() is used
        """
        self.out = outstream
        if not encoding:
            self.encoding = sys.getfilesystemencoding()
        else:
            self.encoding = encoding

    def write(self, obj):
        """Wraps the output stream, encoding Unicode strings with the specified encoding"""
        if isinstance(obj, unicode):
            obj = obj.encode(self.encoding)
        try:
            self.out.write(obj)
        except IOError:
            # Deliberate best effort: a failed write to the wrapped stream is ignored
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream"""
        return getattr(self.out, attr)
# end OutStreamEncoder() class

sys.stdout = OutStreamEncoder(sys.stdout, 'utf8')
sys.stderr = OutStreamEncoder(sys.stderr, 'utf8')


def _version_tuple(text):
    '''Convert a version string such as "v0.1.0" or "0.1.0" into a tuple of integers
    return a tuple of the integers found in the string e.g. (0, 1, 0), empty when none are found
    '''
    return tuple(int(part) for part in re.findall(r'\d+', u'%s' % (text, )))
# end _version_tuple()


try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)

# Check that the lxml library is current enough
# From the lxml documents it states: (http://codespeak.net/lxml/installation.html)
# "If you want to use XPath, do not use libxml2 2.6.27. We recommend libxml2 2.7.2 or later"
# Testing was performed with the Ubuntu 9.10 "python-lxml" version "2.1.5-1ubuntu2" repository package
# The comparison is made on the integer tuple, a string comparison would wrongly reject
# versions such as 2.10.0 ("2.10.0" sorts before "2.7.2")
version = '.'.join([str(digit) for digit in etree.LIBXML_VERSION])
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
          At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)

# Used for debugging
#import nv_python_libs.mashups.mashups_api as target
try:
    # Import the python mashups support classes
    import nv_python_libs.mashups.mashups_api as mashups_api
except Exception as e:
    sys.stderr.write('''
The subdirectory "nv_python_libs/mashups" containing the modules mashups_api and
mashups_exceptions.py (v0.1.0 or greater),
They should have been included with the distribution of tedtalks.py.
Error(%s)
''' % e)
    sys.exit(1)
# Compare numerically: a plain string comparison never fires for a "v" prefixed version
# string (the format this module uses for its own __version__)
_mashups_version = _version_tuple(mashups_api.__version__)
if _mashups_version and _mashups_version < (0, 1, 0):
    sys.stderr.write("\n! Error: Your current installed mashups_api.py version is (%s)\nYou must at least have version (0.1.0) or higher.\n" % mashups_api.__version__)
    sys.exit(1)


class Videos(object):
    """Main interface to http://www.ted.com
    This is done to support a common naming framework for all python Netvision plugins no matter their
    site target.

    Supports search methods
    The apikey is not required to access http://www.ted.com
    """
    def __init__(self,
                apikey,
                mythtv = True,
                interactive = False,
                select_first = False,
                debug = False,
                custom_ui = None,
                language = None,
                search_all_languages = False,
                ):
        """apikey (str/unicode):
            Specify the target site API key. Stored in the configuration when it is not None.
            TedTalks does not require an apikey

        mythtv (True/False):
            When True, the returned meta data has its keys and values massaged to match MythTV
            When False, the returned meta data matches what the target site returned
            (this argument is accepted for interface compatibility, it is not referenced here)

        interactive (True/False): (This option is not supported by all target site apis)
            When True, the built-in console UI is used to select the correct show.
            When False, the first search result is used.

        select_first (True/False): (This option is not currently implemented in any grabbers)
            Automatically selects the first series search result (rather
            than showing the user a list of more than one series).
            Is overridden by interactive = False, or specifying a custom_ui

        debug (True/False):
             shows verbose debugging information

        custom_ui (xx_ui.BaseUI subclass): (This option is not currently implemented in any grabbers)
            A callable subclass of interactive class (overrides interactive option)

        language (2 character language abbreviation): (This option is not supported by all target site apis)
            The language of the returned data. Is also the language search
            uses. When given it is stored in the configuration and passed to the mashups api.

        search_all_languages (True/False): (This option is not supported by all target site apis)
            By default, a Netvision grabber will only search in the language specified using
            the language option. When this is True, it will search for the
            show in any language

        """
        self.config = {}

        if apikey is not None:
            self.config['apikey'] = apikey
        else:
            pass    # TedTalks does not require an apikey

        self.config['debug_enabled'] = debug # show debugging messages
        # "common" is a module level name that this file does not define (see note at the imports)
        self.common = common
        self.common.debug = debug   # Set the common function debug level

        self.log_name = u'TedTalks_Grabber'
        self.common.logger = self.common.initLogger(path=sys.stderr, log_name=self.log_name)
        self.logger = self.common.logger # Setups the logger (self.log.debug() etc)

        self.config['custom_ui'] = custom_ui

        self.config['interactive'] = interactive

        self.config['select_first'] = select_first

        self.config['search_all_languages'] = search_all_languages

        # Record the language before it is read back for the mashups api below, without this
        # the lookup of self.config['language'] raised a KeyError whenever a language was given
        if language:
            self.config['language'] = language

        self.error_messages = {'TedTalksUrlError': u"! Error: The URL (%s) cause the exception error (%s)\n", 'TedTalksHttpError': u"! Error: An HTTP communications error with the TedTalks was raised (%s)\n", 'TedTalksRssError': u"! Error: Invalid RSS meta data\nwas received from the TedTalks error (%s). Skipping item.\n", 'TedTalksVideoNotFound': u"! Error: Video search with the TedTalks did not return any results (%s)\n", 'TedTalksConfigFileError': u"! Error: tedtalks_config.xml file missing\nit should be located in and named as (%s).\n", 'TedTalksUrlDownloadError': u"! Error: Downloading a RSS feed or Web page (%s).\n", }

        # Channel details and search results
        self.channel = {'channel_title': u'TedTalks', 'channel_link': u'http://www.ted.com', 'channel_description': u"TED is a small nonprofit devoted to Ideas Worth Spreading.", 'channel_numresults': 0, 'channel_returned': 1, u'channel_startindex': 0}

        self.channel_icon = u'%SHAREDIR%/mythnetvision/icons/tedtalks.png'

        self.config[u'image_extentions'] = ["png", "jpg", "bmp"] # Acceptable image extentions

        # Initialize Mashups api variables
        mashups_api.common = self.common
        self.mashups_api = mashups_api.Videos(u'')
        self.mashups_api.channel = self.channel
        if language:
            self.mashups_api.config['language'] = self.config['language']
        self.mashups_api.config['debug_enabled'] = self.config['debug_enabled']
        # The mashups api reads its preferences through this class's implementation
        self.mashups_api.getUserPreferences = self.getUserPreferences
    # end __init__()

###########################################################################################################
#
# Start - Utility functions
#
###########################################################################################################

    def getTedTalksConfig(self):
        ''' Read the MNV TedTalks grabber "tedtalks_config.xml" configuration file and store the
        parsed tree in self.tedtalks_config
        raise TedTalksConfigFileError when the file does not exist
        raise TedTalksUrlError when the file cannot be parsed
        return nothing
        '''
        # "baseProcessingDir" is a module level name that this file does not define
        configFile = u'%s/nv_python_libs/configs/XML/tedtalks_config.xml' % (baseProcessingDir, )
        url = u'file://%s' % (configFile, )
        if not os.path.isfile(configFile):
            raise TedTalksConfigFileError(self.error_messages['TedTalksConfigFileError'] % (configFile, ))

        if self.config['debug_enabled']:
            sys.stdout.write(u"%s\n\n" % (url, ))
        try:
            self.tedtalks_config = etree.parse(url)
        except Exception as errormsg:
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))
        return
    # end getTedTalksConfig()


    def getUserPreferences(self):
        '''Read the tedtalks_config.xml and user preference tedtalks.xml file.
        If the tedtalks.xml file does not exist then create it.
        An existing tedtalks.xml is always rebuilt from the default file with the user's
        settings merged in (see updateTedTalks).
        raise TedTalksUrlError when an existing tedtalks.xml cannot be parsed
        return nothing
        '''
        # Get tedtalks_config.xml
        self.getTedTalksConfig()

        # Expand a leading "~" to the user's home directory and store the result back in the tree
        prefElement = self.tedtalks_config.find('userPreferenceFile')
        userPreferenceFile = prefElement.text
        if userPreferenceFile.startswith('~'):
            prefElement.text = u"%s%s" % (os.path.expanduser(u"~"), userPreferenceFile[1:])

        # Check if the tedtalks.xml file exists
        create = not os.path.isfile(prefElement.text)
        if not create:
            # Read the existing user preference file
            url = u'file://%s' % (prefElement.text, )
            if self.config['debug_enabled']:
                sys.stdout.write(u"%s\n\n" % (url, ))
            try:
                self.userPrefs = etree.parse(url)
            except Exception as errormsg:
                raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, errormsg))

        # If required create/update the tedtalks.xml file
        self.updateTedTalks(create)
        return
    # end getUserPreferences()

    def updateTedTalks(self, create=False):
        ''' Create or update the tedtalks.xml user preferences file
        create: when False the "enabled" and "parameter" settings of the previously read
                self.userPrefs are merged into a fresh copy of the default preferences
        The result is written to the user preference file, re-read into self.userPrefs and
        shared with the mashups api.
        raise Exception when the default tedtalks.xml file is missing
        raise TedTalksUrlError when the default or the saved preference file cannot be parsed
        return nothing
        '''
        userDefaultFile = u'%s/nv_python_libs/configs/XML/defaultUserPrefs/tedtalks.xml' % (baseProcessingDir, )
        if not os.path.isfile(userDefaultFile):
            raise Exception(u'!Error: The default TedTalk file is missing (%s)' % (userDefaultFile, ))

        # Read the default tedtalks.xml user preferences file
        url = u'file://%s' % (userDefaultFile, )
        if self.config['debug_enabled']:
            sys.stdout.write(u"%s\n\n" % (url, ))
        try:
            userTedTalks = etree.parse(url)
        except Exception as e:
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (url, e))

        # If there was an existing tedtalks.xml file then add any relevant user settings
        # to this new tedtalks.xml
        if not create:
            for showElement in self.userPrefs.xpath("//sourceURL"):
                # NOTE(review): the default file's sourceURL is looked up by the name of the
                # user element's PARENT, not by the sourceURL's own name - confirm this is intended
                showName = showElement.getparent().attrib['name']
                elements = userTedTalks.xpath("//sourceURL[@name=$showName]", showName=showName,)
                if not len(elements):
                    continue
                # Only carry over the settings the user file actually has
                for attribute in ('enabled', 'parameter'):
                    if attribute in showElement.attrib:
                        elements[0].attrib[attribute] = showElement.attrib[attribute]

        if self.config['debug_enabled']:
            sys.stdout.write(u"After any merging userTedTalks:\n")
            sys.stdout.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))
            sys.stdout.write(u"\n")

        # Save the tedtalks.xml file
        prefFile = self.tedtalks_config.find('userPreferenceFile').text
        prefDir = os.path.dirname(prefFile)
        if prefDir and not os.path.isdir(prefDir):
            os.makedirs(prefDir)
        with open(prefFile, 'w') as fd:
            fd.write(etree.tostring(userTedTalks, encoding='UTF-8', pretty_print=True))

        # Read the refreshed user config file
        try:
            self.userPrefs = etree.parse(prefFile)
            self.mashups_api.userPrefs = self.userPrefs
        except Exception as errormsg:
            # Report the file that failed to parse (not the default file read earlier)
            raise TedTalksUrlError(self.error_messages['TedTalksUrlError'] % (prefFile, errormsg))
        return
    # end updateTedTalks()

###########################################################################################################
#
# End of Utility functions
#
###########################################################################################################

    def searchTitle(self, title, pagenumber, pagelen):
        '''Key word video search of the TedTalks web site
        title: the search text, substituted for SEARCHTERM in the configured search URL
        pagenumber: substituted for PAGENUM in the configured search URL
        pagelen: accepted for interface compatibility, not used by this method
        raise TedTalksUrlDownloadError when the search request fails
        raise TedTalksVideoNotFound when nothing was returned or no item matched
        return an array of matching item elements
        '''
        hrefElement = self.tedtalks_config.find('searchURLS').xpath(".//href")[0]
        try:
            searchTerm = urllib.quote_plus(title.encode("utf-8"))
        except UnicodeDecodeError:
            # The title could not be encoded (e.g. already a non-ASCII byte string), quote it as is
            searchTerm = urllib.quote_plus(title)
        url = hrefElement.text.replace(u'SEARCHTERM', searchTerm)
        url = url.replace(u'PAGENUM', unicode(pagenumber))

        if self.config['debug_enabled']:
            sys.stdout.write(u"%s\n\n" % (url, ))

        # The completed URL replaces the template inside the configuration tree
        hrefElement.text = url

        # Globally add all the xpath extentions to the "mythtv" namespace allowing access within the
        # XSLT stylesheets
        self.common.buildFunctionDict()
        mnvXpath = etree.FunctionNamespace('http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format')
        mnvXpath.prefix = 'mnvXpath'
        for key, function in self.common.functionDict.items():
            mnvXpath[key] = function

        # Add the parameter element from the User preferences file
        searchElement = self.userPrefs.find('search')
        if searchElement is None:
            paraMeter = []
        else:
            paraMeter = searchElement.xpath("//search//sourceURL[@enabled='true']/@parameter")
        if not len(paraMeter):
            raise Exception(u'TedTalks User preferences file "tedtalks.xml" does not have an enabled search with a "parameter" attribute.')
        etree.SubElement(self.tedtalks_config.find('searchURLS').xpath(".//url")[0], "parameter").text = paraMeter[0]

        # Perform a search
        try:
            resultTree = self.common.getUrlData(self.tedtalks_config.find('searchURLS'))
        except Exception as errormsg:
            raise TedTalksUrlDownloadError(self.error_messages['TedTalksUrlDownloadError'] % (errormsg, ))

        if resultTree is None:
            raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

        searchResults = resultTree.xpath('//result//item')
        if not len(searchResults):
            raise TedTalksVideoNotFound(u"No TedTalks Video matches found for search value (%s)" % title)

        return searchResults
    # end searchTitle()


    def searchForVideos(self, title, pagenumber):
        """Common name for a video search. Used to interface with MythTV plugin NetVision
        Writes the search results as an RSS document to stdout and always terminates the
        process: exit status 0 on success or when no video matched, 1 on any other error.
        Reads self.page_limit, which is not set by this class and must be assigned by the caller.
        """
        # Get tedtalks_config.xml
        self.getUserPreferences()

        if self.config['debug_enabled']:
            sys.stdout.write(u"self.tedtalks_config:\n")
            sys.stdout.write(etree.tostring(self.tedtalks_config, encoding='UTF-8', pretty_print=True))
            sys.stdout.write(u"\n")

        # Easier for debugging
#        print self.searchTitle(title, pagenumber, self.page_limit)
#        print
#        sys.exit()

        try:
            data = self.searchTitle(title, pagenumber, self.page_limit)
        except TedTalksVideoNotFound as msg:
            sys.stderr.write(u"%s\n" % msg)
            sys.exit(0)
        except TedTalksUrlError as msg:
            sys.stderr.write(u'%s\n' % msg)
            sys.exit(1)
        except TedTalksUrlDownloadError as msg:
            # searchTitle() already formatted the message for this error
            sys.stderr.write(u'%s\n' % msg)
            sys.exit(1)
        except TedTalksHttpError as msg:
            sys.stderr.write(self.error_messages['TedTalksHttpError'] % msg)
            sys.exit(1)
        except TedTalksRssError as msg:
            sys.stderr.write(self.error_messages['TedTalksRssError'] % msg)
            sys.exit(1)
        except Exception as e:
            sys.stderr.write(u"! Error: Unknown error during a Video search (%s)\nError(%s)\n" % (title, e))
            sys.exit(1)

        # Create RSS element tree
        rssTree = etree.XML(self.common.mnvRSS+u'</rss>')

        # Set the paging values
        # NOTE(review): the two branches assign "returned" and "startindex" asymmetrically,
        # the arithmetic is kept exactly as it was - confirm against what MythNetvision expects
        itemCount = len(data)
        previousItems = self.page_limit*(int(pagenumber)-1)
        if itemCount == self.page_limit:
            self.channel['channel_returned'] = itemCount
            self.channel['channel_startindex'] = itemCount+previousItems
            self.channel['channel_numresults'] = itemCount+(previousItems+1)
        else:
            self.channel['channel_returned'] = itemCount+previousItems
            self.channel['channel_startindex'] = itemCount
            self.channel['channel_numresults'] = itemCount

        # Add the Channel element tree
        channelTree = self.common.mnvChannelElement(self.channel)
        rssTree.append(channelTree)

        for item in data:
            channelTree.append(item)

        # Output the MNV search results
        sys.stdout.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
        sys.stdout.write(etree.tostring(rssTree, encoding='UTF-8', pretty_print=True))
        sys.exit(0)
    # end searchForVideos()

    def displayTreeView(self):
        '''Gather all videos for each TedTalks show
        Display the results and exit
        The work is delegated to the mashups api. Reads self.page_limit and self.grabber_title,
        which are not set by this class and must be assigned by the caller.
        Always terminates the process: exit status 0 on success, 1 on any error.
        '''
        self.mashups_api.page_limit = self.page_limit
        self.mashups_api.grabber_title = self.grabber_title
        self.mashups_api.channel_icon = self.channel_icon
        # The mashup title is always "tedtalks" for this grabber
        self.mashups_api.mashup_title = u'tedtalks'

        # Easier for debugging
#        self.mashups_api.displayTreeView()
#        print
#        sys.exit(1)

        try:
            self.mashups_api.Search = False
            self.mashups_api.displayTreeView()
        except Exception as e:
            sys.stderr.write(u"! Error: During a TedTalks Video treeview\nError(%s)\n" % (e))
            sys.exit(1)

        sys.exit(0)
    # end displayTreeView()
# end Videos() class
1.7.6.1