MythTV  0.26-pre
rssparse.cpp
Go to the documentation of this file.
00001 #include <QFile>
00002 #include <QDataStream>
00003 #include <QDomDocument>
00004 #include <QDomImplementation>
00005 #include <QHash>
00006 #include <QLocale>
00007 #include <QUrl>
00008 #include <QFileInfo>
00009 #include <QRegExp>
00010 
00011 #include "rssparse.h"
00012 #include "netutils.h"
00013 #include "mythcontext.h"
00014 #include "mythdirs.h"
00015 #include "mythmiscutil.h"
00016 
00017 using namespace std;
00018 
00019 ResultItem::ResultItem(const QString& title, const QString& subtitle,
00020               const QString& desc, const QString& URL,
00021               const QString& thumbnail, const QString& mediaURL,
00022               const QString& author, const QDateTime& date,
00023               const QString& time, const QString& rating,
00024               const off_t& filesize, const QString& player,
00025               const QStringList& playerargs, const QString& download,
00026               const QStringList& downloadargs, const uint& width,
00027               const uint& height, const QString& language,
00028               const bool& downloadable, const QStringList& countries,
00029               const uint& season, const uint& episode,
00030               const bool& customhtml)
00031 {
00032     m_title = title;
00033     m_subtitle = subtitle;
00034     m_desc = desc;
00035     m_URL = URL;
00036     m_thumbnail = thumbnail;
00037     m_mediaURL = mediaURL;
00038     m_author = author;
00039     if (!date.isNull())
00040         m_date = date;
00041     else
00042         m_date = QDateTime::fromString("0000-00-00T00:00:00", Qt::ISODate);
00043     m_time = time;
00044     m_rating = rating;
00045     m_filesize = filesize;
00046     m_player = player;
00047     m_playerargs = playerargs;
00048     m_download = download;
00049     m_downloadargs = downloadargs;
00050     m_width = width;
00051     m_height = height;
00052     m_language = language;
00053     m_downloadable = downloadable;
00054     m_countries = countries;
00055     m_season = season;
00056     m_episode = episode;
00057     m_customhtml = customhtml;
00058 }
00059 
00060 ResultItem::ResultItem()
00061 {
00062     m_date = QDateTime::fromString("0000-00-00T00:00:00", Qt::ISODate);
00063 }
00064 
00065 ResultItem::~ResultItem()
00066 {
00067 }
00068 
00069 void ResultItem::toMap(MetadataMap &metadataMap)
00070 {
00071     metadataMap["title"] = m_title;
00072     metadataMap["subtitle"] = m_subtitle;
00073     metadataMap["description"] = m_desc;
00074     metadataMap["url"] = m_URL;
00075     metadataMap["thumbnail"] = m_thumbnail;
00076     metadataMap["mediaurl"] = m_mediaURL;
00077     metadataMap["author"] = m_author;
00078 
00079     if (m_date.isNull())
00080         metadataMap["date"] = QString();
00081     else
00082         metadataMap["date"] = MythDateTimeToString(m_date, kDateFull);
00083 
00084     if (m_time.toInt() == 0)
00085         metadataMap["length"] = QString();
00086     else
00087     {
00088         QTime time(0,0,0,0);
00089         int secs = m_time.toInt();
00090         QTime fin = time.addSecs(secs);
00091         QString format;
00092         if (secs >= 3600)
00093             format = "H:mm:ss";
00094         else if (secs >= 600)
00095             format = "mm:ss";
00096         else if (secs >= 60)
00097             format = "m:ss";
00098         else
00099             format = ":ss";
00100         metadataMap["length"] = fin.toString(format);
00101     }
00102 
00103     if (m_rating == 0 || m_rating.isNull())
00104         metadataMap["rating"] = QString();
00105     else
00106         metadataMap["rating"] = m_rating;
00107 
00108     if (m_filesize == -1)
00109         metadataMap["filesize"] = QString();
00110     else if (m_filesize == 0 && !m_downloadable)
00111         metadataMap["filesize"] = QObject::tr("Web Only");
00112     else if (m_filesize == 0 && m_downloadable)
00113         metadataMap["filesize"] = QObject::tr("Downloadable");
00114     else
00115         metadataMap["filesize"] = QString::number(m_filesize);
00116 
00117     QString tmpSize;
00118     tmpSize.sprintf("%0.2f ", m_filesize / 1024.0 / 1024.0);
00119     tmpSize += QObject::tr("MB", "Megabytes");
00120     if (m_filesize == -1)
00121         metadataMap["filesize_str"] = QString();
00122     else if (m_filesize == 0 && !m_downloadable)
00123         metadataMap["filesize_str"] = QObject::tr("Web Only");
00124     else if (m_filesize == 0 && m_downloadable)
00125         metadataMap["filesize_str"] = QObject::tr("Downloadable");
00126     else
00127         metadataMap["filesize"] = tmpSize;
00128 
00129     metadataMap["player"] = m_player;
00130     metadataMap["playerargs"] = m_playerargs.join(", ");
00131     metadataMap["downloader"] = m_download;
00132     metadataMap["downloadargs"] = m_downloadargs.join(", ");
00133     if (m_width == 0)
00134         metadataMap["width"] = QString();
00135     else
00136         metadataMap["width"] = QString::number(m_width);
00137     if (m_height == 0)
00138         metadataMap["height"] = QString();
00139     else
00140         metadataMap["height"] = QString::number(m_height);
00141     if (m_width == 0 || m_height == 0)
00142         metadataMap["resolution"] = QString();
00143     else
00144         metadataMap["resolution"] = QString("%1x%2").arg(m_width).arg(m_height);
00145     metadataMap["language"] = m_language;
00146     metadataMap["countries"] = m_countries.join(", ");
00147 
00148 
00149     if (m_season > 0 || m_episode > 0)
00150     {
00151         metadataMap["season"] = GetDisplaySeasonEpisode(m_season, 1);
00152         metadataMap["episode"] = GetDisplaySeasonEpisode(m_episode, 1);
00153         metadataMap["s##e##"] = QString("s%1e%2").arg(GetDisplaySeasonEpisode
00154                                  (m_season, 2)).arg(
00155                                  GetDisplaySeasonEpisode(m_episode, 2));
00156         metadataMap["##x##"] = QString("%1x%2").arg(GetDisplaySeasonEpisode
00157                                  (m_season, 1)).arg(
00158                                  GetDisplaySeasonEpisode(m_episode, 2));
00159     }
00160     else
00161     {
00162         metadataMap["season"] = QString();
00163         metadataMap["episode"] = QString();
00164         metadataMap["s##e##"] = QString();
00165         metadataMap["##x##"] = QString();
00166     }
00167 }
00168 
00169 namespace
00170 {
00171         QList<QDomNode> GetDirectChildrenNS(const QDomElement& elem,
00172                         const QString& ns, const QString& name)
00173         {
00174                 QList<QDomNode> result;
00175                 QDomNodeList unf = elem.elementsByTagNameNS(ns, name);
00176                 for (int i = 0, size = unf.size(); i < size; ++i)
00177                         if (unf.at(i).parentNode() == elem)
00178                                 result << unf.at(i);
00179                 return result;
00180         }
00181 }
00182 
00183 class MRSSParser
00184 {
00185     struct ArbitraryLocatedData
00186     {
00187         QString URL;
00188         QString Rating;
00189         QString RatingScheme;
00190         QString Title;
00191         QString Description;
00192         QString Keywords;
00193         QString CopyrightURL;
00194         QString CopyrightText;
00195         int RatingAverage;
00196         int RatingCount;
00197         int RatingMin;
00198         int RatingMax;
00199         int Views;
00200         int Favs;
00201         QString Tags;
00202         QList<MRSSThumbnail> Thumbnails;
00203         QList<MRSSCredit> Credits;
00204         QList<MRSSComment> Comments;
00205         QList<MRSSPeerLink> PeerLinks;
00206         QList<MRSSScene> Scenes;
00207 
00211         ArbitraryLocatedData& operator+= (const ArbitraryLocatedData& child)
00212         {
00213             if (!child.URL.isEmpty())
00214                 URL = child.URL;
00215             if (!child.Rating.isEmpty())
00216                 Rating = child.Rating;
00217             if (!child.RatingScheme.isEmpty())
00218                 RatingScheme = child.RatingScheme;
00219             if (!child.Title.isEmpty())
00220                 Title = child.Title;
00221             if (!child.Description.isEmpty())
00222                 Description = child.Description;
00223             if (!child.Keywords.isEmpty())
00224                 Keywords = child.Keywords;
00225             if (!child.CopyrightURL.isEmpty())
00226                 CopyrightURL = child.CopyrightURL;
00227             if (!child.CopyrightText.isEmpty())
00228                 CopyrightText = child.CopyrightText;
00229             if (child.RatingAverage != 0)
00230                 RatingAverage = child.RatingAverage;
00231             if (child.RatingCount != 0)
00232                 RatingCount = child.RatingCount;
00233             if (child.RatingMin != 0)
00234                 RatingMin = child.RatingMin;
00235             if (child.RatingMax != 0)
00236                 RatingMax = child.RatingMax;
00237             if (child.Views != 0)
00238                 Views = child.Views;
00239             if (child.Favs != 0)
00240                 Favs = child.Favs;
00241             if (!child.Tags.isEmpty())
00242                 Tags = child.Tags;
00243 
00244             Thumbnails += child.Thumbnails;
00245             Credits += child.Credits;
00246             Comments += child.Comments;
00247             PeerLinks += child.PeerLinks;
00248             Scenes += child.Scenes;
00249             return *this;
00250         }
00251     };
00252 
00253 
00254 public:
00255     MRSSParser() {}
00256 
00257     QList<MRSSEntry> operator() (const QDomElement& item)
00258     {
00259         QList<MRSSEntry> result;
00260 
00261         QDomNodeList groups = item.elementsByTagNameNS(Parse::MediaRSS,
00262             "group");
00263 
00264         for (int i = 0; i < groups.size(); ++i)
00265             result += CollectChildren(groups.at(i).toElement());
00266 
00267         result += CollectChildren(item);
00268 
00269         return result;
00270     }
00271 
00272 private:
00273 
00274     QList<MRSSEntry> CollectChildren(const QDomElement& holder)
00275     {
00276          QList<MRSSEntry> result;
00277          QDomNodeList entries = holder.elementsByTagNameNS(Parse::MediaRSS,
00278              "content");
00279 
00280          for (int i = 0; i < entries.size(); ++i)
00281          {
00282              MRSSEntry entry;
00283 
00284              QDomElement en = entries.at(i).toElement();
00285              ArbitraryLocatedData d = GetArbitraryLocatedDataFor(en);
00286 
00287              if (en.hasAttribute("url"))
00288                  entry.URL = en.attribute("url");
00289              else
00290                  entry.URL = d.URL;
00291 
00292              entry.Size = en.attribute("fileSize").toInt();
00293              entry.Type = en.attribute("type");
00294              entry.Medium = en.attribute("medium");
00295              entry.IsDefault = (en.attribute("isDefault") == "true");
00296              entry.Expression = en.attribute("expression");
00297              if (entry.Expression.isEmpty())
00298                  entry.Expression = "full";
00299              entry.Bitrate = en.attribute("bitrate").toInt();
00300              entry.Framerate = en.attribute("framerate").toDouble();
00301              entry.SamplingRate = en.attribute("samplingrate").toDouble();
00302              entry.Channels = en.attribute("channels").toInt();
00303              if (!en.attribute("duration").isNull())
00304                  entry.Duration = en.attribute("duration").toInt();
00305              else
00306                  entry.Duration = 0;
00307              if (!en.attribute("width").isNull())
00308                  entry.Width = en.attribute("width").toInt();
00309              else
00310                  entry.Width = 0;
00311              if (!en.attribute("height").isNull())
00312                  entry.Height = en.attribute("height").toInt();
00313              else
00314                  entry.Height = 0;
00315              if (!en.attribute("lang").isNull())
00316                  entry.Lang = en.attribute("lang");
00317              else
00318                  entry.Lang = QString();
00319 
00320              if (!en.attribute("rating").isNull())
00321                  entry.Rating = d.Rating;
00322              else
00323                  entry.Rating = QString();
00324              entry.RatingScheme = d.RatingScheme;
00325              entry.Title = d.Title;
00326              entry.Description = d.Description;
00327              entry.Keywords = d.Keywords;
00328              entry.CopyrightURL = d.CopyrightURL;
00329              entry.CopyrightText = d.CopyrightText;
00330              if (d.RatingAverage != 0)
00331                  entry.RatingAverage = d.RatingAverage;
00332              else
00333                  entry.RatingAverage = 0;
00334              entry.RatingCount = d.RatingCount;
00335              entry.RatingMin = d.RatingMin;
00336              entry.RatingMax = d.RatingMax;
00337              entry.Views = d.Views;
00338              entry.Favs = d.Favs;
00339              entry.Tags = d.Tags;
00340              entry.Thumbnails = d.Thumbnails;
00341              entry.Credits = d.Credits;
00342              entry.Comments = d.Comments;
00343              entry.PeerLinks = d.PeerLinks;
00344              entry.Scenes = d.Scenes;
00345 
00346              result << entry;
00347         }
00348         return result;
00349     }
00350 
00351     ArbitraryLocatedData GetArbitraryLocatedDataFor(const QDomElement& holder)
00352     {
00353         ArbitraryLocatedData result;
00354 
00355         QList<QDomElement> parents;
00356         QDomElement parent = holder;
00357         while (!parent.isNull())
00358         {
00359             parents.prepend(parent);
00360             parent = parent.parentNode().toElement();
00361         }
00362 
00363         Q_FOREACH(QDomElement p, parents)
00364             result += CollectArbitraryLocatedData(p);
00365 
00366         return result;
00367     }
00368 
00369     QString GetURL(const QDomElement& element)
00370     {
00371         QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00372             "player");
00373         if (!elems.size())
00374             return QString();
00375 
00376         return QString(elems.at(0).toElement().attribute("url"));
00377     }
00378 
00379     QString GetTitle(const QDomElement& element)
00380     {
00381         QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00382             "title");
00383 
00384         if (!elems.size())
00385             return QString();
00386 
00387         QDomElement telem = elems.at(0).toElement();
00388         return QString(Parse::UnescapeHTML(telem.text()));
00389     }
00390 
00391     QString GetDescription(const QDomElement& element)
00392     {
00393         QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00394             "description");
00395 
00396         if (!elems.size())
00397             return QString();
00398 
00399         QDomElement telem = elems.at(0).toElement();
00400         return QString(Parse::UnescapeHTML(telem.text()));
00401     }
00402 
00403     QString GetKeywords(const QDomElement& element)
00404     {
00405         QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00406             "keywords");
00407 
00408         if (!elems.size())
00409             return QString();
00410 
00411         QDomElement telem = elems.at(0).toElement();
00412         return QString(telem.text());
00413     }
00414 
00415     int GetInt(const QDomElement& elem, const QString& attrname)
00416     {
00417         if (elem.hasAttribute(attrname))
00418         {
00419             bool ok = false;
00420             int result = elem.attribute(attrname).toInt(&ok);
00421             if (ok)
00422                 return int(result);
00423         }
00424         return int();
00425     }
00426 
00427     QList<MRSSThumbnail> GetThumbnails(const QDomElement& element)
00428     {
00429         QList<MRSSThumbnail> result;
00430         QList<QDomNode> thumbs = GetDirectChildrenNS(element, Parse::MediaRSS,
00431             "thumbnail");
00432         for (int i = 0; i < thumbs.size(); ++i)
00433         {
00434             QDomElement thumbNode = thumbs.at(i).toElement();
00435             int widthOpt = GetInt(thumbNode, "width");
00436             int width = widthOpt ? widthOpt : 0;
00437             int heightOpt = GetInt(thumbNode, "height");
00438             int height = heightOpt ? heightOpt : 0;
00439             MRSSThumbnail thumb =
00440             {
00441                 thumbNode.attribute("url"),
00442                 width,
00443                 height,
00444                 thumbNode.attribute("time")
00445              };
00446              result << thumb;
00447         }
00448         return result;
00449     }
00450 
00451     QList<MRSSCredit> GetCredits(const QDomElement& element)
00452     {
00453         QList<MRSSCredit> result;
00454         QList<QDomNode> credits = GetDirectChildrenNS(element, Parse::MediaRSS,
00455            "credit");
00456 
00457         for (int i = 0; i < credits.size(); ++i)
00458         {
00459             QDomElement creditNode = credits.at(i).toElement();
00460             if (!creditNode.hasAttribute("role"))
00461                  continue;
00462             MRSSCredit credit =
00463             {
00464                 creditNode.attribute("role"),
00465                 creditNode.text()
00466             };
00467             result << credit;
00468         }
00469         return result;
00470     }
00471 
00472     QList<MRSSComment> GetComments(const QDomElement& element)
00473     {
00474         QList<MRSSComment> result;
00475         QList<QDomNode> commParents = GetDirectChildrenNS(element, Parse::MediaRSS,
00476             "comments");
00477 
00478         if (commParents.size())
00479         {
00480             QDomNodeList comments = commParents.at(0).toElement()
00481                 .elementsByTagNameNS(Parse::MediaRSS,
00482                 "comment");
00483             for (int i = 0; i < comments.size(); ++i)
00484             {
00485                 MRSSComment comment =
00486                 {
00487                     QObject::tr("Comments"),
00488                     comments.at(i).toElement().text()
00489                 };
00490                 result << comment;
00491             }
00492         }
00493 
00494         QList<QDomNode> respParents = GetDirectChildrenNS(element, Parse::MediaRSS,
00495             "responses");
00496 
00497         if (respParents.size())
00498         {
00499             QDomNodeList responses = respParents.at(0).toElement()
00500                 .elementsByTagNameNS(Parse::MediaRSS,
00501                 "response");
00502             for (int i = 0; i < responses.size(); ++i)
00503             {
00504                 MRSSComment comment =
00505                 {
00506                     QObject::tr("Responses"),
00507                     responses.at(i).toElement().text()
00508                 };
00509                 result << comment;
00510             }
00511         }
00512 
00513         QList<QDomNode> backParents = GetDirectChildrenNS(element, Parse::MediaRSS,
00514             "backLinks");
00515 
00516         if (backParents.size())
00517         {
00518             QDomNodeList backlinks = backParents.at(0).toElement()
00519                 .elementsByTagNameNS(Parse::MediaRSS,
00520                 "backLink");
00521             for (int i = 0; i < backlinks.size(); ++i)
00522             {
00523                 MRSSComment comment =
00524                 {
00525                     QObject::tr("Backlinks"),
00526                     backlinks.at(i).toElement().text()
00527                 };
00528                 result << comment;
00529             }
00530         }
00531         return result;
00532     }
00533 
00534     QList<MRSSPeerLink> GetPeerLinks(const QDomElement& element)
00535     {
00536         QList<MRSSPeerLink> result;
00537         QList<QDomNode> links = GetDirectChildrenNS(element, Parse::MediaRSS,
00538             "peerLink");
00539 
00540         for (int i = 0; i < links.size(); ++i)
00541         {
00542             QDomElement linkNode = links.at(i).toElement();
00543             MRSSPeerLink pl =
00544             {
00545                 linkNode.attribute("type"),
00546                 linkNode.attribute("href")
00547             };
00548             result << pl;
00549         }
00550         return result;
00551     }
00552 
00553     QList<MRSSScene> GetScenes(const QDomElement& element)
00554     {
00555         QList<MRSSScene> result;
00556         QList<QDomNode> scenesNode = GetDirectChildrenNS(element, Parse::MediaRSS,
00557             "scenes");
00558 
00559         if (scenesNode.size())
00560         {
00561             QDomNodeList scenesNodes = scenesNode.at(0).toElement()
00562                 .elementsByTagNameNS(Parse::MediaRSS, "scene");
00563 
00564             for (int i = 0; i < scenesNodes.size(); ++i)
00565             {
00566                 QDomElement sceneNode = scenesNodes.at(i).toElement();
00567                 MRSSScene scene =
00568                 {
00569                     sceneNode.firstChildElement("sceneTitle").text(),
00570                     sceneNode.firstChildElement("sceneDescription").text(),
00571                     sceneNode.firstChildElement("sceneStartTime").text(),
00572                     sceneNode.firstChildElement("sceneEndTime").text()
00573                 };
00574                 result << scene;
00575             }
00576         }
00577         return result;
00578     }
00579 
00580     ArbitraryLocatedData CollectArbitraryLocatedData(const QDomElement& element)
00581     {
00582 
00583         QString rating;
00584         QString rscheme;
00585         {
00586             QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00587                 "rating");
00588 
00589             if (elems.size())
00590             {
00591                 QDomElement relem = elems.at(0).toElement();
00592                 rating = relem.text();
00593                 if (relem.hasAttribute("scheme"))
00594                     rscheme = relem.attribute("scheme");
00595                 else
00596                     rscheme = "urn:simple";
00597             }
00598         }
00599 
00600         QString curl;
00601         QString ctext;
00602         {
00603             QList<QDomNode> elems = GetDirectChildrenNS(element, Parse::MediaRSS,
00604                 "copyright");
00605 
00606             if (elems.size())
00607             {
00608                 QDomElement celem = elems.at(0).toElement();
00609                 ctext = celem.text();
00610                 if (celem.hasAttribute("url"))
00611                     curl = celem.attribute("url");
00612             }
00613         }
00614 
00615         int raverage = 0;
00616         int rcount = 0;
00617         int rmin = 0;
00618         int rmax = 0;
00619         int views = 0;
00620         int favs = 0;
00621         QString tags;
00622         {
00623             QList<QDomNode> comms = GetDirectChildrenNS(element, Parse::MediaRSS,
00624                 "community");
00625             if (comms.size())
00626             {
00627                 QDomElement comm = comms.at(0).toElement();
00628                 QDomNodeList stars = comm.elementsByTagNameNS(Parse::MediaRSS,
00629                     "starRating");
00630                 if (stars.size())
00631                 {
00632                     QDomElement rating = stars.at(0).toElement();
00633                     raverage = GetInt(rating, "average");
00634                     rcount = GetInt(rating, "count");
00635                     rmin = GetInt(rating, "min");
00636                     rmax = GetInt(rating, "max");
00637                 }
00638 
00639                 QDomNodeList stats = comm.elementsByTagNameNS(Parse::MediaRSS,
00640                     "statistics");
00641                 if (stats.size())
00642                 {
00643                     QDomElement stat = stats.at(0).toElement();
00644                     views = GetInt(stat, "views");
00645                     favs = GetInt(stat, "favorites");
00646                  }
00647 
00648                 QDomNodeList tagsNode = comm.elementsByTagNameNS(Parse::MediaRSS,
00649                     "tags");
00650                 if (tagsNode.size())
00651                 {
00652                     QDomElement tag = tagsNode.at(0).toElement();
00653                     tags = tag.text();
00654                 }
00655             }
00656         }
00657 
00658         ArbitraryLocatedData result =
00659         {
00660             GetURL(element),
00661             rating,
00662             rscheme,
00663             GetTitle(element),
00664             GetDescription(element),
00665             GetKeywords(element),
00666             curl,
00667             ctext,
00668             raverage,
00669             rcount,
00670             rmin,
00671             rmax,
00672             views,
00673             favs,
00674             tags,
00675             GetThumbnails(element),
00676             GetCredits(element),
00677             GetComments(element),
00678             GetPeerLinks(element),
00679             GetScenes(element)
00680         };
00681 
00682         return result;
00683     }
00684 };
00685 
00686 
00687 //========================================================================================
00688 //          Search Construction, Destruction
00689 //========================================================================================
00690 
00691 const QString Parse::DC = "http://purl.org/dc/elements/1.1/";
00692 const QString Parse::WFW = "http://wellformedweb.org/CommentAPI/";
00693 const QString Parse::Atom = "http://www.w3.org/2005/Atom";
00694 const QString Parse::RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
00695 const QString Parse::Slash = "http://purl.org/rss/1.0/modules/slash/";
00696 const QString Parse::Enc = "http://purl.oclc.org/net/rss_2.0/enc#";
00697 const QString Parse::ITunes = "http://www.itunes.com/dtds/podcast-1.0.dtd";
00698 const QString Parse::GeoRSSSimple = "http://www.georss.org/georss";
00699 const QString Parse::GeoRSSW3 = "http://www.w3.org/2003/01/geo/wgs84_pos#";
00700 const QString Parse::MediaRSS = "http://search.yahoo.com/mrss/";
00701 const QString Parse::MythRSS = "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format";
00702 
00703 Parse::Parse()
00704 {
00705 }
00706 
00707 Parse::~Parse()
00708 {
00709 }
00710 
00711 ResultItem::resultList Parse::parseRSS(QDomDocument domDoc)
00712 {
00713     ResultItem::resultList vList;
00714 
00715     QString document = domDoc.toString();
00716     LOG(VB_GENERAL, LOG_DEBUG, "Will Be Parsing: " + document);
00717 
00718     QDomElement root = domDoc.documentElement();
00719     QDomElement channel = root.firstChildElement("channel");
00720     while (!channel.isNull())
00721     {
00722         QDomElement item = channel.firstChildElement("item");
00723         while (!item.isNull())
00724         {
00725             vList.append(ParseItem(item));
00726             item = item.nextSiblingElement("item");
00727         }
00728         channel = channel.nextSiblingElement("channel");
00729     }
00730 
00731     return vList;
00732 }
00733 
00734 ResultItem* Parse::ParseItem(const QDomElement& item) const
00735 {
00736     QString title, subtitle, description, url, author, duration, rating,
00737             thumbnail, mediaURL, player, language, download;
00738     off_t filesize = 0;
00739     uint width = 0, height = 0, season = 0, episode = 0;
00740     QDateTime date;
00741     QStringList playerargs, downloadargs, countries;
00742     bool downloadable = true;
00743     bool customhtml = false;
00744 
00745     // Get the title of the article/video
00746     title = item.firstChildElement("title").text();
00747     title = UnescapeHTML(title);
00748     if (title.isEmpty())
00749         title = "";
00750 
00751     // Get the subtitle of this item.
00752     QDomNodeList subt = item.elementsByTagNameNS(MythRSS, "subtitle");
00753     if (subt.size())
00754     {
00755         subtitle = subt.at(0).toElement().text();
00756     }
00757 
00758     // Parse the description of the article/video
00759     QDomElement descriptiontemp = item.firstChildElement("description");
00760     if (!descriptiontemp.isNull())
00761         description = descriptiontemp.text();
00762     if (description.isEmpty())
00763     {
00764         QDomNodeList nodes = item.elementsByTagNameNS(ITunes, "summary");
00765         if (nodes.size())
00766             description = nodes.at(0).toElement().text();
00767     }
00768     // Unescape and remove HTML tags from the description.
00769     if (description.isEmpty())
00770         description = "";
00771     else
00772         description = UnescapeHTML(description);
00773 
00774     // Get the link (web playable)
00775     url = item.firstChildElement("link").text();
00776 
00777     // Parse the item author
00778     QDomElement authortemp = item.firstChildElement("author");
00779     if (!authortemp.isNull())
00780         author = authortemp.text();
00781     if (author.isEmpty())
00782         author = GetAuthor(item);
00783 
00784     // Turn the RFC-822 pubdate into a QDateTime
00785     date = RFC822TimeToQDateTime(item.firstChildElement("pubDate").text());
00786     if (!date.isValid() || date.isNull())
00787         date = GetDCDateTime(item);
00788     if (!date.isValid() || date.isNull())
00789         date = QDateTime::currentDateTime();
00790 
00791     // Parse the insane iTunes duration (HH:MM:SS or H:MM:SS or MM:SS or M:SS or SS)
00792     QDomNodeList dur = item.elementsByTagNameNS(ITunes, "duration");
00793     if (dur.size())
00794     {
00795         QString itunestime = dur.at(0).toElement().text();
00796         QString dateformat;
00797 
00798         if (itunestime.count() == 8)
00799             dateformat = "hh:mm:ss";
00800         else if (itunestime.count() == 7)
00801             dateformat = "h:mm:ss";
00802         else if (itunestime.count() == 5)
00803             dateformat = "mm:ss";
00804         else if (itunestime.count() == 4)
00805             dateformat = "m:ss";
00806         else if (itunestime.count() == 2)
00807             dateformat = "ss";
00808         else
00809             duration = "0";
00810 
00811         if (!dateformat.isNull())
00812         {
00813             QTime itime = QTime::fromString(itunestime, dateformat);
00814             if (itime.isValid())
00815             {
00816                 int seconds = itime.second() + (itime.minute() * 60) + (itime.hour() * 3600);
00817                 duration = QString::number(seconds);
00818             }
00819         }
00820     }
00821 
00822     // Get the rating
00823     QDomElement ratingtemp = item.firstChildElement("rating");
00824     if (!ratingtemp.isNull())
00825         rating = ratingtemp.text();
00826 
00827     // Get the external player binary
00828     QDomElement playertemp = item.firstChildElement("player");
00829     if (!playertemp.isNull() && !playertemp.hasChildNodes())
00830         player = playertemp.text();
00831 
00832     // Get the arguments to pass to the external player
00833     QDomElement playerargstemp = item.firstChildElement("playerargs");
00834     if (!playerargstemp.isNull())
00835         playerargs = playerargstemp.text().split(" ");
00836 
00837     // Get the external downloader binary/script
00838     QDomElement downloadtemp = item.firstChildElement("download");
00839     if (!downloadtemp.isNull())
00840         download = downloadtemp.text();
00841 
00842     // Get the arguments to pass to the external downloader
00843     QDomElement downloadargstemp = item.firstChildElement("downloadargs");
00844     if (!downloadargstemp.isNull())
00845         downloadargs = downloadargstemp.text().split(" ");
00846 
00847     // Get the countries in which this item is playable
00848     QDomNodeList cties = item.elementsByTagNameNS(MythRSS, "country");
00849     if (cties.size())
00850     {
00851         int i = 0;
00852         while (i < cties.size())
00853         {
00854             countries.append(cties.at(i).toElement().text());
00855             i++;
00856         }
00857     }
00858 
00859     // Get the season number of this item.
00860     QDomNodeList seas = item.elementsByTagNameNS(MythRSS, "season");
00861     if (seas.size())
00862     {
00863         season = seas.at(0).toElement().text().toUInt();
00864     }
00865 
00866     // Get the Episode number of this item.
00867     QDomNodeList ep = item.elementsByTagNameNS(MythRSS, "episode");
00868     if (ep.size())
00869     {
00870         episode = ep.at(0).toElement().text().toUInt();
00871     }
00872 
00873     // Does this grabber return custom HTML?
00874     QDomNodeList html = item.elementsByTagNameNS(MythRSS, "customhtml");
00875     if (html.size())
00876     {
00877         QString htmlstring = html.at(0).toElement().text();
00878         if (htmlstring.toLower().contains("true") || htmlstring == "1" ||
00879             htmlstring.toLower().contains("yes"))
00880             customhtml = true;
00881     }
00882 
00883     QList<MRSSEntry> enclosures = GetMediaRSS(item);
00884 
00885     if (enclosures.size())
00886     {
00887         MRSSEntry media = enclosures.takeAt(0);
00888 
00889         QList<MRSSThumbnail> thumbs = media.Thumbnails;
00890         if (thumbs.size())
00891         {
00892             MRSSThumbnail thumb = thumbs.takeAt(0);
00893             thumbnail = thumb.URL;
00894         }
00895 
00896         mediaURL = media.URL;
00897 
00898         width = media.Width;
00899         height = media.Height;
00900         language = media.Lang;
00901 
00902         if (duration.isEmpty())
00903             duration = QString::number(media.Duration);
00904 
00905         if (filesize == 0)
00906             filesize = media.Size;
00907 
00908         if (rating.isEmpty())
00909             rating = QString::number(media.RatingAverage);
00910     }
00911     if (mediaURL.isEmpty())
00912     {
00913         QList<Enclosure> stdEnc = GetEnclosures(item);
00914 
00915         if (stdEnc.size())
00916         {
00917             Enclosure e = stdEnc.takeAt(0);
00918 
00919             mediaURL = e.URL;
00920 
00921             if (filesize == 0)
00922                 filesize = e.Length;
00923         }
00924     }
00925 
00926     if (mediaURL.isNull() || mediaURL == url)
00927         downloadable = false;
00928 
00929     return(new ResultItem(title, subtitle, description,
00930               url, thumbnail, mediaURL, author, date, duration,
00931               rating, filesize, player, playerargs,
00932               download, downloadargs, width, height,
00933               language, downloadable, countries, season,
00934               episode, customhtml));
00935 }
00936 
00937 QString Parse::GetLink(const QDomElement& parent) const
00938 {
00939     QString result;
00940     QDomElement link = parent.firstChildElement("link");
00941     while(!link.isNull())
00942     {
00943         if (!link.hasAttribute("rel") || link.attribute("rel") == "alternate")
00944         {
00945             if (!link.hasAttribute("href"))
00946                 result = link.text();
00947             else
00948                 result = link.attribute("href");
00949             break;
00950         }
00951         link = link.nextSiblingElement("link");
00952     }
00953     return result;
00954 }
00955 
00956 QString Parse::GetAuthor(const QDomElement& parent) const
00957 {
00958     QString result;
00959     QDomNodeList nodes = parent.elementsByTagNameNS(ITunes,
00960         "author");
00961     if (nodes.size())
00962     {
00963         result = nodes.at(0).toElement().text();
00964         return result;
00965     }
00966 
00967     nodes = parent.elementsByTagNameNS(DC,
00968        "creator");
00969     if (nodes.size())
00970     {
00971         result = nodes.at(0).toElement().text();
00972         return result;
00973     }
00974 
00975     return result;
00976 }
00977 
00978 QString Parse::GetCommentsRSS(const QDomElement& parent) const
00979 {
00980     QString result;
00981     QDomNodeList nodes = parent.elementsByTagNameNS(WFW,
00982         "commentRss");
00983     if (nodes.size())
00984         result = nodes.at(0).toElement().text();
00985     return result;
00986 }
00987 
00988 QString Parse::GetCommentsLink(const QDomElement& parent) const
00989 {
00990     QString result;
00991     QDomNodeList nodes = parent.elementsByTagNameNS("", "comments");
00992     if (nodes.size())
00993         result = nodes.at(0).toElement().text();
00994     return result;
00995 }
00996 
00997 QDateTime Parse::GetDCDateTime(const QDomElement& parent) const
00998 {
00999     QDomNodeList dates = parent.elementsByTagNameNS(DC, "date");
01000     if (!dates.size())
01001         return QDateTime();
01002     return FromRFC3339(dates.at(0).toElement().text());
01003 }
01004 
01005 QDateTime Parse::RFC822TimeToQDateTime(const QString& t) const
01006 {
01007     if (t.size() < 20)
01008         return QDateTime();
01009 
01010     QString time = t.simplified();
01011     short int hoursShift = 0, minutesShift = 0;
01012 
01013     QStringList tmp = time.split(' ');
01014     if (tmp.isEmpty())
01015         return QDateTime();
01016     if (tmp. at(0).contains(QRegExp("\\D")))
01017         tmp.removeFirst();
01018     if (tmp.size() != 5)
01019         return QDateTime();
01020     QString timezone = tmp.takeAt(tmp.size() -1);
01021     if (timezone.size() == 5)
01022     {
01023         bool ok;
01024         int tz = timezone.toInt(&ok);
01025         if(ok)
01026         {
01027             hoursShift = tz / 100;
01028             minutesShift = tz % 100;
01029         }
01030     }
01031     else
01032         hoursShift = TimezoneOffsets.value(timezone, 0);
01033 
01034     if (tmp.at(0).size() == 1)
01035         tmp[0].prepend("0");
01036     tmp [1].truncate(3);
01037 
01038     time = tmp.join(" ");
01039 
01040     QDateTime result;
01041     if (tmp.at(2).size() == 4)
01042         result = QLocale::c().toDateTime(time, "dd MMM yyyy hh:mm:ss");
01043     else
01044         result = QLocale::c().toDateTime(time, "dd MMM yy hh:mm:ss");
01045     if (result.isNull() || !result.isValid())
01046         return QDateTime();
01047     result = result.addSecs(hoursShift * 3600 * (-1) + minutesShift *60 * (-1));
01048     result.setTimeSpec(Qt::UTC);
01049     return result.toLocalTime();
01050 }
01051 
01052 QDateTime Parse::FromRFC3339(const QString& t) const
01053 {
01054     int hoursShift = 0, minutesShift = 0;
01055     if (t.size() < 19)
01056         return QDateTime();
01057     QDateTime result = QDateTime::fromString(t.left(19).toUpper(), "yyyy-MM-ddTHH:mm:ss");
01058     QRegExp fractionalSeconds("(\\.)(\\d+)");
01059     if (fractionalSeconds.indexIn(t) > -1)
01060     {
01061         bool ok;
01062         int fractional = fractionalSeconds.cap(2).toInt(&ok);
01063         if (ok)
01064         {
01065             if (fractional < 100)
01066                 fractional *= 10;
01067             if (fractional <10)
01068                 fractional *= 100;
01069             result.addMSecs(fractional);
01070         }
01071     }
01072     QRegExp timeZone("(\\+|\\-)(\\d\\d)(:)(\\d\\d)$");
01073     if (timeZone.indexIn(t) > -1)
01074     {
01075         short int multiplier = -1;
01076         if (timeZone.cap(1) == "-")
01077             multiplier = 1;
01078         hoursShift = timeZone.cap(2).toInt();
01079         minutesShift = timeZone.cap(4).toInt();
01080         result = result.addSecs(hoursShift * 3600 * multiplier + minutesShift * 60 * multiplier);
01081     }
01082     result.setTimeSpec(Qt::UTC);
01083     return result.toLocalTime();
01084 }
01085 
01086 QList<Enclosure> Parse::GetEnclosures(const QDomElement& entry) const
01087 {
01088     QList<Enclosure> result;
01089     QDomNodeList links = entry.elementsByTagName("enclosure");
01090     for (int i = 0; i < links.size(); ++i)
01091     {
01092         QDomElement link = links.at(i).toElement();
01093 
01094         Enclosure e =
01095         {
01096             link.attribute("url"),
01097             link.attribute("type"),
01098             link.attribute("length", "-1").toLongLong(),
01099             link.attribute("hreflang")
01100         };
01101 
01102         result << e;
01103     }
01104     return result;
01105 }
01106 
01107 QList<MRSSEntry> Parse::GetMediaRSS(const QDomElement& item) const
01108 {
01109     return MRSSParser() (item);
01110 }
01111 
01112 QString Parse::UnescapeHTML(const QString& escaped)
01113 {
01114     QString result = escaped;
01115     result.replace("&amp;", "&");
01116     result.replace("&lt;", "<");
01117     result.replace("&gt;", ">");
01118     result.replace("&apos;", "\'");
01119     result.replace("&rsquo;", "\'");
01120     result.replace("&#x2019;", "\'");
01121     result.replace("&quot;", "\"");
01122     result.replace("&#8230;",QChar(8230));
01123     result.replace("&#233;",QChar(233));
01124     result.replace("&mdash;", QChar(8212));
01125     result.replace("&nbsp;", " ");
01126     result.replace("&#160;", QChar(160));
01127     result.replace("&#225;", QChar(225));
01128     result.replace("&#8216;", QChar(8216));
01129     result.replace("&#8217;", QChar(8217));
01130     result.replace("&#039;", "\'");
01131     result.replace("&ndash;", QChar(8211));
01132     result.replace("&auml;", QChar(0x00e4));
01133     result.replace("&ouml;", QChar(0x00f6));
01134     result.replace("&uuml;", QChar(0x00fc));
01135     result.replace("&Auml;", QChar(0x00c4));
01136     result.replace("&Ouml;", QChar(0x00d6));
01137     result.replace("&Uuml;", QChar(0x00dc));
01138     result.replace("&szlig;", QChar(0x00df));
01139     result.replace("&euro;", "€");
01140     result.replace("&#8230;", "...");
01141     result.replace("&#x00AE;", QChar(0x00ae));
01142     result.replace("&#x201C;", QChar(0x201c));
01143     result.replace("&#x201D;", QChar(0x201d));
01144     result.replace("<p>", "\n");
01145 
01146     QRegExp stripHTML(QLatin1String("<.*>"));
01147     stripHTML.setMinimal(true);
01148     result.remove(stripHTML);
01149 
01150     return result;
01151 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends