MythTV  0.26-pre
xine_demux_sputext.cpp
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2000-2003 the xine project
00003  *
00004  * This file is part of xine, a free video player.
00005  *
00006  * xine is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * xine is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
00019  *
00020  * code based on old libsputext/xine_decoder.c
00021  *
00022  * code based on mplayer module:
00023  *
00024  * Subtitle reader with format autodetection
00025  *
00026  * Written by laaz
00027  * Some code cleanup & realloc() by A'rpi/ESP-team
00028  * dunnowhat sub format by szabi
00029  */
00030 
00031 #ifdef HAVE_CONFIG_H
00032 #include "config.h"
00033 #endif
00034 
00035 #include <ctype.h>
00036 #include <stdlib.h>
00037 #include <stdio.h>
00038 #include <unistd.h>
00039 #include <string.h>
00040 #include <sys/types.h>
00041 #include <sys/stat.h>
00042 #include <fcntl.h>
00043 #include <ctype.h>
00044 #include "xine_demux_sputext.h"
00045 
00046 #define LOG_MODULE "demux_sputext"
00047 #define LOG_VERBOSE
00048 /*
00049 #define LOG
00050 */
00051 
00052 #define ERR           (void *)-1
00053 #define LINE_LEN      1000
00054 #define LINE_LEN_QUOT "1000"
00055 
00056 /*
00057  * Demuxer code start
00058  */
00059 
00060 #define FORMAT_UNKNOWN   -1
00061 #define FORMAT_MICRODVD   0
00062 #define FORMAT_SUBRIP     1
00063 #define FORMAT_SUBVIEWER  2
00064 #define FORMAT_SAMI       3
00065 #define FORMAT_VPLAYER    4
00066 #define FORMAT_RT         5
00067 #define FORMAT_SSA        6 /* Sub Station Alpha */
00068 #define FORMAT_PJS        7
00069 #define FORMAT_MPSUB      8
00070 #define FORMAT_AQTITLE    9
00071 #define FORMAT_JACOBSUB   10
00072 #define FORMAT_SUBVIEWER2 11
00073 #define FORMAT_SUBRIP09   12
00074 #define FORMAT_MPL2       13 /*Mplayer sub 2 ?*/
00075 
/*
 * Tells whether p ends a line of subtitle text.
 * Returns 1 for carriage return, line feed or the NUL terminator, 0 otherwise.
 */
static int eol(char p) {
  switch (p) {
  case '\r':
  case '\n':
  case '\0':
    return 1;
  default:
    return 0;
  }
}
00079 
/*
 * Strips leading and trailing whitespace from the NUL-terminated string s,
 * in place.
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(): subtitle text routinely contains bytes >= 0x80, and passing a
 * negative char to the <ctype.h> functions is undefined behaviour.
 * The leading whitespace is removed with a single memmove() instead of
 * shifting the whole string once per removed character.
 */
static inline void trail_space(char *s) {
  char *first = s;
  size_t len;

  /* locate the first non-blank character (the terminator is not a blank) */
  while (isspace((unsigned char)*first))
    first++;

  len = strlen(first);
  if (first != s)
    memmove(s, first, len + 1);   /* + 1 keeps the terminator */

  /* s[0] is now either NUL or non-blank, so this never underflows */
  while (len > 0 && isspace((unsigned char)s[len - 1]))
    s[--len] = '\0';
}
00093 
00094 /*
00095  * Reimplementation of fgets() using the input->read() method.
00096  */
00097 static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
00098   off_t nread = 0;
00099   char *s;
00100   int linelen;
00101 
00102   // Since our RemoteFile code sleeps 200ms whenever we get back less data
00103   // than requested, but this code just keeps trying to read until it gets
00104   // an error back, we check for empty reads so that we can stop reading
00105   // when there is no more data to read
00106   if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
00107     nread = demuxstr->rbuffer->Read(
00108         &demuxstr->buf[demuxstr->buflen], len - demuxstr->buflen);
00109     if (nread < 0) {
00110       printf("read failed.\n");
00111       return NULL;
00112     }
00113   }
00114 
00115   if (!nread)
00116     demuxstr->emptyReads++;
00117 
00118   demuxstr->buflen += nread;
00119   demuxstr->buf[demuxstr->buflen] = '\0';
00120 
00121   s = strchr(demuxstr->buf, '\n');
00122 
00123   if (line && (s || demuxstr->buflen)) {
00124 
00125     linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
00126 
00127     memcpy(line, demuxstr->buf, linelen);
00128     line[linelen] = '\0';
00129 
00130     memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
00131     demuxstr->buflen -= linelen;
00132 
00133     return line;
00134   }
00135 
00136   return NULL;
00137 }
00138 
00139 
00140 static subtitle_t *sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current) {
00141 
00142   static char line[LINE_LEN + 1];
00143   static char *s = NULL;
00144   char text[LINE_LEN + 1], *p, *q;
00145   int state;
00146 
00147   p = NULL;
00148   current->lines = current->start = 0;
00149   current->end = -1;
00150   state = 0;
00151 
00152   /* read the first line */
00153   if (!s)
00154     if (!(s = read_line_from_input(demuxstr, line, LINE_LEN))) return 0;
00155 
00156   do {
00157     switch (state) {
00158 
00159     case 0: /* find "START=" */
00160       s = strstr (s, "Start=");
00161       if (s) {
00162         current->start = strtol (s + 6, &s, 0) / 10;
00163         state = 1; continue;
00164       }
00165       break;
00166 
00167     case 1: /* find "<P" */
00168       if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; }
00169       break;
00170 
00171     case 2: /* find ">" */
00172       if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
00173       break;
00174 
00175     case 3: /* get all text until '<' appears */
00176       if (*s == '\0') { break; }
00177       else if (*s == '<') { state = 4; }
00178       else if (!strncasecmp (s, "&nbsp;", 6)) { *p++ = ' '; s += 6; }
00179       else if (*s == '\r') { s++; }
00180       else if (!strncasecmp (s, "<br>", 4) || *s == '\n') {
00181         *p = '\0'; p = text; trail_space (text);
00182         if (text[0] != '\0')
00183           current->text[current->lines++] = strdup (text);
00184         if (*s == '\n') s++; else s += 4;
00185       }
00186       else *p++ = *s++;
00187       continue;
00188 
00189     case 4: /* get current->end or skip <TAG> */
00190       q = strstr (s, "Start=");
00191       if (q) {
00192         current->end = strtol (q + 6, &q, 0) / 10 - 1;
00193         *p = '\0'; trail_space (text);
00194         if (text[0] != '\0')
00195           current->text[current->lines++] = strdup (text);
00196         if (current->lines > 0) { state = 99; break; }
00197         state = 0; continue;
00198       }
00199       s = strchr (s, '>');
00200       if (s) { s++; state = 3; continue; }
00201       break;
00202     }
00203 
00204     /* read next line */
00205     if (state != 99 && !(s = read_line_from_input (demuxstr, line, LINE_LEN)))
00206       return 0;
00207 
00208   } while (state != 99);
00209 
00210   return current;
00211 }
00212 
00213 
00214 static char *sub_readtext(char *source, char **dest) {
00215   int len=0;
00216   char *p=source;
00217 
00218   while ( !eol(*p) && *p!= '|' ) {
00219     p++,len++;
00220   }
00221 
00222   if (!dest)
00223     return (char*)ERR;
00224 
00225   *dest= (char *)malloc (len+1);
00226   if (!(*dest))
00227     return (char*)ERR;
00228 
00229   strncpy(*dest, source, len);
00230   (*dest)[len]=0;
00231 
00232   while (*p=='\r' || *p=='\n' || *p=='|')
00233     p++;
00234 
00235   if (*p)  return p;  /* not-last text field */
00236   else return (char*)NULL;   /* last text field     */
00237 }
00238 
00239 static subtitle_t *sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current) {
00240 
00241   char line[LINE_LEN + 1];
00242   char line2[LINE_LEN + 1];
00243   char *p, *next;
00244   int i;
00245 
00246   memset (current, 0, sizeof(subtitle_t));
00247 
00248   current->end=-1;
00249   do {
00250     if (!read_line_from_input (demuxstr, line, LINE_LEN)) return NULL;
00251   } while ((sscanf (line, "{%ld}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2) !=2) &&
00252            (sscanf (line, "{%ld}{%ld}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2) !=3)
00253           );
00254 
00255   p=line2;
00256 
00257   next=p, i=0;
00258   while ((next =sub_readtext (next, &(current->text[i])))) {
00259     if (current->text[i]==ERR) return (subtitle_t *)ERR;
00260     i++;
00261     if (i>=SUB_MAX_TEXT) {
00262       printf ("Too many lines in a subtitle\n");
00263       current->lines=i;
00264       return current;
00265     }
00266   }
00267   current->lines= ++i;
00268 
00269   return current;
00270 }
00271 
00272 static subtitle_t *sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current) {
00273 
00274   char line[LINE_LEN + 1];
00275   int a1,a2,a3,a4,b1,b2,b3,b4;
00276   char *p=NULL, *q=NULL;
00277   int len;
00278 
00279   memset (current, 0, sizeof(subtitle_t));
00280 
00281   while (1) {
00282     if (!read_line_from_input(demuxstr, line, LINE_LEN)) return NULL;
00283     if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
00284       if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
00285         continue;
00286     }
00287     current->start = a1*360000+a2*6000+a3*100+a4;
00288     current->end   = b1*360000+b2*6000+b3*100+b4;
00289 
00290     if (!read_line_from_input(demuxstr, line, LINE_LEN))
00291       return NULL;
00292 
00293     p=q=line;
00294     for (current->lines=1; current->lines <= SUB_MAX_TEXT; current->lines++) {
00295       for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' && strncasecmp(p,"[br]",4); p++,len++);
00296       current->text[current->lines-1]=(char *)malloc (len+1);
00297       if (!current->text[current->lines-1]) return (subtitle_t *)ERR;
00298       strncpy (current->text[current->lines-1], q, len);
00299       current->text[current->lines-1][len]='\0';
00300       if (!*p || *p=='\r' || *p=='\n') break;
00301       if (*p=='[') while (*p++!=']');
00302       if (*p=='|') p++;
00303     }
00304     if (current->lines > SUB_MAX_TEXT) current->lines = SUB_MAX_TEXT;
00305     break;
00306   }
00307   return current;
00308 }
00309 
00310 static subtitle_t *sub_read_line_subrip(demux_sputext_t *demuxstr,subtitle_t *current) {
00311   char line[LINE_LEN + 1];
00312   int a1,a2,a3,a4,b1,b2,b3,b4;
00313   int i,end_sub;
00314 
00315   memset(current,0,sizeof(subtitle_t));
00316   do {
00317     if(!read_line_from_input(demuxstr,line,LINE_LEN))
00318       return NULL;
00319     i = sscanf(line,"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
00320   } while(i < 8);
00321   current->start = a1*360000+a2*6000+a3*100+a4/10;
00322   current->end   = b1*360000+b2*6000+b3*100+b4/10;
00323   i=0;
00324   end_sub=0;
00325   do {
00326     char *p; /* pointer to the curently read char */
00327     char temp_line[SUB_BUFSIZE]; /* subtitle line that will be transfered to current->text[i] */
00328     int temp_index; /* ... and its index wich 'points' to the first EMPTY place -> last read char is at temp_index-1 if temp_index>0 */
00329     temp_line[SUB_BUFSIZE-1]='\0'; /* just in case... */
00330     if(!read_line_from_input(demuxstr,line,LINE_LEN)) {
00331       if(i)
00332         break; /* if something was read, transmit it */
00333       else
00334         return NULL; /* if not, repport EOF */
00335     }
00336     for(temp_index=0,p=line;*p!='\0' && !end_sub && temp_index<SUB_BUFSIZE && i<SUB_MAX_TEXT;p++) {
00337       switch(*p) {
00338         case '\\':
00339           if(*(p+1)=='N' || *(p+1)=='n') {
00340             temp_line[temp_index++]='\0'; /* end of curent line */
00341             p++;
00342           } else
00343             temp_line[temp_index++]=*p;
00344           break;
00345         case '{':
00346 #if 0 /* italic not implemented in renderer, ignore them for now */
00347           if(!strncmp(p,"{\\i1}",5) && temp_index+3<SUB_BUFSIZE) {
00348             temp_line[temp_index++]='<';
00349             temp_line[temp_index++]='i';
00350             temp_line[temp_index++]='>';
00351 #else
00352           if(!strncmp(p,"{\\i1}",5)) {
00353 #endif
00354             p+=4;
00355           }
00356 #if 0 /* italic not implemented in renderer, ignore them for now */
00357           else if(!strncmp(p,"{\\i0}",5) && temp_index+4<SUB_BUFSIZE) {
00358             temp_line[temp_index++]='<';
00359             temp_line[temp_index++]='/';
00360             temp_line[temp_index++]='i';
00361             temp_line[temp_index++]='>';
00362 #else
00363           else if(!strncmp(p,"{\\i0}",5)) {
00364 #endif
00365             p+=4;
00366           }
00367           else
00368             temp_line[temp_index++]=*p;
00369           break;
00370         case '\r': /* just ignore '\r's */
00371           break;
00372         case '\n':
00373           temp_line[temp_index++]='\0';
00374           break;
00375         default:
00376           temp_line[temp_index++]=*p;
00377           break;
00378       }
00379       if(temp_index>0) {
00380         if(temp_index==SUB_BUFSIZE)
00381           printf("Too many characters in a subtitle line\n");
00382         if(temp_line[temp_index-1]=='\0' || temp_index==SUB_BUFSIZE) {
00383           if(temp_index>1) { /* more than 1 char (including '\0') -> that is a valid one */
00384             current->text[i]=(char *)malloc(temp_index);
00385             if(!current->text[i])
00386               return (subtitle_t *)ERR;
00387             strncpy(current->text[i],temp_line,temp_index); /* temp_index<=SUB_BUFSIZE is always true here */
00388             i++;
00389             temp_index=0;
00390           } else
00391             end_sub=1;
00392         }
00393       }
00394     }
00395   } while(i<SUB_MAX_TEXT && !end_sub);
00396   if(i>=SUB_MAX_TEXT)
00397     printf("Too many lines in a subtitle\n");
00398   current->lines=i;
00399   return current;
00400 }
00401 
00402 static subtitle_t *sub_read_line_vplayer(demux_sputext_t *demuxstr,subtitle_t *current) {
00403   char line[LINE_LEN + 1];
00404   int a1,a2,a3,b1,b2,b3;
00405   char *p=NULL, *next, *p2;
00406   int i;
00407 
00408   memset (current, 0, sizeof(subtitle_t));
00409 
00410   while (!current->text[0]) {
00411     if( demuxstr->next_line[0] == '\0' ) { /* if the buffer is empty.... */
00412       if( !read_line_from_input(demuxstr, line, LINE_LEN) ) return NULL;
00413     } else {
00414       /* ... get the current line from buffer. */
00415       strncpy( line, demuxstr->next_line, LINE_LEN);
00416       line[LINE_LEN] = '\0'; /* I'm scared. This makes me feel better. */
00417       demuxstr->next_line[0] = '\0'; /* mark the buffer as empty. */
00418     }
00419     /* Initialize buffer with next line */
00420     if( ! read_line_from_input( demuxstr, demuxstr->next_line, LINE_LEN) ) {
00421       demuxstr->next_line[0] = '\0';
00422       return NULL;
00423     }
00424     if( (sscanf( line,            "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
00425         (sscanf( demuxstr->next_line, "%d:%d:%d:", &b1, &b2, &b3) < 3) )
00426       continue;
00427     current->start = a1*360000+a2*6000+a3*100;
00428     current->end   = b1*360000+b2*6000+b3*100;
00429     if ((current->end - current->start) > LINE_LEN)
00430       current->end = current->start + LINE_LEN; /* not too long though.  */
00431     /* teraz czas na wkopiowanie stringu */
00432     p=line;
00433     /* finds the body of the subtitle_t */
00434     for (i=0; i<3; i++){
00435       p2=strchr( p, ':');
00436       if( p2 == NULL ) break;
00437       p=p2+1;
00438     }
00439 
00440     next=p;
00441     i=0;
00442     while( (next = sub_readtext( next, &(current->text[i]))) ) {
00443       if (current->text[i]==ERR)
00444         return (subtitle_t *)ERR;
00445       i++;
00446       if (i>=SUB_MAX_TEXT) {
00447         printf("Too many lines in a subtitle\n");
00448         current->lines=i;
00449         return current;
00450       }
00451     }
00452     current->lines=++i;
00453   }
00454   return current;
00455 }
00456 
00457 static subtitle_t *sub_read_line_rt(demux_sputext_t *demuxstr,subtitle_t *current) {
00458   /*
00459    * TODO: This format uses quite rich (sub/super)set of xhtml
00460    * I couldn't check it since DTD is not included.
00461    * WARNING: full XML parses can be required for proper parsing
00462    */
00463   char line[LINE_LEN + 1];
00464   int a1,a2,a3,a4,b1,b2,b3,b4;
00465   char *p=NULL,*next=NULL;
00466   int i,len,plen;
00467 
00468   memset (current, 0, sizeof(subtitle_t));
00469 
00470   while (!current->text[0]) {
00471     if (!read_line_from_input(demuxstr, line, LINE_LEN)) return NULL;
00472     /*
00473      * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
00474      * to describe the same moment in time. Maybe there are even more formats in use.
00475      */
00476     if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
00477 
00478       plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
00479     if (
00480         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
00481         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
00482         /*      ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) && */
00483         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
00484         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8)
00485         )
00486       continue;
00487     current->start = a1*360000+a2*6000+a3*100+a4/10;
00488     current->end   = b1*360000+b2*6000+b3*100+b4/10;
00489     p=line;     p+=plen;i=0;
00490     /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
00491     next = strstr(line,"<clear/>")+8;i=0;
00492     while ((next =sub_readtext (next, &(current->text[i])))) {
00493       if (current->text[i]==ERR)
00494           return (subtitle_t *)ERR;
00495       i++;
00496       if (i>=SUB_MAX_TEXT) {
00497         printf("Too many lines in a subtitle\n");
00498         current->lines=i;
00499         return current;
00500       }
00501     }
00502     current->lines=i+1;
00503   }
00504   return current;
00505 }
00506 
00507 static subtitle_t *sub_read_line_ssa(demux_sputext_t *demuxstr,subtitle_t *current) {
00508   int comma;
00509   static int max_comma = 32; /* let's use 32 for the case that the */
00510   /*  amount of commas increase with newer SSA versions */
00511 
00512   int hour1, min1, sec1, hunsec1, hour2, min2, sec2, hunsec2, nothing;
00513   int num;
00514   char line[LINE_LEN + 1], line3[LINE_LEN + 1], *line2;
00515   char *tmp;
00516 
00517   do {
00518     if (!read_line_from_input(demuxstr, line, LINE_LEN)) return NULL;
00519   } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
00520                    "%[^\n\r]", &nothing,
00521                    &hour1, &min1, &sec1, &hunsec1,
00522                    &hour2, &min2, &sec2, &hunsec2,
00523                    line3) < 9
00524            &&
00525            sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
00526                    "%[^\n\r]", &nothing,
00527                    &hour1, &min1, &sec1, &hunsec1,
00528                    &hour2, &min2, &sec2, &hunsec2,
00529                    line3) < 9       );
00530 
00531   line2=strchr(line3, ',');
00532   if (!line2)
00533     return NULL;
00534 
00535   for (comma = 4; comma < max_comma; comma ++)
00536     {
00537       tmp = line2;
00538       if(!(tmp=strchr(++tmp, ','))) break;
00539       if(*(++tmp) == ' ') break;
00540       /* a space after a comma means we're already in a sentence */
00541       line2 = tmp;
00542     }
00543 
00544   if(comma < max_comma)max_comma = comma;
00545   /* eliminate the trailing comma */
00546   if(*line2 == ',') line2++;
00547 
00548   current->lines=0;num=0;
00549   current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
00550   current->end   = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
00551 
00552   while (((tmp=strstr(line2, "\\n")) != NULL) || ((tmp=strstr(line2, "\\N")) != NULL) ){
00553     current->text[num]=(char *)malloc(tmp-line2+1);
00554     strncpy (current->text[num], line2, tmp-line2);
00555     current->text[num][tmp-line2]='\0';
00556     line2=tmp+2;
00557     num++;
00558     current->lines++;
00559     if (current->lines >=  SUB_MAX_TEXT) return current;
00560   }
00561 
00562   current->text[num]=strdup(line2);
00563   current->lines++;
00564 
00565   return current;
00566 }
00567 
00568 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
00569  * From MPlayer subreader.c :
00570  *
00571  * PJS subtitles reader.
00572  * That's the "Phoenix Japanimation Society" format.
00573  * I found some of them in http://www.scriptsclub.org/ (used for anime).
00574  * The time is in tenths of second.
00575  *
00576  * by set, based on code by szabi (dunnowhat sub format ;-)
00577  */
00578 
00579 static subtitle_t *sub_read_line_pjs (demux_sputext_t *demuxstr, subtitle_t *current) {
00580   char line[LINE_LEN + 1];
00581   char text[LINE_LEN + 1];
00582   char *s, *d;
00583 
00584   memset (current, 0, sizeof(subtitle_t));
00585 
00586   if (!read_line_from_input(demuxstr, line, LINE_LEN))
00587     return NULL;
00588   for (s = line; *s && isspace(*s); s++);
00589   if (*s == 0)
00590     return NULL;
00591   if (sscanf (line, "%ld,%ld,", &(current->start),
00592               &(current->end)) <2)
00593     return (subtitle_t *)ERR;
00594   /* the files I have are in tenths of second */
00595   current->start *= 10;
00596   current->end *= 10;
00597 
00598   /* walk to the beggining of the string */
00599   for (; *s; s++) if (*s==',') break;
00600   if (*s) {
00601       for (s++; *s; s++) if (*s==',') break;
00602       if (*s) s++;
00603   }
00604   if (*s!='"') {
00605        return (subtitle_t *)ERR;
00606   }
00607   /* copy the string to the text buffer */
00608   for (s++, d=text; *s && *s!='"'; s++, d++)
00609       *d=*s;
00610   *d=0;
00611   current->text[0] = strdup(text);
00612   current->lines = 1;
00613 
00614   return current;
00615 }
00616 
00617 static subtitle_t *sub_read_line_mpsub (demux_sputext_t *demuxstr, subtitle_t *current) {
00618   char line[LINE_LEN + 1];
00619   float a,b;
00620   int num=0;
00621   char *p, *q;
00622 
00623   do {
00624     if (!read_line_from_input(demuxstr, line, LINE_LEN))
00625       return NULL;
00626   } while (sscanf (line, "%f %f", &a, &b) !=2);
00627 
00628   demuxstr->mpsub_position += (a*100.0);
00629   current->start = (int) demuxstr->mpsub_position;
00630   demuxstr->mpsub_position += (b*100.0);
00631   current->end = (int) demuxstr->mpsub_position;
00632 
00633   while (num < SUB_MAX_TEXT) {
00634     if (!read_line_from_input(demuxstr, line, LINE_LEN))
00635       return NULL;
00636 
00637     p=line;
00638     while (isspace(*p))
00639       p++;
00640 
00641     if (eol(*p) && num > 0)
00642       return current;
00643 
00644     if (eol(*p))
00645       return NULL;
00646 
00647     for (q=p; !eol(*q); q++);
00648     *q='\0';
00649     if (strlen(p)) {
00650       current->text[num]=strdup(p);
00651       printf(">%s<\n",p);
00652       current->lines = ++num;
00653     } else {
00654       if (num)
00655         return current;
00656       else
00657         return NULL;
00658     }
00659   }
00660 
00661   return NULL;
00662 }
00663 
00664 static subtitle_t *sub_read_line_aqt (demux_sputext_t *demuxstr, subtitle_t *current) {
00665   char line[LINE_LEN + 1];
00666 
00667   memset (current, 0, sizeof(subtitle_t));
00668 
00669   while (1) {
00670     /* try to locate next subtitle_t */
00671     if (!read_line_from_input(demuxstr, line, LINE_LEN))
00672       return NULL;
00673     if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
00674       break;
00675   }
00676 
00677   if (!read_line_from_input(demuxstr, line, LINE_LEN))
00678     return NULL;
00679 
00680   sub_readtext((char *) &line,&current->text[0]);
00681   current->lines = 1;
00682   current->end = -1;
00683 
00684   if (!read_line_from_input(demuxstr, line, LINE_LEN))
00685     return current;;
00686 
00687   sub_readtext((char *) &line,&current->text[1]);
00688   current->lines = 2;
00689 
00690   if ((current->text[0][0]==0) && (current->text[1][0]==0)) {
00691     return NULL;
00692   }
00693 
00694   return current;
00695 }
00696 
00697 static subtitle_t *sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current) {
00698     char line1[LINE_LEN], line2[LINE_LEN], directive[LINE_LEN], *p, *q;
00699     unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
00700     static unsigned jacoTimeres = 30;
00701     static int jacoShift = 0;
00702 
00703     memset(current, 0, sizeof(subtitle_t));
00704     memset(line1, 0, LINE_LEN);
00705     memset(line2, 0, LINE_LEN);
00706     memset(directive, 0, LINE_LEN);
00707     while (!current->text[0]) {
00708         if (!read_line_from_input(demuxstr, line1, LINE_LEN)) {
00709             return NULL;
00710         }
00711         if (sscanf
00712             (line1, "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
00713              &b1, &b2, &b3, &b4, line2) < 9) {
00714             if (sscanf(line1, "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2) < 3) {
00715                 if (line1[0] == '#') {
00716                     int hours = 0, minutes = 0, seconds, delta, inverter =
00717                         1;
00718                     unsigned units = jacoShift;
00719                     switch (toupper(line1[1])) {
00720                     case 'S':
00721                         if (isalpha(line1[2])) {
00722                             delta = 6;
00723                         } else {
00724                             delta = 2;
00725                         }
00726                         if (sscanf(&line1[delta], "%d", &hours)) {
00727                             if (hours < 0) {
00728                                 hours *= -1;
00729                                 inverter = -1;
00730                             }
00731                             if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
00732                                 if (sscanf
00733                                     (&line1[delta], "%*d:%*d:%d",
00734                                      &seconds)) {
00735                                     sscanf(&line1[delta], "%*d:%*d:%*d.%d",
00736                                            &units);
00737                                 } else {
00738                                     hours = 0;
00739                                     sscanf(&line1[delta], "%d:%d.%d",
00740                                            &minutes, &seconds, &units);
00741                                     minutes *= inverter;
00742                                 }
00743                             } else {
00744                                 hours = minutes = 0;
00745                                 sscanf(&line1[delta], "%d.%d", &seconds,
00746                                        &units);
00747                                 seconds *= inverter;
00748                             }
00749                             jacoShift =
00750                                 ((hours * 3600 + minutes * 60 +
00751                                   seconds) * jacoTimeres +
00752                                  units) * inverter;
00753                         }
00754                         break;
00755                     case 'T':
00756                         if (isalpha(line1[2])) {
00757                             delta = 8;
00758                         } else {
00759                             delta = 2;
00760                         }
00761                         sscanf(&line1[delta], "%u", &jacoTimeres);
00762                         break;
00763                     }
00764                 }
00765                 continue;
00766             } else {
00767                 current->start =
00768                     (unsigned long) ((a4 + jacoShift) * 100.0 /
00769                                      jacoTimeres);
00770                 current->end =
00771                     (unsigned long) ((b4 + jacoShift) * 100.0 /
00772                                      jacoTimeres);
00773             }
00774         } else {
00775             current->start =
00776                 (unsigned
00777                  long) (((a1 * 3600 + a2 * 60 + a3) * jacoTimeres + a4 +
00778                          jacoShift) * 100.0 / jacoTimeres);
00779             current->end =
00780                 (unsigned
00781                  long) (((b1 * 3600 + b2 * 60 + b3) * jacoTimeres + b4 +
00782                          jacoShift) * 100.0 / jacoTimeres);
00783         }
00784         current->lines = 0;
00785         p = line2;
00786         while ((*p == ' ') || (*p == '\t')) {
00787             ++p;
00788         }
00789         if (isalpha(*p)||*p == '[') {
00790             int cont, jLength;
00791 
00792             if (sscanf(p, "%s %" LINE_LEN_QUOT "[^\n\r]", directive, line1) < 2)
00793                 return (subtitle_t *)ERR;
00794             jLength = strlen(directive);
00795             for (cont = 0; cont < jLength; ++cont) {
00796                 if (isalpha(*(directive + cont)))
00797                     *(directive + cont) = toupper(*(directive + cont));
00798             }
00799             if ((strstr(directive, "RDB") != NULL)
00800                 || (strstr(directive, "RDC") != NULL)
00801                 || (strstr(directive, "RLB") != NULL)
00802                 || (strstr(directive, "RLG") != NULL)) {
00803                 continue;
00804             }
00805             /* no alignment */
00806 #if 0
00807             if (strstr(directive, "JL") != NULL) {
00808                 current->alignment = SUB_ALIGNMENT_HLEFT;
00809             } else if (strstr(directive, "JR") != NULL) {
00810                 current->alignment = SUB_ALIGNMENT_HRIGHT;
00811             } else {
00812                 current->alignment = SUB_ALIGNMENT_HCENTER;
00813             }
00814 #endif
00815             strcpy(line2, line1);
00816             p = line2;
00817         }
00818         for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
00819             switch (*p) {
00820             case '{':
00821                 comment++;
00822                 break;
00823             case '}':
00824                 if (comment) {
00825                     --comment;
00826                     /* the next line to get rid of a blank after the comment */
00827                     if ((*(p + 1)) == ' ')
00828                         p++;
00829                 }
00830                 break;
00831             case '~':
00832                 if (!comment) {
00833                     *q = ' ';
00834                     ++q;
00835                 }
00836                 break;
00837             case ' ':
00838             case '\t':
00839                 if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
00840                     break;
00841                 if (!comment) {
00842                     *q = ' ';
00843                     ++q;
00844                 }
00845                 break;
00846             case '\\':
00847                 if (*(p + 1) == 'n') {
00848                     *q = '\0';
00849                     q = line1;
00850                     current->text[current->lines++] = strdup(line1);
00851                     ++p;
00852                     break;
00853                 }
00854                 if ((toupper(*(p + 1)) == 'C')
00855                     || (toupper(*(p + 1)) == 'F')) {
00856                     ++p,++p;
00857                     break;
00858                 }
00859                 if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
00860                     /* actually this means "insert current date here" */
00861                     (*(p + 1) == 'D') ||
00862                     (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
00863                     (*(p + 1) == 'N') ||
00864                     /* actually this means "insert current time here" */
00865                     (*(p + 1) == 'T') ||
00866                     (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
00867                     ++p;
00868                     break;
00869                 }
00870                 if ((*(p + 1) == '\\') ||
00871                     (*(p + 1) == '~') || (*(p + 1) == '{')) {
00872                     ++p;
00873                 } else if (eol(*(p + 1))) {
00874                     if (!read_line_from_input(demuxstr, directive, LINE_LEN))
00875                         return NULL;
00876                     trail_space(directive);
00877                     strncat(line2, directive,
00878                             ((LINE_LEN > 511) ? LINE_LEN-1 : 511)
00879                             - strlen(line2));
00880                     break;
00881                 }
00882             default:
00883                 if (!comment) {
00884                     *q = *p;
00885                     ++q;
00886                 }
00887             }
00888         }
00889         *q = '\0';
00890         current->text[current->lines] = strdup(line1);
00891     }
00892     current->lines++;
00893     return current;
00894 }
00895 
00896 static subtitle_t *sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current) {
00897     char line[LINE_LEN+1];
00898     int a1,a2,a3,a4;
00899     char *p=NULL;
00900     int i,len;
00901 
00902     while (!current->text[0]) {
00903         if (!read_line_from_input(demuxstr, line, LINE_LEN)) return NULL;
00904         if (line[0]!='{')
00905             continue;
00906         if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
00907             continue;
00908         current->start = a1*360000+a2*6000+a3*100+a4/10;
00909         for (i=0; i<SUB_MAX_TEXT;) {
00910             if (!read_line_from_input(demuxstr, line, LINE_LEN)) break;
00911             if (line[0]=='}') break;
00912             len=0;
00913             for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
00914             if (len) {
00915                 current->text[i]=(char *)malloc (len+1);
00916                 if (!current->text[i]) return (subtitle_t *)ERR;
00917                 strncpy (current->text[i], line, len); current->text[i][len]='\0';
00918                 ++i;
00919             } else {
00920                 break;
00921             }
00922         }
00923         current->lines=i;
00924     }
00925     return current;
00926 }
00927 
00928 static subtitle_t *sub_read_line_subrip09 (demux_sputext_t *demuxstr, subtitle_t *current) {
00929   char line[LINE_LEN + 1];
00930   char *next;
00931   int h, m, s;
00932   int i;
00933 
00934   memset (current, 0, sizeof(subtitle_t));
00935 
00936   do {
00937     if (!read_line_from_input (demuxstr, line, LINE_LEN)) return NULL;
00938   } while (sscanf (line, "[%d:%d:%d]", &h, &m, &s) != 3);
00939 
00940   if (!read_line_from_input (demuxstr, line, LINE_LEN)) return NULL;
00941 
00942   current->start = 360000 * h + 6000 * m + 100 * s;
00943   current->end = -1;
00944 
00945   next=line;
00946   i=0;
00947   while ((next = sub_readtext (next, &(current->text[i])))) {
00948     if (current->text[i]==ERR) return (subtitle_t *)ERR;
00949     i++;
00950     if (i>=SUB_MAX_TEXT) {
00951       printf("Too many lines in a subtitle\n");
00952       current->lines=i;
00953       return current;
00954     }
00955   }
00956   current->lines= ++i;
00957 
00958   return current;
00959 }
00960 
00961 /* Code from subreader.c of MPlayer
00962 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
00963 */
00964 
00965 static subtitle_t *sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current) {
00966   char line[LINE_LEN+1];
00967   char line2[LINE_LEN+1];
00968   char *p, *next;
00969   int i;
00970 
00971   memset (current, 0, sizeof(subtitle_t));
00972   do {
00973      if (!read_line_from_input (demuxstr, line, LINE_LEN)) return NULL;
00974   } while ((sscanf (line,
00975                       "[%ld][%ld]%[^\r\n]",
00976                       &(current->start), &(current->end), line2) < 3));
00977   current->start *= 10;
00978   current->end *= 10;
00979   p=line2;
00980 
00981   next=p, i=0;
00982   while ((next = sub_readtext (next, &(current->text[i])))) {
00983       if (current->text[i] == ERR) {return (subtitle_t *)ERR;}
00984       i++;
00985       if (i >= SUB_MAX_TEXT) {
00986         printf("Too many lines in a subtitle\n");
00987         current->lines = i;
00988         return current;
00989       }
00990     }
00991   current->lines= ++i;
00992 
00993   return current;
00994 }
00995 
00996 
00997 static int sub_autodetect (demux_sputext_t *demuxstr) {
00998 
00999   char line[LINE_LEN + 1];
01000   int  i, j=0;
01001   char p;
01002 
01003   while (j < 100) {
01004     j++;
01005     if (!read_line_from_input(demuxstr, line, LINE_LEN))
01006       return FORMAT_UNKNOWN;
01007 
01008     if ((sscanf (line, "{%d}{}", &i)==1) ||
01009         (sscanf (line, "{%d}{%d}", &i, &i)==2)) {
01010       demuxstr->uses_time=0;
01011       return FORMAT_MICRODVD;
01012     }
01013 
01014     if (sscanf (line, "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
01015       demuxstr->uses_time=1;
01016       return FORMAT_SUBRIP;
01017     }
01018 
01019     if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",     &i, &i, &i, &i, &i, &i, &i, &i)==8){
01020       demuxstr->uses_time=1;
01021       return FORMAT_SUBVIEWER;
01022     }
01023 
01024     if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",     &i, &i, &i, &i, &i, &i, &i, &i)==8){
01025       demuxstr->uses_time=1;
01026       return FORMAT_SUBVIEWER;
01027     }
01028 
01029     if (strstr (line, "<SAMI>")) {
01030       demuxstr->uses_time=1;
01031       return FORMAT_SAMI;
01032     }
01033     if (sscanf (line, "%d:%d:%d:",     &i, &i, &i )==3) {
01034       demuxstr->uses_time=1;
01035       return FORMAT_VPLAYER;
01036     }
01037     /*
01038      * A RealText format is a markup language, starts with <window> tag,
01039      * options (behaviour modifiers) are possible.
01040      */
01041     if ( !strcasecmp(line, "<window") ) {
01042       demuxstr->uses_time=1;
01043       return FORMAT_RT;
01044     }
01045     if ((!memcmp(line, "Dialogue: Marked", 16)) || (!memcmp(line, "Dialogue: ", 10))) {
01046       demuxstr->uses_time=1;
01047       return FORMAT_SSA;
01048     }
01049     if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
01050       demuxstr->uses_time=0;
01051       return FORMAT_PJS;
01052     }
01053     if (sscanf (line, "FORMAT=%d", &i) == 1) {
01054       demuxstr->uses_time=0;
01055       return FORMAT_MPSUB;
01056     }
01057     if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E') {
01058       demuxstr->uses_time=1;
01059       return FORMAT_MPSUB;
01060     }
01061     if (strstr (line, "-->>")) {
01062       demuxstr->uses_time=0;
01063       return FORMAT_AQTITLE;
01064     }
01065     if (sscanf(line, "@%d @%d", &i, &i) == 2 ||
01066         sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
01067       demuxstr->uses_time = 1;
01068       return FORMAT_JACOBSUB;
01069     }
01070     if (sscanf(line, "{T %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
01071       demuxstr->uses_time = 1;
01072       return FORMAT_SUBVIEWER2;
01073     }
01074     if (sscanf(line, "[%d:%d:%d]", &i, &i, &i) == 3) {
01075       demuxstr->uses_time = 1;
01076       return FORMAT_SUBRIP09;
01077     }
01078 
01079     if (sscanf (line, "[%d][%d]", &i, &i) == 2) {
01080       demuxstr->uses_time = 1;
01081       return FORMAT_MPL2;
01082     }
01083   }
01084   return FORMAT_UNKNOWN;  /* too many bad lines */
01085 }
01086 
01087 subtitle_t *sub_read_file (demux_sputext_t *demuxstr) {
01088 
01089   int n_max;
01090   int timeout;
01091   subtitle_t *first;
01092   subtitle_t * (*func[])(demux_sputext_t *demuxstr,subtitle_t *dest)=
01093   {
01094     sub_read_line_microdvd,
01095     sub_read_line_subrip,
01096     sub_read_line_subviewer,
01097     sub_read_line_sami,
01098     sub_read_line_vplayer,
01099     sub_read_line_rt,
01100     sub_read_line_ssa,
01101     sub_read_line_pjs,
01102     sub_read_line_mpsub,
01103     sub_read_line_aqt,
01104     sub_read_line_jacobsub,
01105     sub_read_line_subviewer2,
01106     sub_read_line_subrip09,
01107     sub_read_line_mpl2,
01108   };
01109 
01110   /* Rewind (sub_autodetect() needs to read input from the beginning) */
01111   if(demuxstr->rbuffer->Seek(0, SEEK_SET) == -1) {
01112     printf("seek failed.\n");
01113     return NULL;
01114   }
01115   demuxstr->buflen = 0;
01116   demuxstr->emptyReads = 0;
01117 
01118   demuxstr->format=sub_autodetect (demuxstr);
01119   if (demuxstr->format==FORMAT_UNKNOWN) {
01120     return NULL;
01121   }
01122 
01123   /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
01124 
01125   /* Rewind */
01126   if(demuxstr->rbuffer->Seek(0, SEEK_SET) == -1) {
01127     printf("seek failed.\n");
01128     return NULL;
01129   }
01130   demuxstr->buflen = 0;
01131   demuxstr->emptyReads = 0;
01132 
01133   demuxstr->num=0;n_max=32;
01134   first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
01135   if(!first) return NULL;
01136   timeout = MAX_TIMEOUT;
01137 
01138   if (demuxstr->uses_time) timeout *= 100;
01139   else timeout *= 10;
01140 
01141   while(1) {
01142     subtitle_t *sub;
01143 
01144     if(demuxstr->num>=n_max){
01145       n_max+=16;
01146       first=(subtitle_t *)realloc(first,n_max*sizeof(subtitle_t));
01147     }
01148 
01149     sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
01150 
01151     if (!sub) {
01152       break;   /* EOF */
01153     } else {
01154       demuxstr->emptyReads = 0;
01155     }
01156 
01157     if (sub==ERR)
01158       ++demuxstr->errs;
01159     else {
01160       if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1) {
01161         /* end time not defined in the subtitle */
01162         if (timeout > 0) {
01163           /* timeout */
01164           if (timeout > sub->start - first[demuxstr->num-1].start) {
01165             first[demuxstr->num-1].end = sub->start;
01166           } else
01167             first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
01168         } else {
01169           /* no timeout */
01170           first[demuxstr->num-1].end = sub->start;
01171         }
01172       }
01173       ++demuxstr->num; /* Error vs. Valid */
01174     }
01175   }
01176   /* timeout of last subtitle */
01177   if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1)
01178     if (timeout > 0) {
01179       first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
01180     }
01181 
01182 #ifdef DEBUG_XINE_DEMUX_SPUTEXT
01183   {
01184     char buffer[1024];
01185 
01186     sprintf(buffer, "Read %i subtitles", demuxstr->num);
01187 
01188     if(demuxstr->errs)
01189       sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
01190     else
01191       strcat(buffer, "\n");
01192 
01193     printf("%s", buffer);
01194   }
01195 #endif
01196 
01197   return first;
01198 }
01199 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends