• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavformat/id3v2.c

Go to the documentation of this file.
00001 /*
00002  * ID3v2 header parser
00003  * Copyright (c) 2003 Fabrice Bellard
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "id3v2.h"
00023 #include "id3v1.h"
00024 #include "libavutil/avstring.h"
00025 #include "libavutil/intreadwrite.h"
00026 #include "libavutil/dict.h"
00027 #include "avio_internal.h"
00028 
00029 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
00030     { "TALB", "album"},
00031     { "TCOM", "composer"},
00032     { "TCON", "genre"},
00033     { "TCOP", "copyright"},
00034     { "TENC", "encoded_by"},
00035     { "TIT2", "title"},
00036     { "TLAN", "language"},
00037     { "TPE1", "artist"},
00038     { "TPE2", "album_artist"},
00039     { "TPE3", "performer"},
00040     { "TPOS", "disc"},
00041     { "TPUB", "publisher"},
00042     { "TRCK", "track"},
00043     { "TSSE", "encoder"},
00044     { 0 }
00045 };
00046 
00047 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
00048     { "TDRL", "date"},
00049     { "TDRC", "date"},
00050     { "TDEN", "creation_time"},
00051     { "TSOA", "album-sort"},
00052     { "TSOP", "artist-sort"},
00053     { "TSOT", "title-sort"},
00054     { 0 }
00055 };
00056 
00057 static const AVMetadataConv id3v2_2_metadata_conv[] = {
00058     { "TAL",  "album"},
00059     { "TCO",  "genre"},
00060     { "TT2",  "title"},
00061     { "TEN",  "encoded_by"},
00062     { "TP1",  "artist"},
00063     { "TP2",  "album_artist"},
00064     { "TP3",  "performer"},
00065     { "TRK",  "track"},
00066     { 0 }
00067 };
00068 
00069 
/**
 * Four-character text frame IDs. Entries are exactly four bytes wide and
 * therefore NOT NUL-terminated; the list ends with an all-zero entry.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON",
    "TCOP", "TDLY", "TENC", "TEXT",
    "TFLT", "TIT1", "TIT2", "TIT3",
    "TKEY", "TLAN", "TLEN", "TMED",
    "TOAL", "TOFN", "TOLY", "TOPE",
    "TOWN", "TPE1", "TPE2", "TPE3",
    "TPE4", "TPOS", "TPUB", "TRCK",
    "TRSN", "TRSO", "TSRC", "TSSE",
    { '\0' }, /* terminator */
};
00077 
/**
 * Additional four-character text frame IDs (name suggests ID3v2.4 only).
 * Entries are exactly four bytes wide and NOT NUL-terminated; the list
 * ends with an all-zero entry.
 */
const char ff_id3v2_4_tags[][4] = {
    "TDEN", "TDOR", "TDRC", "TDRL",
    "TDTG", "TIPL", "TMCL", "TMOO",
    "TPRO", "TSOA", "TSOP", "TSOT",
    "TSST",
    { '\0' }, /* terminator */
};
00083 
/**
 * Additional four-character text frame IDs (name suggests ID3v2.3 only).
 * Entries are exactly four bytes wide and NOT NUL-terminated; the list
 * ends with an all-zero entry.
 */
const char ff_id3v2_3_tags[][4] = {
    "TDAT", "TIME", "TORY",
    "TRDA", "TSIZ", "TYER",
    { '\0' }, /* terminator */
};
00088 
/**
 * Check whether a buffer starts with a plausible ID3v2-style tag header.
 *
 * @param buf   header bytes; indices 0 through 9 may be read (index 5 is
 *              not inspected)
 * @param magic identifier to look for; only its first three characters
 *              are compared
 * @return 1 if the first three bytes equal the magic, bytes 3 and 4 are
 *         both different from 0xff and bytes 6..9 all have their top bit
 *         clear; 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char * magic)
{
    int i;

    /* identifier bytes */
    for (i = 0; i < 3; i++)
        if (!(buf[i] == magic[i]))
            return 0;

    /* bytes 3 and 4 must not be 0xff */
    if (buf[3] == 0xff)
        return 0;
    if (buf[4] == 0xff)
        return 0;

    /* the four size bytes carry 7 bits each, so bit 7 must be zero */
    for (i = 6; i <= 9; i++)
        if ((buf[i] & 0x80) != 0)
            return 0;

    return 1;
}
00101 
00102 int ff_id3v2_tag_len(const uint8_t * buf)
00103 {
00104     int len = ((buf[6] & 0x7f) << 21) +
00105               ((buf[7] & 0x7f) << 14) +
00106               ((buf[8] & 0x7f) << 7) +
00107                (buf[9] & 0x7f) +
00108               ID3v2_HEADER_SIZE;
00109     if (buf[5] & 0x10)
00110         len += ID3v2_HEADER_SIZE;
00111     return len;
00112 }
00113 
00114 static unsigned int get_size(AVIOContext *s, int len)
00115 {
00116     int v = 0;
00117     while (len--)
00118         v = (v << 7) + (avio_r8(s) & 0x7F);
00119     return v;
00120 }
00121 
00125 static void free_geobtag(void *obj)
00126 {
00127     ID3v2ExtraMetaGEOB *geob = obj;
00128     av_free(geob->mime_type);
00129     av_free(geob->file_name);
00130     av_free(geob->description);
00131     av_free(geob->data);
00132     av_free(geob);
00133 }
00134 
00147 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding,
00148                       uint8_t **dst, int *maxread)
00149 {
00150     int ret;
00151     uint8_t tmp;
00152     uint32_t ch = 1;
00153     int left = *maxread;
00154     unsigned int (*get)(AVIOContext*) = avio_rb16;
00155     AVIOContext *dynbuf;
00156 
00157     if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) {
00158         av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
00159         return ret;
00160     }
00161 
00162     switch (encoding) {
00163 
00164     case ID3v2_ENCODING_ISO8859:
00165         while (left && ch) {
00166             ch = avio_r8(pb);
00167             PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
00168             left--;
00169         }
00170         break;
00171 
00172     case ID3v2_ENCODING_UTF16BOM:
00173         if ((left -= 2) < 0) {
00174             av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n");
00175             avio_close_dyn_buf(dynbuf, dst);
00176             av_freep(dst);
00177             return AVERROR_INVALIDDATA;
00178         }
00179         switch (avio_rb16(pb)) {
00180         case 0xfffe:
00181             get = avio_rl16;
00182         case 0xfeff:
00183             break;
00184         default:
00185             av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n");
00186             avio_close_dyn_buf(dynbuf, dst);
00187             av_freep(dst);
00188             *maxread = left;
00189             return AVERROR_INVALIDDATA;
00190         }
00191         // fall-through
00192 
00193     case ID3v2_ENCODING_UTF16BE:
00194         while ((left > 1) && ch) {
00195             GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;)
00196             PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
00197         }
00198         if (left < 0)
00199             left += 2; /* did not read last char from pb */
00200         break;
00201 
00202     case ID3v2_ENCODING_UTF8:
00203         while (left && ch) {
00204             ch = avio_r8(pb);
00205             avio_w8(dynbuf, ch);
00206             left--;
00207         }
00208         break;
00209     default:
00210         av_log(s, AV_LOG_WARNING, "Unknown encoding\n");
00211     }
00212 
00213     if (ch)
00214         avio_w8(dynbuf, 0);
00215 
00216     avio_close_dyn_buf(dynbuf, dst);
00217     *maxread = left;
00218 
00219     return 0;
00220 }
00221 
00225 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
00226 {
00227     uint8_t *dst;
00228     int encoding, dict_flags = AV_DICT_DONT_OVERWRITE;
00229     unsigned genre;
00230 
00231     if (taglen < 1)
00232         return;
00233 
00234     encoding = avio_r8(pb);
00235     taglen--; /* account for encoding type byte */
00236 
00237     if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
00238         av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
00239         return;
00240     }
00241 
00242     if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
00243         && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
00244         && genre <= ID3v1_GENRE_MAX) {
00245         av_freep(&dst);
00246         dst = ff_id3v1_genre_str[genre];
00247     } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
00248         /* dst now contains the key, need to get value */
00249         key = dst;
00250         if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
00251             av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
00252             av_freep(&key);
00253             return;
00254         }
00255         dict_flags |= AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_STRDUP_KEY;
00256     }
00257     else if (*dst)
00258         dict_flags |= AV_DICT_DONT_STRDUP_VAL;
00259 
00260     if (dst)
00261         av_dict_set(&s->metadata, key, dst, dict_flags);
00262 }
00263 
00267 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta)
00268 {
00269     ID3v2ExtraMetaGEOB *geob_data = NULL;
00270     ID3v2ExtraMeta *new_extra = NULL;
00271     char encoding;
00272     unsigned int len;
00273 
00274     if (taglen < 1)
00275         return;
00276 
00277     geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
00278     if (!geob_data) {
00279         av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMetaGEOB));
00280         return;
00281     }
00282 
00283     new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
00284     if (!new_extra) {
00285         av_log(s, AV_LOG_ERROR, "Failed to alloc %zu bytes\n", sizeof(ID3v2ExtraMeta));
00286         goto fail;
00287     }
00288 
00289     /* read encoding type byte */
00290     encoding = avio_r8(pb);
00291     taglen--;
00292 
00293     /* read MIME type (always ISO-8859) */
00294     if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, &taglen) < 0
00295         || taglen <= 0)
00296         goto fail;
00297 
00298     /* read file name */
00299     if (decode_str(s, pb, encoding, &geob_data->file_name, &taglen) < 0
00300         || taglen <= 0)
00301         goto fail;
00302 
00303     /* read content description */
00304     if (decode_str(s, pb, encoding, &geob_data->description, &taglen) < 0
00305         || taglen < 0)
00306         goto fail;
00307 
00308     if (taglen) {
00309         /* save encapsulated binary data */
00310         geob_data->data = av_malloc(taglen);
00311         if (!geob_data->data) {
00312             av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
00313             goto fail;
00314         }
00315         if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
00316             av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n");
00317         geob_data->datasize = len;
00318     } else {
00319         geob_data->data = NULL;
00320         geob_data->datasize = 0;
00321     }
00322 
00323     /* add data to the list */
00324     new_extra->tag = "GEOB";
00325     new_extra->data = geob_data;
00326     new_extra->next = *extra_meta;
00327     *extra_meta = new_extra;
00328 
00329     return;
00330 
00331 fail:
00332     av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag);
00333     free_geobtag(geob_data);
00334     av_free(new_extra);
00335     return;
00336 }
00337 
/**
 * Tell whether a string consists solely of the ASCII digits '0'..'9'.
 *
 * @param str NUL-terminated string to inspect
 * @return 1 if every character is a digit (an empty string also yields 1),
 *         0 as soon as any other character is found
 */
static int is_number(const char *str)
{
    const char *p;

    for (p = str; *p; p++)
        if (*p < '0' || *p > '9')
            return 0;
    return 1;
}
00343 
00344 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
00345 {
00346     AVDictionaryEntry *t;
00347     if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
00348         strlen(t->value) == 4 && is_number(t->value))
00349         return t;
00350     return NULL;
00351 }
00352 
00353 static void merge_date(AVDictionary **m)
00354 {
00355     AVDictionaryEntry *t;
00356     char date[17] = {0};      // YYYY-MM-DD hh:mm
00357 
00358     if (!(t = get_date_tag(*m, "TYER")) &&
00359         !(t = get_date_tag(*m, "TYE")))
00360         return;
00361     av_strlcpy(date, t->value, 5);
00362     av_dict_set(m, "TYER", NULL, 0);
00363     av_dict_set(m, "TYE",  NULL, 0);
00364 
00365     if (!(t = get_date_tag(*m, "TDAT")) &&
00366         !(t = get_date_tag(*m, "TDA")))
00367         goto finish;
00368     snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
00369     av_dict_set(m, "TDAT", NULL, 0);
00370     av_dict_set(m, "TDA",  NULL, 0);
00371 
00372     if (!(t = get_date_tag(*m, "TIME")) &&
00373         !(t = get_date_tag(*m, "TIM")))
00374         goto finish;
00375     snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
00376     av_dict_set(m, "TIME", NULL, 0);
00377     av_dict_set(m, "TIM",  NULL, 0);
00378 
00379 finish:
00380     if (date[0])
00381         av_dict_set(m, "date", date, 0);
00382 }
00383 
00384 typedef struct ID3v2EMFunc {
00385     const char *tag3;
00386     const char *tag4;
00387     void (*read)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta **);
00388     void (*free)(void *obj);
00389 } ID3v2EMFunc;
00390 
00391 static const ID3v2EMFunc id3v2_extra_meta_funcs[] = {
00392     { "GEO", "GEOB", read_geobtag, free_geobtag },
00393     { NULL }
00394 };
00395 
00401 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34)
00402 {
00403     int i = 0;
00404     while (id3v2_extra_meta_funcs[i].tag3) {
00405         if (!memcmp(tag,
00406                     (isv34 ? id3v2_extra_meta_funcs[i].tag4 :
00407                              id3v2_extra_meta_funcs[i].tag3),
00408                     (isv34 ? 4 : 3)))
00409             return &id3v2_extra_meta_funcs[i];
00410         i++;
00411     }
00412     return NULL;
00413 }
00414 
00415 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta)
00416 {
00417     int isv34, tlen, unsync;
00418     char tag[5];
00419     int64_t next, end = avio_tell(s->pb) + len;
00420     int taghdrlen;
00421     const char *reason = NULL;
00422     AVIOContext pb;
00423     AVIOContext *pbx;
00424     unsigned char *buffer = NULL;
00425     int buffer_size = 0;
00426     const ID3v2EMFunc *extra_func;
00427 
00428     switch (version) {
00429     case 2:
00430         if (flags & 0x40) {
00431             reason = "compression";
00432             goto error;
00433         }
00434         isv34 = 0;
00435         taghdrlen = 6;
00436         break;
00437 
00438     case 3:
00439     case 4:
00440         isv34 = 1;
00441         taghdrlen = 10;
00442         break;
00443 
00444     default:
00445         reason = "version";
00446         goto error;
00447     }
00448 
00449     unsync = flags & 0x80;
00450 
00451     if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */
00452         int extlen = get_size(s->pb, 4);
00453         if (version == 4)
00454             extlen -= 4;     // in v2.4 the length includes the length field we just read
00455 
00456         if (extlen < 0) {
00457             reason = "invalid extended header length";
00458             goto error;
00459         }
00460         avio_skip(s->pb, extlen);
00461     }
00462 
00463     while (len >= taghdrlen) {
00464         unsigned int tflags = 0;
00465         int tunsync = 0;
00466 
00467         if (isv34) {
00468             avio_read(s->pb, tag, 4);
00469             tag[4] = 0;
00470             if(version==3){
00471                 tlen = avio_rb32(s->pb);
00472             }else
00473                 tlen = get_size(s->pb, 4);
00474             tflags = avio_rb16(s->pb);
00475             tunsync = tflags & ID3v2_FLAG_UNSYNCH;
00476         } else {
00477             avio_read(s->pb, tag, 3);
00478             tag[3] = 0;
00479             tlen = avio_rb24(s->pb);
00480         }
00481         if (tlen < 0 || tlen > len - taghdrlen) {
00482             av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
00483             break;
00484         }
00485         len -= taghdrlen + tlen;
00486         next = avio_tell(s->pb) + tlen;
00487 
00488         if (!tlen) {
00489             if (tag[0])
00490                 av_log(s, AV_LOG_DEBUG, "Invalid empty frame %s, skipping.\n", tag);
00491             continue;
00492         }
00493 
00494         if (tflags & ID3v2_FLAG_DATALEN) {
00495             avio_rb32(s->pb);
00496             tlen -= 4;
00497         }
00498 
00499         if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
00500             av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
00501             avio_skip(s->pb, tlen);
00502         /* check for text tag or supported special meta tag */
00503         } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) {
00504             if (unsync || tunsync) {
00505                 int64_t end = avio_tell(s->pb) + tlen;
00506                 uint8_t *b;
00507                 av_fast_malloc(&buffer, &buffer_size, tlen);
00508                 if (!buffer) {
00509                     av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
00510                     goto seek;
00511                 }
00512                 b = buffer;
00513                 while (avio_tell(s->pb) < end) {
00514                     *b++ = avio_r8(s->pb);
00515                     if (*(b - 1) == 0xff && avio_tell(s->pb) < end - 1) {
00516                         uint8_t val = avio_r8(s->pb);
00517                         *b++ = val ? val : avio_r8(s->pb);
00518                     }
00519                 }
00520                 ffio_init_context(&pb, buffer, b - buffer, 0, NULL, NULL, NULL, NULL);
00521                 tlen = b - buffer;
00522                 pbx = &pb; // read from sync buffer
00523             } else {
00524                 pbx = s->pb; // read straight from input
00525             }
00526             if (tag[0] == 'T')
00527                 /* parse text tag */
00528                 read_ttag(s, pbx, tlen, tag);
00529             else
00530                 /* parse special meta tag */
00531                 extra_func->read(s, pbx, tlen, tag, extra_meta);
00532         }
00533         else if (!tag[0]) {
00534             if (tag[1])
00535                 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
00536             avio_skip(s->pb, tlen);
00537             break;
00538         }
00539         /* Skip to end of tag */
00540 seek:
00541         avio_seek(s->pb, next, SEEK_SET);
00542     }
00543 
00544     if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
00545         end += 10;
00546 
00547   error:
00548     if (reason)
00549         av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
00550     avio_seek(s->pb, end, SEEK_SET);
00551     av_free(buffer);
00552     return;
00553 }
00554 
00555 void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta)
00556 {
00557     int len, ret;
00558     uint8_t buf[ID3v2_HEADER_SIZE];
00559     int     found_header;
00560     int64_t off;
00561 
00562     do {
00563         /* save the current offset in case there's nothing to read/skip */
00564         off = avio_tell(s->pb);
00565         ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
00566         if (ret != ID3v2_HEADER_SIZE)
00567             break;
00568             found_header = ff_id3v2_match(buf, magic);
00569             if (found_header) {
00570             /* parse ID3v2 header */
00571             len = ((buf[6] & 0x7f) << 21) |
00572                   ((buf[7] & 0x7f) << 14) |
00573                   ((buf[8] & 0x7f) << 7) |
00574                    (buf[9] & 0x7f);
00575             ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta);
00576         } else {
00577             avio_seek(s->pb, off, SEEK_SET);
00578         }
00579     } while (found_header);
00580     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
00581     ff_metadata_conv(&s->metadata, NULL, id3v2_2_metadata_conv);
00582     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
00583     merge_date(&s->metadata);
00584 }
00585 
00586 void ff_id3v2_read(AVFormatContext *s, const char *magic)
00587 {
00588     ff_id3v2_read_all(s, magic, NULL);
00589 }
00590 
00591 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
00592 {
00593     ID3v2ExtraMeta *current = *extra_meta, *next;
00594     const ID3v2EMFunc *extra_func;
00595 
00596     while (current) {
00597         if ((extra_func = get_extra_meta_func(current->tag, 1)))
00598             extra_func->free(current->data);
00599         next = current->next;
00600         av_freep(&current);
00601         current = next;
00602     }
00603 }
Generated on Thu Jul 11 2013 15:38:23 for Libav by doxygen 1.7.1