Bug 140515 - mmap()/alloc()ing 6.3GB when parsing certain .mp3
Summary: mmap()/alloc()ing 6.3GB when parsing certain .mp3
Status: RESOLVED FIXED
Alias: None
Product: taglib
Classification: Frameworks and Libraries
Component: general (show other bugs)
Version: unspecified
Platform: unspecified Linux
: NOR crash
Target Milestone: ---
Assignee: Scott Wheeler
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2007-01-23 22:00 UTC by Hans Ecke
Modified: 2007-02-13 18:13 UTC (History)
0 users

See Also:
Latest Commit:
Version Fixed In:
Sentry Crash Report:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Hans Ecke 2007-01-23 22:00:42 UTC
Version:           svn 20070123 (using KDE KDE 3.5.5)
Installed from:    Unlisted Binary Package
Compiler:          gcc 4.1.1 
OS:                Linux

Hi-

Current amarok versions requires id3v2.4 for correct utf-8 behavior. No problem: I converted all my MP3s to 2.4 using mutagen http://www.sacredchao.net/quodlibet/wiki/Development/Mutagen

Re-running the amarok collection scanner caused a lot of parse failures in taglib. So I updated taglib to today's version. Now the collection scanner eats all my RAM. Details follow.

Attached is the tail end of a strace(1) trace for running

   amarokcollectionscanner -r /music/mdb/music

It shows how it processes two files: "03 - 7' Instrumental.mp3" and "01
- 7' Mix.mp3". When processing the latter file it mmap()s 3.3GB of RAM
twice. The program stops at that point, till I kill it from another
terminal.

Here are my stats:

   Linux 2.6.18
   2 cpu x86_64
   Fedora 5
   Qt 3.3.7
   KDE 3.5.5
   Amarok 1.4.4
   Taglib svn-20070123

How can I help fix this problem? How can I supply a more useful bug
report?

Thanks!

Hans

================================

access("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/03 - 7\' Instrumental.mp3", F_OK) = 0
access("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/03 - 7\' Instrumental.mp3", W_OK) = 0
open("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/03 - 7\' Instrumental.mp3", O_RDWR) = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaae233000
lseek(10, 0, SEEK_CUR)                  = 0
lseek(10, 0, SEEK_SET)                  = 0
read(10, "ID3\4\0\0\0\1M.TALB\0\0\0)\0\0\1\377\376R\0e\0n\0e\0g"..., 4096) = 4096
lseek(10, 4096, SEEK_SET)               = 4096
lseek(10, 4096, SEEK_SET)               = 4096
fstat(10, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
lseek(10, 3907584, SEEK_SET)            = 3907584
lseek(10, 0, SEEK_SET)                  = 0
read(10, "ID3\4\0\0\0\1M.", 10)         = 10
read(10, "TALB\0\0\0)\0\0\1\377\376R\0e\0n\0e\0g\0a\0d\0e\0 \0S"..., 24576) = 24576
read(10, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
fstat(10, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
lseek(10, 3903488, SEEK_SET)            = 3903488
read(10, "\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 4096) = 4096
lseek(10, 3907584, SEEK_SET)            = 3907584
fstat(10, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
lseek(10, 3907584, SEEK_SET)            = 3907584
lseek(10, 3907584, SEEK_SET)            = 3907584
lseek(10, 3907584, SEEK_SET)            = 3907584
read(10, "", 4096)                      = 0
lseek(10, 24576, SEEK_SET)              = 24576
read(10, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1720) = 1720
read(10, "K\200\0\0\10\0\0\tp\0\0\1\0\0\1.\0\0\0 \0\0%\300\0\0\4"..., 4096) = 4096
lseek(10, 30392, SEEK_SET)              = 30392
lseek(10, 30392, SEEK_SET)              = 30392
open("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/03 - 7\' Instrumental.mp3", O_RDONLY) = 11
fstat(11, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
read(11, "ID3\4\0\0\0\1M.TALB\0\0\0)\0\0\1\377\376R\0e\0n\0e\0g"..., 8192) = 8192
fstat(11, {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
close(11)                               = 0
stat("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/03 - 7\' Instrumental.mp3", {st_mode=S_IFREG|0644, st_size=3907584, ...}) = 0
close(10)                               = 0
munmap(0x2aaaae233000, 4096)            = 0
write(1, "<tags title=\"7\' Instrumental\" co"..., 429) = 429
open("/home/hans/.kde/share/apps/amarok/collection_scan.log", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 10
fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaae233000
write(10, "/music/mdb/music/Techno Trance/R"..., 84) = 84
close(10)                               = 0
munmap(0x2aaaae233000, 4096)            = 0
access("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/01 - 7\' Mix.mp3", F_OK) = 0
access("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/01 - 7\' Mix.mp3", W_OK) = 0
open("/music/mdb/music/Techno Trance/Renegade Soundwave/Renegade Soundwave/01 - 7\' Mix.mp3", O_RDWR) = 10
fstat(10, {st_mode=S_IFREG|0644, st_size=3481600, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaae233000
lseek(10, 0, SEEK_CUR)                  = 0
lseek(10, 0, SEEK_SET)                  = 0
read(10, "ID3\4\0\0\0\0012KTALB\0\0\0)\0\0\1\377\376R\0e\0n\0e\0"..., 4096) = 4096
lseek(10, 4096, SEEK_SET)               = 4096
lseek(10, 4096, SEEK_SET)               = 4096
fstat(10, {st_mode=S_IFREG|0644, st_size=3481600, ...}) = 0
lseek(10, 3481600, SEEK_SET)            = 3481600
lseek(10, 0, SEEK_SET)                  = 0
read(10, "ID3\4\0\0\0\0012K", 10)       = 10
read(10, "TALB\0\0\0)\0\0\1\377\376R\0e\0n\0e\0g\0a\0d\0e\0 \0S"..., 20480) = 20480
read(10, "\5\346\5\205\4\257\3\37\5l\5>\6b\5\26\5\306\4^\4\203\4"..., 4096) = 4096
mmap(NULL, 3393638400, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaae234000
fstat(10, {st_mode=S_IFREG|0644, st_size=3481600, ...}) = 0
lseek(10, 3477504, SEEK_SET)            = 3477504
read(10, "\377\377\377\377\377\377\377\377\377\377\377\377\377\377"..., 4096) = 4096
lseek(10, 3481600, SEEK_SET)            = 3481600
fstat(10, {st_mode=S_IFREG|0644, st_size=3481600, ...}) = 0
lseek(10, 3481600, SEEK_SET)            = 3481600
lseek(10, 3481600, SEEK_SET)            = 3481600
lseek(10, 3481600, SEEK_SET)            = 3481600
read(10, "", 4096)                      = 0
lseek(10, 20480, SEEK_SET)              = 20480
read(10, "\5\7\7\357\5\22\10\243\6s\5\346\5\205\4\257\3\37\5l\5>"..., 2389) = 2389
read(10, "K\200\0\0\10\0\0\tp\0\0\1\0\0\1.\0\0\0 \0\0%\300\0\0\4"..., 4096) = 4096
lseek(10, 26965, SEEK_SET)              = 26965
lseek(10, 26965, SEEK_SET)              = 26965
mmap(NULL, 3393638400, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aab786a1000
Comment 1 Scott Wheeler 2007-02-13 10:26:42 UTC
SVN commit 633123 by wheeler:

Add another sanity check -- don't let invalid frames try to allocate anything
larger than the tag size.

BUG:140515


 M  +4 -1      id3v2framefactory.cpp  


--- trunk/kdesupport/taglib/taglib/mpeg/id3v2/id3v2framefactory.cpp #633122:633123
@@ -73,7 +73,10 @@
   // A quick sanity check -- make sure that the frameID is 4 uppercase Latin1
   // characters.  Also make sure that there is data in the frame.
 
-  if(!frameID.size() == (version < 3 ? 3 : 4) || header->frameSize() <= 0) {
+  if(!frameID.size() == (version < 3 ? 3 : 4) ||
+     header->frameSize() <= 0 ||
+     header->frameSize() > data.size())
+  {
     delete header;
     return 0;
   }
Comment 2 Lukáš Lalinský 2007-02-13 10:55:26 UTC
My personal guess is that the problem is not in frame size reading code, but in data length (for compressed frames) reading code. According to the spec it's a synch-safe integer, TagLib reads it as a regular 32-bit integer.
Comment 3 Scott Wheeler 2007-02-13 11:07:33 UTC
I had the demo file here and verified the fix.  The frame wasn't actually compressed -- there was an invalid frame length, which landed the tag parser in a funny spot that looked close enough to a frame that TagLib tried to parse it as one.
Comment 4 Hans Ecke 2007-02-13 18:13:47 UTC
Fix verified. Thank you very much. Now amarok works again for me.