Application: baloo_file_extractor (5.54.0) Qt Version: 5.11.3 Frameworks Version: 5.54.0 Operating System: Linux 4.20.6-200.fc29.x86_64 x86_64 Distribution: "Fedora release 29 (Twenty Nine)" -- Information about the crash: Reproducible every time on my set of files. The crash can be reproduced every time. -- Backtrace: Application: Baloo File Extractor (baloo_file_extractor), signal: Aborted Using host libthread_db library "/lib64/libthread_db.so.1". [Current thread is 1 (Thread 0x7f1fa2e45940 (LWP 5463))] Thread 3 (Thread 0x7f1f8bf6f700 (LWP 5465)): #0 0x00007f1f9f2772ad in g_mutex_lock () from /lib64/libglib-2.0.so.0 #1 0x00007f1f9f22e8f6 in g_main_context_prepare () from /lib64/libglib-2.0.so.0 #2 0x00007f1f9f22f2db in ?? () from /lib64/libglib-2.0.so.0 #3 0x00007f1f9f22f4d0 in g_main_context_iteration () from /lib64/libglib-2.0.so.0 #4 0x00007f1fa1f825ab in QEventDispatcherGlib::processEvents (this=0x7f1f84000b20, flags=...) at kernel/qeventdispatcher_glib.cpp:424 #5 0x00007f1fa1f30e0b in QEventLoop::exec (this=this@entry=0x7f1f8bf6ec30, flags=..., flags@entry=...) at ../../include/QtCore/../../src/corelib/global/qflags.h:140 #6 0x00007f1fa1d98e86 in QThread::exec (this=this@entry=0x7f1fa2edb060 <(anonymous namespace)::Q_QGS__q_manager::innerFunction()::holder>) at ../../include/QtCore/../../src/corelib/global/qflags.h:120 #7 0x00007f1fa2e5ff89 in QDBusConnectionManager::run (this=0x7f1fa2edb060 <(anonymous namespace)::Q_QGS__q_manager::innerFunction()::holder>) at qdbusconnection.cpp:178 #8 0x00007f1fa1da22fb in QThreadPrivate::start (arg=0x7f1fa2edb060 <(anonymous namespace)::Q_QGS__q_manager::innerFunction()::holder>) at thread/qthread_unix.cpp:367 #9 0x00007f1fa148858e in start_thread () from /lib64/libpthread.so.0 #10 0x00007f1fa18d56a3 in clone () from /lib64/libc.so.6 Thread 2 (Thread 0x7f1f918c7700 (LWP 5464)): #0 0x00007f1fa18ca421 in poll () from /lib64/libc.so.6 #1 0x00007f1f9f12d39f in ?? 
() from /lib64/libxcb.so.1 #2 0x00007f1f9f12f01a in xcb_wait_for_event () from /lib64/libxcb.so.1 #3 0x00007f1f91ab2bf9 in ?? () from /lib64/libQt5XcbQpa.so.5 #4 0x00007f1fa1da22fb in QThreadPrivate::start (arg=0x5609b33ef050) at thread/qthread_unix.cpp:367 #5 0x00007f1fa148858e in start_thread () from /lib64/libpthread.so.0 #6 0x00007f1fa18d56a3 in clone () from /lib64/libc.so.6 Thread 1 (Thread 0x7f1fa2e45940 (LWP 5463)): [KCrash Handler] #6 0x00007f1fa181053f in raise () from /lib64/libc.so.6 #7 0x00007f1fa17fa895 in abort () from /lib64/libc.so.6 #8 0x00007f1fa1bd8e9b in ?? () from /lib64/libstdc++.so.6 #9 0x00007f1fa1bdf2fc in ?? () from /lib64/libstdc++.so.6 #10 0x00007f1fa1bdf357 in std::terminate() () from /lib64/libstdc++.so.6 #11 0x00007f1fa1bdf5b8 in __cxa_throw () from /lib64/libstdc++.so.6 #12 0x00007f1fa1d62de9 in qBadAlloc () at /usr/include/c++/8/bits/exception.h:63 #13 0x00007f1fa1d67a06 in QString::reallocData (this=this@entry=0x5609d7a0a4c8, alloc=1073741812, grow=grow@entry=true) at tools/qstring.cpp:2157 #14 0x00007f1fa1fb3ce3 in QString::operator+= (c=..., this=0x5609d7a0a4c8) at ../../include/QtCore/../../src/corelib/tools/qstring.h:473 #15 QXmlStreamReaderPrivate::fastScanLiteralContent (this=this@entry=0x5609d7a0a280) at serialization/qxmlstream.cpp:1201 #16 0x00007f1fa1fafef7 in QXmlStreamReaderPrivate::parse (this=<optimized out>) at serialization/qxmlstream_p.h:1640 #17 0x00007f1fa1fb255d in QXmlStreamReader::readNext (this=<optimized out>) at serialization/qxmlstream.cpp:610 #18 0x00007f1f906eb535 in KFileMetaData::XmlExtractor::extract (this=<optimized out>, result=0x7ffc79302bb0) at /usr/src/debug/kf5-kfilemetadata-5.54.0-1.fc29.x86_64/src/extractors/xmlextractor.cpp:131 #19 0x00005609b25d3f73 in Baloo::App::index (this=this@entry=0x7ffc793032c0, tr=0x5609d3f66680, url=..., id=id@entry=146375259996555283) at /usr/src/debug/kf5-baloo-5.54.0-1.fc29.x86_64/src/file/extractor/app.cpp:191 #20 0x00005609b25d5d8b in 
Baloo::App::processNextFile (this=0x7ffc793032c0) at /usr/src/debug/kf5-baloo-5.54.0-1.fc29.x86_64/src/file/extractor/app.cpp:111 #21 0x00007f1fa1f65f2a in QtPrivate::QSlotObjectBase::call (a=0x7ffc79302d00, r=<optimized out>, this=<optimized out>) at ../../include/QtCore/../../src/corelib/kernel/qobjectdefs_impl.h:376 #22 QSingleShotTimer::timerEvent (this=0x5609e07c6070) at kernel/qtimer.cpp:318 #23 0x00007f1fa1f5ad5b in QObject::event (this=0x5609e07c6070, e=<optimized out>) at kernel/qobject.cpp:1232 #24 0x00007f1fa2825285 in QApplicationPrivate::notify_helper(QObject*, QEvent*) () from /lib64/libQt5Widgets.so.5 #25 0x00007f1fa282c9a0 in QApplication::notify(QObject*, QEvent*) () from /lib64/libQt5Widgets.so.5 #26 0x00007f1fa1f31ec6 in QCoreApplication::notifyInternal2 (receiver=0x5609e07c6070, event=event@entry=0x7ffc79302fa0) at kernel/qcoreapplication.cpp:1047 #27 0x00007f1fa1f81889 in QCoreApplication::sendEvent (event=0x7ffc79302fa0, receiver=<optimized out>) at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:234 #28 QTimerInfoList::activateTimers (this=0x5609b3473290) at kernel/qtimerinfo_unix.cpp:643 #29 0x00007f1fa1f82164 in timerSourceDispatch (source=<optimized out>) at kernel/qeventdispatcher_glib.cpp:182 #30 idleTimerSourceDispatch (source=<optimized out>) at kernel/qeventdispatcher_glib.cpp:229 #31 0x00007f1f9f22f06d in g_main_context_dispatch () from /lib64/libglib-2.0.so.0 #32 0x00007f1f9f22f438 in ?? () from /lib64/libglib-2.0.so.0 #33 0x00007f1f9f22f4d0 in g_main_context_iteration () from /lib64/libglib-2.0.so.0 #34 0x00007f1fa1f82593 in QEventDispatcherGlib::processEvents (this=0x5609b342d8b0, flags=...) at kernel/qeventdispatcher_glib.cpp:422 #35 0x00007f1f91b46855 in ?? () from /lib64/libQt5XcbQpa.so.5 #36 0x00007f1fa1f30e0b in QEventLoop::exec (this=this@entry=0x7ffc79303220, flags=..., flags@entry=...) 
at ../../include/QtCore/../../src/corelib/global/qflags.h:140 #37 0x00007f1fa1f38ed6 in QCoreApplication::exec () at ../../include/QtCore/../../src/corelib/global/qflags.h:120 #38 0x00005609b25d33f7 in main (argc=<optimized out>, argv=0x7ffc793034e8) at /usr/src/debug/kf5-baloo-5.54.0-1.fc29.x86_64/src/file/extractor/main.cpp:60 [Inferior 1 (process 5463) detached] Reported using DrKonqi
Created attachment 117962 [details] internal-entity-polynomial-attribute.xml
Looks like the extractor was crashing while indexing internal-entity-polynomial-attribute.xml from the qt-5.11.0 sources, huh. See attachment.
Crashing in KFileMetaData::XmlExtractor::extract. Is the file you attached the one that makes Baloo crash?
Yes, I think so.
Parsing this file with an XML parser causes huge memory usage, so it's understandable that baloo_file_extractor crashes (Firefox can't parse this file either). What is bad is that the file is not skipped after I restart indexing.
That's a nasty file. Sizes: e1: 120 characters; e2: 64 * e1; e3: 64 * e2; e4: 64 * e3; root id: 64 * e4 = 2^24 * 120 characters ~= 2 * 10^9 characters (4 GByte as UTF-16/QString). For reallocation, we need about 8 GByte (old data storage plus new data storage), plus anything else allocated. Although the failed document is remembered, this is currently not taken into account when indexing. The reason is the lack of extractor versioning: without it, skipping failed files would mean that after one failed attempt, caused e.g. by a coding error, we would never try the file again. See https://phabricator.kde.org/T9867, 3rd bullet point.
Git commit de81ddb651b14ca567e30c5bca4f7618894819a5 by Stefan Brüns. Committed on 23/02/2019 at 20:35. Pushed by bruns into branch 'master'. [Extractor] Add metadata to extractors Summary: This adds extractor metadata in a backwards and forward compatible way. There are several use cases for this metadata: - Delayed loading of extractor plugins - currently, all extractors are loaded and initialized when an ExtractorCollection is created. - Versioning information - e.g. Baloo would benefit from versioning information, to reindex affected files after an extractor has been updated. Although it would be possible to extend the extractor plugin interface with a method for each relevant property, it would require a bump of the plugin interface version each time the interface is extended. See: T9867, T8079 Test Plan: ctest Reviewers: #baloo, #frameworks, ngraham, astippich, poboiko Reviewed By: astippich Subscribers: kde-frameworks-devel Tags: #frameworks, #baloo Differential Revision: https://phabricator.kde.org/D19109 M +1 -0 autotests/CMakeLists.txt M +57 -2 autotests/extractorcollectiontest.cpp M +10 -0 src/extractor.cpp M +4 -0 src/extractor.h M +1 -0 src/extractor_p.h M +10 -1 src/extractorcollection.cpp M +3 -1 src/extractorcollection.h M +2 -0 src/extractors/CMakeLists.txt M +2 -1 src/extractors/appimageextractor.h A +9 -0 src/extractors/appimageextractor.json M +2 -1 src/extractors/epubextractor.h A +8 -0 src/extractors/epubextractor.json M +2 -1 src/extractors/exiv2extractor.h A +29 -0 src/extractors/exiv2extractor.json.in M +2 -1 src/extractors/ffmpegextractor.h A +16 -0 src/extractors/ffmpegextractor.json M +2 -1 src/extractors/mobiextractor.h A +8 -0 src/extractors/mobiextractor.json M +2 -1 src/extractors/odfextractor.h A +10 -0 src/extractors/odfextractor.json M +2 -1 src/extractors/office2007extractor.h A +10 -0 src/extractors/office2007extractor.json M +2 -1 src/extractors/officeextractor.h A +19 -0 src/extractors/officeextractor.json M +2 -1 
src/extractors/plaintextextractor.h A +8 -0 src/extractors/plaintextextractor.json M +2 -1 src/extractors/poextractor.h A +8 -0 src/extractors/poextractor.json M +2 -1 src/extractors/popplerextractor.h A +8 -0 src/extractors/popplerextractor.json M +2 -1 src/extractors/postscriptdscextractor.h A +9 -0 src/extractors/postscriptdscextractor.json M +2 -1 src/extractors/taglibextractor.h A +25 -0 src/extractors/taglibextractor.json M +2 -1 src/extractors/xmlextractor.h A +10 -0 src/extractors/xmlextractor.json https://commits.kde.org/kfilemetadata/de81ddb651b14ca567e30c5bca4f7618894819a5