Version: 3.4.0 (using Devel) Compiler: gcc 4.0.3 OS: Linux Installed from: Compiled sources sudo valgrind --tool=helgrind ../../bin/TestDetect ==8103== Helgrind, a thread error detector. ==8103== Copyright (C) 2007-2008, and GNU GPL'd, by OpenWorks LLP et al. ==8103== Using LibVEX rev 1878, a library for dynamic binary translation. ==8103== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP. ==8103== Using valgrind-3.4.0, a dynamic binary instrumentation framework. ==8103== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al. ==8103== For more details, rerun with: -v ==8103== ==8106== Warning: invalid file descriptor 1014 in syscall close() ==8106== Warning: invalid file descriptor 1015 in syscall close() ==8106== Warning: invalid file descriptor 1016 in syscall close() ==8106== Use --log-fd=<number> to select an alternative log fd. ==8106== Warning: invalid file descriptor 1017 in syscall close() ==8106== Warning: invalid file descriptor 1018 in syscall close() /bin/sh: es_tool: command not found Running 26 test cases... Helgrind: libhb_core.c:3762 (msm_write): Assertion 'ordxx == POrd_EQ || ordxx == POrd_LT' failed. ==8103== at 0x3801D17D: report_and_quit (m_libcassert.c:140) ==8103== by 0x3801CFE5: vgPlain_assert_fail (m_libcassert.c:205) ==8103== by 0x3800B198: msm_write (libhb_core.c:3762) ==8103== by 0x3800BA4F: zsm_apply32___msm_write (libhb_core.c:3991) ==8103== by 0x380140B9: evh__mem_help_write_4 (hg_main.c:1787) ==8103== by 0x64E3A211: ??? ==8103== by 0x733BEC54: ??? ==8103== by 0xA: ??? sched status: running_tid=6 Thread 1: status = VgTs_WaitSys ==8103== at 0x4515889: pthread_rwlock_unlock (in /lib/libpthread-2.3.6.so) ==8103== by 0x84322F2: common::RWLock::unLock() (sync.cpp:133) ==8103== by 0x83F5D1F: probe::PacketCaptureImpl::starting(probe::QosConfig&, probe::QosRuntime&) (PacketCapture.cpp:332) ==8103== by 0x84C4066: probe::QosRuntime::start() (QosRuntime.cpp:1181) ==8103== by 0x84E3CA8: probe::QosRuntime::installQosRuntime(probe::QosRuntime*) (RuntimeUpdaterImpl.cpp:266) ==8103== by 0x86DCC83: probe::ConfigCreateHelper::replaceQosRuntime() (ConfigHelper.cpp:314) ==8103== by 0x83E89D6: TestSuiteInitConfig() (TestTcpEvents.cpp:256) ==8103== by 0x83F1306: boost::unit_test::ut_detail::unused boost::unit_test::ut_detail::invoker<boost::unit_test::ut_detail::unused>::invoke<void (*)()>(void (*&)()) (callback.hpp:56) ==8103== by 0x83F132F: boost::unit_test::ut_detail::callback0_impl_t<boost::unit_test::ut_detail::unused, void (*)()>::invoke() (callback.hpp:89) ==8103== by 0x423E52C: boost::unit_test::ut_detail::callback0_impl_t<int, boost::unit_test::(anonymous namespace)::zero_return_wrapper>::invoke() (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x422CA7C: boost::execution_monitor::catch_signals(boost::unit_test::callback0<int> const&, bool, int) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x422CE9B: boost::execution_monitor::execute(boost::unit_test::callback0<int> const&, bool, int) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x423E2CE: boost::unit_test::unit_test_monitor_t::execute_and_translate(boost::unit_test::test_case const&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x422F7BB: boost::unit_test::framework_impl::visit(boost::unit_test::test_case const&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245A9E: boost::unit_test::traverse_test_tree(boost::unit_test::test_case const&, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245FA0: boost::unit_test::traverse_test_tree(unsigned long, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245DB0: boost::unit_test::traverse_test_tree(boost::unit_test::test_suite const&, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245FD5: boost::unit_test::traverse_test_tree(unsigned long, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245DB0: boost::unit_test::traverse_test_tree(boost::unit_test::test_suite const&, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x4245FD5: boost::unit_test::traverse_test_tree(unsigned long, boost::unit_test::test_tree_visitor&) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x422E2D6: boost::unit_test::framework::run(unsigned long, bool) (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) ==8103== by 0x423B079: main (in /usr/lib/libboost_unit_test_framework-gcc-mt-1_33_1.so.1.33.1) Thread 2: status = VgTs_WaitSys ==8103== at 0x4461CF6: (within /lib/libc-2.3.6.so) ==8103== by 0x8678534: Heartbeat::run() (Heartbeat.cpp:261) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 3: status = VgTs_WaitSys ==8103== at 0x4461CF6: (within /lib/libc-2.3.6.so) ==8103== by 0x8682213: LicenseHandler::run() (License.cpp:718) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 4: status = VgTs_WaitSys ==8103== at 0x4515B86: pthread_cond_wait@@GLIBC_2.3.2 (in /lib/libpthread-2.3.6.so) ==8103== by 0x449ED3C: pthread_cond_wait@@GLIBC_2.3.2 (in /lib/libc-2.3.6.so) ==8103== by 0x8432450: common::Condition::wait() (sync.cpp:173) ==8103== by 0x86B6DC6: EventMsgSender::run() (EventMsgSender.cpp:93) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 5: status = VgTs_WaitSys ==8103== at 0x451822E: __lll_mutex_lock_wait (in /lib/libpthread-2.3.6.so) ==8103== by 0x401F2BB: pthread_mutex_lock (hg_intercepts.c:401) ==8103== by 0x449EF25: pthread_mutex_lock (in /lib/libc-2.3.6.so) ==8103== by 0x84320E2: common::BaseMutex::lock() (sync.cpp:67) ==8103== by 0x84B48A4: probe::CdkPacketDispatcher::deliverSamples(probe::CdkUpdateThread*) (CdkPacketDispatcher.cpp:250) ==8103== by 0x84B49D4: probe::CdkUpdateThread::run() (CdkPacketDispatcher.cpp:42) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 6: status = VgTs_Runnable ==8103== at 0x4515849: pthread_rwlock_unlock (in /lib/libpthread-2.3.6.so) ==8103== by 0x84322F2: common::RWLock::unLock() (sync.cpp:133) ==8103== by 0x843235A: common::ReadLock::~ReadLock() (sync.cpp:144) ==8103== by 0x83F44B2: probe::PacketCaptureImpl::waitMgrThread() (PacketCapture.cpp:659) ==8103== by 0x8400FAE: PacketCaptureWaitMgr::run() (PacketCaptureThread.cpp:35) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 7: status = VgTs_WaitSys ==8103== at 0x4515398: pthread_rwlock_rdlock (in /lib/libpthread-2.3.6.so) ==8103== by 0x843227A: common::RWLock::readLock() (sync.cpp:125) ==8103== by 0x843232E: common::ReadLock::ReadLock(common::RWLock&) (sync.cpp:140) ==8103== by 0x83F16E5: probe::PacketCaptureImpl::writerThread() (PacketCapture.cpp:668) ==8103== by 0x84010EA: PacketCaptureWriter::run() (PacketCaptureThread.cpp:47) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 8: status = VgTs_WaitSys ==8103== at 0x4461CF6: (within /lib/libc-2.3.6.so) ==8103== by 0x86AD3BA: EventDetection::run() (EventDetection.cpp:284) ==8103== by 0x842A8AD: common::Thread::startTheThread(void*) (Thread.cpp:84) ==8103== by 0x401EDD5: mythread_wrapper (hg_intercepts.c:194) ==8103== by 0x451315F: start_thread (in /lib/libpthread-2.3.6.so) ==8103== by 0x449325D: clone (in /lib/libc-2.3.6.so) Thread 9: status = VgTs_WaitSys ==8103== at 0x4515DFC: pthread_cond_timedwait@@GLIBC_2.3.2 (in /lib/libpthread-2.3.6.so) ==8103== by 0x249FF617: ??? Note: see also the FAQ.txt in the source distribution. It contains workarounds to several common problems. If that doesn't help, please report this bug to: www.valgrind.org In the bug report, send all the above text, the valgrind version, and what Linux distro you are using. Thanks. Linux enki 2.6.24-22-generic #1 SMP Mon Nov 24 18:32:42 UTC 2008 i686 i386 i386 GNU/Linux Ubuntu 8.04 I compiled valgrind 3.4.0 from source.
> Helgrind: libhb_core.c:3762 (msm_write): > Assertion 'ordxx == POrd_EQ || ordxx == POrd_LT' failed. Urr, that _really_ shouldn't happen. I can't begin to guess why it did. Are you able to make a small test case that I can use to reproduce and fix the problem? Without being able to reproduce it there's little hope of fixing it.
Potentially a serious problem. I have a possible hypothesis about what this might be, but without a test case it's going to be essentially impossible to develop + test a fix.
I'll try and put a test case together - as you can imagine, it will be very difficult to isolate the exact case!
> I'll try and put a test case together Thanks! Just as a comment, my working hypothesis (now that I actually have one -- for the first couple of weeks I had no idea how this could even in theory happen) -- is that this is a race inside Helgrind itself. If that's correct, then I expect you may find this difficult to reproduce in that identical back-to back runs do and don't fail. So that's one possible behaviour to look out for.
No, it fails every single time reliably. I am starting to reduce the test case - it actually occurs, as you might guess from the stack trace, inside a boost unit test which starts a number of threads.
I can reproduce the same problem here. No short reproduce case unfortunately, as it involves quite a big project.
*** Bug 190391 has been marked as a duplicate of this bug. ***
#190391 is claimed to have a repeatable test case; if so that will be useful in diagnosing the problem.
I managed to get the same result while debugging feng http://cgit.lscube.org/cgit.cgi/feng/commit/?id=89d1d0493fca13a4370e3ceed60430e899f128e3 valgrind version 3.4.1
This appears to be to do with incorrect handling of memory following a race, in code that uses reader-writer locks. I believe the following patch should fix it. It would be good if you (all :-) could test it. It should apply against a standard 3.4.1 tarball, possibly with some fuzz. You may also want to consider applying the patch that fixes #188248. That fixes a different problem in Helgrind's handling rwlocks. It is the case that some programs trigger both bugs, so both fixes are necessary. Index: helgrind/libhb_core.c =================================================================== --- helgrind/libhb_core.c (revision 9900) +++ helgrind/libhb_core.c (working copy) @@ -3670,18 +3670,21 @@ /* assert on sanity of constraints. */ POrd ordxx = VtsID__getOrdering(rmini,wmini); tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT); - svNew = MSM_RACE2ERR - ? SVal__mkE() - /* see comments on corresponding fragment in - msm_write for explanation. */ - /* aggressive setting: */ - /* - : SVal__mkC( VtsID__join2(wmini,tviR), - VtsID__join2(wmini,tviW) ); - */ - /* "consistent" setting: */ - : SVal__mkC( VtsID__join2(rmini,tviR), - VtsID__join2(wmini,tviW) ); + /* Compute svNew following the race. This isn't so + simple. */ + /* see comments on corresponding fragment in + msm_write for explanation. */ + if (MSM_RACE2ERR) { + /* XXX UNUSED; this should be deleted */ + tl_assert(0); + svNew = SVal__mkE(); + } else { + VtsID r_joined = VtsID__join2(rmini,tviR); + VtsID w_joined = VtsID__join2(wmini,tviW); + /* ensure that r_joined <= w_joined */ + w_joined = VtsID__join2( w_joined, r_joined ); + svNew = SVal__mkC( r_joined, w_joined ); + } record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/ ); goto out; } @@ -3747,22 +3750,40 @@ /* assert on sanity of constraints. */ POrd ordxx = VtsID__getOrdering(rmini,wmini); tl_assert(ordxx == POrd_EQ || ordxx == POrd_LT); - svNew = MSM_RACE2ERR - ? SVal__mkE() - /* One possibility is, after a race is seen, to - set the location's constraints as aggressively - (as far ahead) as possible. However, that just - causes lots more races to be reported, which is - very confusing. Hence don't do this. */ - /* - : SVal__mkC( VtsID__join2(wmini,tviR), - VtsID__join2(wmini,tviW) ); - */ - /* instead, re-set the constraints in a way which - is consistent with (ie, as they would have been - computed anyway) had no race been detected. */ - : SVal__mkC( VtsID__join2(rmini,tviR), - VtsID__join2(wmini,tviW) ); + /* Compute svNew following the race. This isn't so + simple. */ + if (MSM_RACE2ERR) { + /* XXX UNUSED; this should be deleted */ + tl_assert(0); + svNew = SVal__mkE(); + /* One possibility is, after a race is seen, to + set the location's constraints as aggressively + (as far ahead) as possible. However, that just + causes lots more races to be reported, which is + very confusing. Hence don't do this. */ + /* + = SVal__mkC( VtsID__join2(wmini,tviR), + VtsID__join2(wmini,tviW) ); + */ + } else { + /* instead, re-set the constraints in a way which is + consistent with (ie, as they would have been computed + anyway) the case where no race was detected. */ + VtsID r_joined = VtsID__join2(rmini,tviR); + VtsID w_joined = VtsID__join2(wmini,tviW); + /* Because of the race, the "normal" ordering constraint + wmini(constraint) <= tviW(actual access) no longer + holds. Hence it can be that the required constraint + (on SVal_Cs) r_joined <= w_joined does not hold either. + To fix this and guarantee we're not generating invalid + SVal_Cs, do w_joined = w_joined `join` r_joined, so as + to force r_joined <= w_joined in the arguments to + SVal__mkC. I think this is only important when we're + dealing with reader-writer locks. + */ + w_joined = VtsID__join2( w_joined, r_joined ); + svNew = SVal__mkC( r_joined, w_joined ); + } record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/ ); goto out; }
This patch and the patch for #188248 both work for me, and my program now runs cleanly.
Applied to valgrind svn and seems to work correctly
Crash fixed here too.
Thanks for testing. Looks good. Ok to close this bug now?
Fixed (trunk r10074). Fix will also be shipped in 3.4.2.