Skip to content

Commit 55b8e16

Browse files
Bryan C. Millsgopherbot
authored andcommitted
testing: use monotonic counts to attribute races in subtests
This implements the approach I described in https://go-review.git.corp.google.com/c/go/+/494057/1#message-5c9773bded2f89b4058848cb036b860aa6716de3. Specifically: - Each level of test atomically records the cumulative number of races seen as of the last race-induced test failure. - When a subtest fails, it logs the race error, and then updates its parents' counters so that they will not log the same error. - We check each test or benchmark for races before it starts running each of its subtests or sub-benchmark, before unblocking parallel subtests, and after running any cleanup functions. With this implementation, it should be the case that every test that is running when a race is detected reports that race, and any race reported for a subtest is not redundantly reported for its parent. The regression tests are based on those added in CL 494057 and CL 501895, with a few additions based on my own review of the code. Fixes #60083. Change-Id: I578ae929f192a7a951b31b17ecb560cbbf1ef7a1 Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-amd64-longtest-race,gotip-windows-amd64-longtest Reviewed-on: https://go-review.googlesource.com/c/go/+/506300 Reviewed-by: Ian Lance Taylor <[email protected]> Auto-Submit: Bryan Mills <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent a57c573 commit 55b8e16

File tree

5 files changed

+459
-22
lines changed

5 files changed

+459
-22
lines changed

src/runtime/race/output_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,8 @@ func TestFail(t *testing.T) {
208208
`, []string{`
209209
==================
210210
--- FAIL: TestFail \([0-9.]+s\)
211-
.*main_test.go:14: true
212211
.*testing.go:.*: race detected during execution of test
212+
.*main_test.go:14: true
213213
FAIL`}},
214214

215215
{"slicebytetostring_pc", "run", "", "atexit_sleep_ms=0", `

src/testing/benchmark.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package testing
77
import (
88
"flag"
99
"fmt"
10-
"internal/race"
1110
"internal/sysinfo"
1211
"io"
1312
"math"
@@ -179,11 +178,14 @@ func (b *B) ReportAllocs() {
179178
func (b *B) runN(n int) {
180179
benchmarkLock.Lock()
181180
defer benchmarkLock.Unlock()
182-
defer b.runCleanup(normalPanic)
181+
defer func() {
182+
b.runCleanup(normalPanic)
183+
b.checkRaces()
184+
}()
183185
// Try to get a comparable environment for each run
184186
// by clearing garbage from previous runs.
185187
runtime.GC()
186-
b.raceErrors = -race.Errors()
188+
b.resetRaces()
187189
b.N = n
188190
b.parallelism = 1
189191
b.ResetTimer()
@@ -192,10 +194,6 @@ func (b *B) runN(n int) {
192194
b.StopTimer()
193195
b.previousN = n
194196
b.previousDuration = b.duration
195-
b.raceErrors += race.Errors()
196-
if b.raceErrors > 0 {
197-
b.Errorf("race detected during execution of benchmark")
198-
}
199197
}
200198

201199
// run1 runs the first iteration of benchFunc. It reports whether more

src/testing/fuzz.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ func fRunner(f *F, fn func(*F)) {
636636
// Unfortunately, recovering here adds stack frames, but the location of
637637
// the original panic should still be
638638
// clear.
639+
f.checkRaces()
639640
if f.Failed() {
640641
numFailed.Add(1)
641642
}
@@ -719,6 +720,7 @@ func fRunner(f *F, fn func(*F)) {
719720
}()
720721

721722
f.start = time.Now()
723+
f.resetRaces()
722724
fn(f)
723725

724726
// Code beyond this point will not be executed when FailNow or SkipNow

src/testing/testing.go

Lines changed: 107 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,6 @@ type common struct {
611611
bench bool // Whether the current test is a benchmark.
612612
hasSub atomic.Bool // whether there are sub-benchmarks.
613613
cleanupStarted atomic.Bool // Registered cleanup callbacks have started to execute
614-
raceErrors int // Number of races detected during test.
615614
runner string // Function name of tRunner running the test.
616615
isParallel bool // Whether the test is parallel.
617616

@@ -625,6 +624,9 @@ type common struct {
625624
signal chan bool // To signal a test is done.
626625
sub []*T // Queue of subtests to be run in parallel.
627626

627+
lastRaceErrors atomic.Int64 // Max value of race.Errors seen during the test or its subtests.
628+
raceErrorLogged atomic.Bool
629+
628630
tempDirMu sync.Mutex
629631
tempDir string
630632
tempDirErr error
@@ -955,9 +957,15 @@ func (c *common) Fail() {
955957
// Failed reports whether the function has failed.
956958
func (c *common) Failed() bool {
957959
c.mu.RLock()
958-
failed := c.failed
959-
c.mu.RUnlock()
960-
return failed || c.raceErrors+race.Errors() > 0
960+
defer c.mu.RUnlock()
961+
962+
if !c.done && int64(race.Errors()) > c.lastRaceErrors.Load() {
963+
c.mu.RUnlock()
964+
c.checkRaces()
965+
c.mu.RLock()
966+
}
967+
968+
return c.failed
961969
}
962970

963971
// FailNow marks the function as having failed and stops its execution
@@ -1346,6 +1354,69 @@ func (c *common) runCleanup(ph panicHandling) (panicVal any) {
13461354
}
13471355
}
13481356

1357+
// resetRaces updates c.parent's count of data race errors (or the global count,
1358+
// if c has no parent), and updates c.lastRaceErrors to match.
1359+
//
1360+
// Any races that occurred prior to this call to resetRaces will
1361+
// not be attributed to c.
1362+
func (c *common) resetRaces() {
1363+
if c.parent == nil {
1364+
c.lastRaceErrors.Store(int64(race.Errors()))
1365+
} else {
1366+
c.lastRaceErrors.Store(c.parent.checkRaces())
1367+
}
1368+
}
1369+
1370+
// checkRaces checks whether the global count of data race errors has increased
1371+
// since c's count was last reset.
1372+
//
1373+
// If so, it marks c as having failed due to those races (logging an error for
1374+
// the first such race), and updates the race counts for the parents of c so
1375+
// that if they are currently suspended (such as in a call to T.Run) they will
1376+
// not log separate errors for the race(s).
1377+
//
1378+
// Note that multiple tests may be marked as failed due to the same race if they
1379+
// are executing in parallel.
1380+
func (c *common) checkRaces() (raceErrors int64) {
1381+
raceErrors = int64(race.Errors())
1382+
for {
1383+
last := c.lastRaceErrors.Load()
1384+
if raceErrors <= last {
1385+
// All races have already been reported.
1386+
return raceErrors
1387+
}
1388+
if c.lastRaceErrors.CompareAndSwap(last, raceErrors) {
1389+
break
1390+
}
1391+
}
1392+
1393+
if c.raceErrorLogged.CompareAndSwap(false, true) {
1394+
// This is the first race we've encountered for this test.
1395+
// Mark the test as failed, and log the reason why only once.
1396+
// (Note that the race detector itself will still write a goroutine
1397+
// dump for any further races it detects.)
1398+
c.Errorf("race detected during execution of test")
1399+
}
1400+
1401+
// Update the parent(s) of this test so that they don't re-report the race.
1402+
parent := c.parent
1403+
for parent != nil {
1404+
for {
1405+
last := parent.lastRaceErrors.Load()
1406+
if raceErrors <= last {
1407+
// This race was already reported by another (likely parallel) subtest.
1408+
return raceErrors
1409+
}
1410+
if parent.lastRaceErrors.CompareAndSwap(last, raceErrors) {
1411+
break
1412+
}
1413+
}
1414+
parent = parent.parent
1415+
}
1416+
1417+
return raceErrors
1418+
}
1419+
13491420
// callerName gives the function name (qualified with a package path)
13501421
// for the caller after skip frames (where 0 means the current function).
13511422
func callerName(skip int) string {
@@ -1390,7 +1461,18 @@ func (t *T) Parallel() {
13901461

13911462
// Add to the list of tests to be released by the parent.
13921463
t.parent.sub = append(t.parent.sub, t)
1393-
t.raceErrors += race.Errors()
1464+
1465+
// Report any races during execution of this test up to this point.
1466+
//
1467+
// We will assume that any races that occur between here and the point where
1468+
// we unblock are not caused by this subtest. That assumption usually holds,
1469+
// although it can be wrong if the test spawns a goroutine that races in the
1470+
// background while the rest of the test is blocked on the call to Parallel.
1471+
// If that happens, we will misattribute the background race to some other
1472+
// test, or to no test at all — but that false-negative is so unlikely that it
1473+
// is not worth adding race-report noise for the common case where the test is
1474+
// completely suspended during the call to Parallel.
1475+
t.checkRaces()
13941476

13951477
if t.chatty != nil {
13961478
t.chatty.Updatef(t.name, "=== PAUSE %s\n", t.name)
@@ -1405,9 +1487,16 @@ func (t *T) Parallel() {
14051487
t.chatty.Updatef(t.name, "=== CONT %s\n", t.name)
14061488
}
14071489
running.Store(t.name, time.Now())
1408-
14091490
t.start = time.Now()
1410-
t.raceErrors += -race.Errors()
1491+
1492+
// Reset the local race counter to ignore any races that happened while this
1493+
// goroutine was blocked, such as in the parent test or in other parallel
1494+
// subtests.
1495+
//
1496+
// (Note that we don't call parent.checkRaces here:
1497+
// if other parallel subtests have already introduced races, we want to
1498+
// let them report those races instead of attributing them to the parent.)
1499+
t.lastRaceErrors.Store(int64(race.Errors()))
14111500
}
14121501

14131502
// Setenv calls os.Setenv(key, value) and uses Cleanup to
@@ -1455,14 +1544,13 @@ func tRunner(t *T, fn func(t *T)) {
14551544
// a call to runtime.Goexit, record the duration and send
14561545
// a signal saying that the test is done.
14571546
defer func() {
1547+
t.checkRaces()
1548+
1549+
// TODO(#61034): This is the wrong place for this check.
14581550
if t.Failed() {
14591551
numFailed.Add(1)
14601552
}
14611553

1462-
if t.raceErrors+race.Errors() > 0 {
1463-
t.Errorf("race detected during execution of test")
1464-
}
1465-
14661554
// Check if the test panicked or Goexited inappropriately.
14671555
//
14681556
// If this happens in a normal test, print output but continue panicking.
@@ -1564,6 +1652,7 @@ func tRunner(t *T, fn func(t *T)) {
15641652
if err != nil {
15651653
doPanic(err)
15661654
}
1655+
t.checkRaces()
15671656
if !t.isParallel {
15681657
// Reacquire the count for sequential tests. See comment in Run.
15691658
t.context.waitParallel()
@@ -1589,7 +1678,7 @@ func tRunner(t *T, fn func(t *T)) {
15891678
}()
15901679

15911680
t.start = time.Now()
1592-
t.raceErrors = -race.Errors()
1681+
t.resetRaces()
15931682
fn(t)
15941683

15951684
// code beyond here will not be executed when FailNow is invoked
@@ -1936,7 +2025,12 @@ func (m *M) Run() (code int) {
19362025
testOk = false
19372026
}
19382027
}
1939-
if !testOk || !exampleOk || !fuzzTargetsOk || !runBenchmarks(m.deps.ImportPath(), m.deps.MatchString, m.benchmarks) || race.Errors() > 0 {
2028+
anyFailed := !testOk || !exampleOk || !fuzzTargetsOk || !runBenchmarks(m.deps.ImportPath(), m.deps.MatchString, m.benchmarks)
2029+
if !anyFailed && race.Errors() > 0 {
2030+
fmt.Print(chatty.prefix(), "testing: race detected outside of test execution\n")
2031+
anyFailed = true
2032+
}
2033+
if anyFailed {
19402034
fmt.Print(chatty.prefix(), "FAIL\n")
19412035
m.exitCode = 1
19422036
return

0 commit comments

Comments
 (0)