Skip to content

Commit e3dfd22

Browse files
connpool: fix connection leak during idle connection reopen (#18967)
Signed-off-by: Arthur Schreiber <[email protected]>
1 parent abce8c9 commit e3dfd22

File tree

2 files changed

+105
-4
lines changed

2 files changed

+105
-4
lines changed

go/pools/smartconnpool/pool.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -460,9 +460,13 @@ func (pool *ConnPool[C]) pop(stack *connStack[C]) *Pooled[C] {
460460
// to expire this connection (even if it's still visible to them), so it's
461461
// safe to return it
462462
for conn, ok := stack.Pop(); ok; conn, ok = stack.Pop() {
463-
if conn.timeUsed.borrow() {
464-
return conn
463+
if !conn.timeUsed.borrow() {
464+
// Ignore the connection that couldn't be borrowed;
465+
// it's being closed by the idle worker and replaced by a new connection.
466+
continue
465467
}
468+
469+
return conn
466470
}
467471
return nil
468472
}
@@ -787,11 +791,23 @@ func (pool *ConnPool[C]) closeIdleResources(now time.Time) {
787791
for conn := s.Peek(); conn != nil; conn = conn.next.Load() {
788792
if conn.timeUsed.expired(mono, timeout) {
789793
pool.Metrics.idleClosed.Add(1)
794+
790795
conn.Close()
796+
pool.closedConn()
797+
791798
// Using context.Background() is fine since MySQL connection already enforces
792799
// a connect timeout via the `db-connect-timeout-ms` config param.
793-
if err := pool.connReopen(context.Background(), conn, mono); err != nil {
794-
pool.closedConn()
800+
c, err := pool.getNew(context.Background())
801+
if err != nil {
802+
// If we couldn't open a new connection, just continue
803+
continue
804+
}
805+
806+
// opening a new connection might have raced with other goroutines,
807+
// so it's possible that we got back `nil` here
808+
if c != nil {
809+
// Return the new connection to the pool
810+
pool.tryReturnConn(c)
795811
}
796812
}
797813
}

go/pools/smartconnpool/pool_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,3 +1319,88 @@ func TestCloseDuringWaitForConn(t *testing.T) {
13191319
require.EqualValues(t, 0, state.open.Load())
13201320
}
13211321
}
1322+
1323+
// TestIdleTimeoutConnectionLeak checks for leaked connections after idle timeout
1324+
func TestIdleTimeoutConnectionLeak(t *testing.T) {
1325+
var state TestState
1326+
1327+
// Slow connection creation to ensure idle timeout happens during reopening
1328+
state.chaos.delayConnect = 300 * time.Millisecond
1329+
1330+
p := NewPool(&Config[*TestConn]{
1331+
Capacity: 2,
1332+
IdleTimeout: 50 * time.Millisecond,
1333+
LogWait: state.LogWait,
1334+
}).Open(newConnector(&state), nil)
1335+
1336+
getCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
1337+
defer cancel()
1338+
1339+
// Get and return two connections
1340+
conn1, err := p.Get(getCtx, nil)
1341+
require.NoError(t, err)
1342+
1343+
conn2, err := p.Get(getCtx, nil)
1344+
require.NoError(t, err)
1345+
1346+
p.put(conn1)
1347+
p.put(conn2)
1348+
1349+
// At this point: Active=2, InUse=0, Available=2
1350+
require.EqualValues(t, 2, p.Active())
1351+
require.EqualValues(t, 0, p.InUse())
1352+
require.EqualValues(t, 2, p.Available())
1353+
1354+
// Wait for idle timeout to kick in and start expiring connections
1355+
require.EventuallyWithT(t, func(c *assert.CollectT) {
1356+
// Check the actual number of currently open connections
1357+
assert.Equal(c, int64(2), state.open.Load())
1358+
// Check the total number of closed connections
1359+
assert.Equal(c, int64(1), state.close.Load())
1360+
}, 100*time.Millisecond, 10*time.Millisecond)
1361+
1362+
// At this point, the idle timeout worker has expired the connections
1363+
// and is trying to reopen them (which takes 300ms due to delayConnect)
1364+
1365+
// Try to get connections while they're being reopened
1366+
// This should trigger the bug where connections get discarded
1367+
for i := 0; i < 2; i++ {
1368+
getCtx, cancel := context.WithTimeout(t.Context(), 50*time.Millisecond)
1369+
defer cancel()
1370+
1371+
conn, err := p.Get(getCtx, nil)
1372+
require.NoError(t, err)
1373+
1374+
p.put(conn)
1375+
}
1376+
1377+
// Wait a moment for all reopening to complete
1378+
require.EventuallyWithT(t, func(c *assert.CollectT) {
1379+
// Check the actual number of currently open connections
1380+
require.Equal(c, int64(2), state.open.Load())
1381+
// Check the total number of closed connections
1382+
require.Equal(c, int64(2), state.close.Load())
1383+
}, 400*time.Millisecond, 10*time.Millisecond)
1384+
1385+
// Check the pool state
1386+
assert.Equal(t, int64(2), p.Active())
1387+
assert.Equal(t, int64(0), p.InUse())
1388+
assert.Equal(t, int64(2), p.Available())
1389+
assert.Equal(t, int64(2), p.Metrics.IdleClosed())
1390+
1391+
// Try to close the pool - if there are leaked connections, this will timeout
1392+
closeCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
1393+
defer cancel()
1394+
1395+
err = p.CloseWithContext(closeCtx)
1396+
require.NoError(t, err)
1397+
1398+
// Pool should be completely closed now
1399+
assert.Equal(t, int64(0), p.Active())
1400+
assert.Equal(t, int64(0), p.InUse())
1401+
assert.Equal(t, int64(0), p.Available())
1402+
assert.Equal(t, int64(2), p.Metrics.IdleClosed())
1403+
1404+
assert.Equal(t, int64(0), state.open.Load())
1405+
assert.Equal(t, int64(4), state.close.Load())
1406+
}

0 commit comments

Comments
 (0)