diff --git a/src/db.c b/src/db.c index df226f02cdf..3e239dd786f 100644 --- a/src/db.c +++ b/src/db.c @@ -1709,6 +1709,9 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { } for (i = 0; i < metas->num; i++) { scanMeta *meta = metas->metas+i; + if (scanMetaExpireIfNeeded(c->db, meta)) { + continue; + } if (use_pattern && !stringmatchlen(pat, patlen,meta->key, sdslen(meta->key), 0)) { continue; } @@ -1718,9 +1721,6 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { if (type != LLONG_MAX && meta->swap_type != type) { continue; } - if (scanMetaExpireIfNeeded(c->db, meta)) { - continue; - } listAddNodeTail(keys,meta->key); } } else diff --git a/src/iothread.c b/src/iothread.c index 0f52f753d38..322f53476ea 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -942,8 +942,13 @@ void ioThreadsScaleDownTryEnd(void) { if (thread_state == THREAD_STATE_STOPPED) { destroyIOThread(t); server.io_threads_num--; - /* scaling down is not urgent - (it doesn't consume time from the main thread) */ + /* If all threads have been scaled down to the target + * count, transition immediately so callers see NONE + * in the same beforeSleep pass. */ + if (server.io_threads_num <= server.config_io_threads_num) { + server.io_threads_scale_status = IO_THREAD_SCALE_STATUS_NONE; + serverLog(LL_NOTICE, "IO threads scale-down end"); + } } } else { pauseIOThread(t->id); diff --git a/tests/gtid/psync2-master-restart.tcl b/tests/gtid/psync2-master-restart.tcl index 52b785d6b41..12a07ec4ada 100644 --- a/tests/gtid/psync2-master-restart.tcl +++ b/tests/gtid/psync2-master-restart.tcl @@ -232,6 +232,21 @@ start_server {overrides {gtid-enabled yes}} { assert {[status $master sync_partial_ok] == 1} assert {[status $replica sync_partial_ok] == 1} + if {$::swap} { + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] && + [$master debug digest] eq [$replica debug digest] && + [$master debug digest] eq [$sub_replica debug digest] + } else { + show_cluster_status + puts "master digest: [$master debug digest]" + puts "replica digest: [$replica debug digest]" + puts "sub_replica digest: [$sub_replica debug digest]" + fail "Replication chain did not converge after expire restart." + } + } + set digest [$master debug digest] assert {$digest eq [$replica debug digest]} assert {$digest eq [$sub_replica debug digest]} diff --git a/tests/gtid/replication-psync.tcl b/tests/gtid/replication-psync.tcl index 4d6602fd64f..00053c6d026 100644 --- a/tests/gtid/replication-psync.tcl +++ b/tests/gtid/replication-psync.tcl @@ -25,6 +25,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco if {$::swap} { set load_handle0 [start_bg_complex_data $master_host $master_port 0 100000] + set load_handle1 [start_bg_complex_data $master_host $master_port 1 100000] } else { set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] @@ -74,6 +75,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco if {$::swap} { stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 } else { stop_bg_complex_data $load_handle0 stop_bg_complex_data $load_handle1 diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl index aeb1cac8575..adf278837df 100644 --- a/tests/integration/logging.tcl +++ b/tests/integration/logging.tcl @@ -9,6 +9,19 @@ if {$system_name eq {linux}} { # look for the DEBUG command in the backtrace, used when we triggered # a stack trace print while we know redis is running that command. +proc get_last_stacktrace_progress {srv_idx} { + set stdout [srv $srv_idx stdout] + if {[catch { + set summary [string trim [exec grep -E {[0-9]+/[0-9]+ expected stacktraces\.} $stdout | tail -n 1]] + } err]} { + fail "failed to read stacktrace progress from $stdout: $err" + } + if {![regexp {([0-9]+)/([0-9]+) expected stacktraces\.} $summary -> collected expected]} { + fail "failed to parse stacktrace progress from $stdout: $summary" + } + return [list $collected $expected] +} + proc check_log_backtrace_for_debug {log_pattern} { # search for the final line in the stacktraces generation to make sure it was completed. set pattern "* STACK TRACE DONE *" @@ -30,8 +43,13 @@ proc check_log_backtrace_for_debug {log_pattern} { # the following are skipped since valgrind is slow and a timeout can happen if {!$::valgrind} { assert_equal [count_log_message 0 "wait_threads(): waiting threads timed out"] 0 - # make sure redis prints stack trace for all threads. we know 3 threads are idle in bio.c - assert_equal [count_log_message 0 "bioProcessBackgroundJobs"] 3 + if {$::swap} { + lassign [get_last_stacktrace_progress 0] collected expected + assert_equal $collected $expected + assert {$expected >= 4} + } else { + assert_equal [count_log_message 0 "bioProcessBackgroundJobs"] 3 + } } } diff --git a/tests/integration/psync2-master-restart.tcl b/tests/integration/psync2-master-restart.tcl index 94103bcf2a9..840a5dfcb40 100644 --- a/tests/integration/psync2-master-restart.tcl +++ b/tests/integration/psync2-master-restart.tcl @@ -174,6 +174,21 @@ start_server {} { assert {[status $master sync_partial_ok] == 1} assert {[status $replica sync_partial_ok] == 1} + if {$::swap} { + wait_for_condition 500 100 { + [status $master master_repl_offset] == [status $replica master_repl_offset] && + [status $master master_repl_offset] == [status $sub_replica master_repl_offset] && + [$master debug digest] eq [$replica debug digest] && + [$master debug digest] eq [$sub_replica debug digest] + } else { + show_cluster_status + puts "master digest: [$master debug digest]" + puts "replica digest: [$replica debug digest]" + puts "sub_replica digest: [$sub_replica debug digest]" + fail "Replication chain did not converge after expire restart." + } + } + set digest [$master debug digest] assert {$digest eq [$replica debug digest]} assert {$digest eq [$sub_replica debug digest]} diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 2ba7c2af158..d21ada107a0 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -1110,14 +1110,16 @@ foreach mdl {yes no} { set master_pid [srv 0 pid] $master config set repl-diskless-sync $mdl $master config set repl-diskless-sync-delay 0 - # create keys that will take 10 seconds to save - # In SWAP mode, use swap-debug-rdb-key-save-delay-micro instead + # create keys that will take ~10 seconds to save. + # rdb-key-save-delay applies to all hot keys in both SWAP and non-SWAP mode. + # swap-debug-rdb-key-save-delay-micro only delays cold/warm keys in SWAP mode, + # but debug populate creates hot keys, so rdb-key-save-delay must be used here + # to ensure the child is still saving when the parent is killed. if {$::swap} { $master config set swap-repl-rordb-sync no $master config set swap-debug-rdb-key-save-delay-micro 1000 - } else { - $master config set rdb-key-save-delay 1000 - } + } + $master config set rdb-key-save-delay 1000 $master debug populate 10000 start_server {overrides {save ""}} { set replica [srv 0 client] @@ -1135,7 +1137,8 @@ foreach mdl {yes no} { exec kill -9 $master_pid # wait for the child to notice the parent died have exited - wait_for_condition 500 10 { + set rdb_child_timeout [expr {$::swap ? 2000 : 500}] + wait_for_condition $rdb_child_timeout 10 { [process_is_alive $fork_child_pid] == 0 } else { fail "rdb child didn't terminate" @@ -1551,6 +1554,9 @@ foreach disklessload {disabled on-empty-db} { $replica config set repl-diskless-load $disklessload # Populate replica with many keys, master with a few keys. + if {$::swap} { + $replica config set swap-debug-evict-keys 0 + } $replica debug populate 4000000 populate 3 master 10 @@ -1571,7 +1577,9 @@ foreach disklessload {disabled on-empty-db} { # Discarding old db will take a long time and loading new one # will be quick. So, if we receive -LOADING, most probably it is # when flushing the db. - wait_for_condition 1 10000 { + set loading_tries [expr {$::swap ? 100 : 1}] + set loading_delay [expr {$::swap ? 100 : 10000}] + wait_for_condition $loading_tries $loading_delay { [catch {$replica ping} err] && [string match *LOADING* $err] } else { @@ -1876,10 +1884,22 @@ start_server {tags {"repl external:skip"}} { # Connect to an invalid master $slave slaveof $master_host 0 - after 1000 - # Expect current sync attempts to increase - assert {[status $slave master_current_sync_attempts] >= 2} + # Expect current sync attempts to increase. + # In SWAP mode, swap_draining_master can defer the first connection + # attempt by ~100ms (waitSwapDrainingMaster timer), shifting the + # second attempt past the 1s replicationCron window. Use + # wait_for_condition only in SWAP mode to avoid the race. + if {$::swap} { + wait_for_condition 200 50 { + [status $slave master_current_sync_attempts] >= 2 + } else { + fail "master_current_sync_attempts didn't increase after switching to invalid master" + } + } else { + after 1000 + assert {[status $slave master_current_sync_attempts] >= 2} + } } } } diff --git a/tests/swap/unit/dirty.tcl b/tests/swap/unit/dirty.tcl index 148a0ec7bc1..a97d040d9c8 100644 --- a/tests/swap/unit/dirty.tcl +++ b/tests/swap/unit/dirty.tcl @@ -177,6 +177,7 @@ start_server {tags {"dirty subkeys"} overrides {swap-dirty-subkeys-enabled yes}} assert_equal [object_is_warm r hash3] 1 r swap.evict hash3 + wait_key_cold r hash3 assert_equal [object_is_cold r hash3] 1 assert_equal [r hmget hash3 a b c 1 2] {a1 b0 c0 10 20} @@ -274,6 +275,7 @@ start_server {tags {"dirty subkeys"} overrides {swap-dirty-subkeys-enabled yes}} assert_equal [object_is_warm r set3] 1 r swap.evict set3 + wait_key_cold r set3 assert_equal [object_is_cold r set3] 1 assert_equal [r smismember set3 a b c 1 2] {1 1 1 1 1} diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index cc66dfb2bfd..e135922dad9 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -852,22 +852,23 @@ start_server {tags {"expire"}} { r debug set-active-expire 0 r flushall - r set foo1 bar PX 1 - if {$::swap} { - #TODO optimize wait_key_cold - catch {wait_key_cold r foo1} err - } - r set foo2 bar PX 1 - if {$::swap} { - #TODO optimize wait_key_cold - catch {wait_key_cold r foo2} err + if {!$::swap} { + r set foo1 bar PX 1 + r set foo2 bar PX 1 + after 2 + } else { + r set foo1 bar PX 80 + r set foo2 bar PX 80 + r swap.evict foo1 + r swap.evict foo2 + wait_key_cold r foo1 + wait_key_cold r foo2 + after 100 } - after 2 - set repl [attach_to_replication_stream] if {!$::swap} { - r scan 0 + r scan 0 } else { set next_cursor [lindex [r scan 0] 0] r scan $next_cursor @@ -893,17 +894,19 @@ start_server {tags {"expire"}} { r debug set-active-expire 0 r flushall - r set foo1 bar PX 1 - if {$::swap} { - #TODO optimize wait_key_cold - catch {wait_key_cold r foo1} err - } - r set foo2 bar PX 1 - if {$::swap} { - #TODO optimize wait_key_cold - catch {wait_key_cold r foo2} err + if {!$::swap} { + r set foo1 bar PX 1 + r set foo2 bar PX 1 + after 2 + } else { + r set foo1 bar PX 80 + r set foo2 bar PX 80 + r swap.evict foo1 + r swap.evict foo2 + wait_key_cold r foo1 + wait_key_cold r foo2 + after 100 } - after 2 set repl [attach_to_replication_stream] diff --git a/tests/unit/io_thread.tcl b/tests/unit/io_thread.tcl index ab298241321..a2c8de122f0 100644 --- a/tests/unit/io_thread.tcl +++ b/tests/unit/io_thread.tcl @@ -153,7 +153,15 @@ start_server {overrides {}} { } lappend clients $cli } - assert_equal [get_kv_value [get_info_field [r info threads] io_thread_1 ] clients] 101 + # Wait for all 100 new clients to be accepted and assigned by the server. + # When singledb=1 (e.g. swap build), no SELECT is sent so there is no + # synchronous round-trip to guarantee the server has accepted every + # connection before we query the thread info. + wait_for_condition 100 50 { + [get_kv_value [get_info_field [r info threads] io_thread_1 ] clients] == 101 + } else { + fail "Expected 101 clients on io_thread_1 after adding 100 connections" + } # set io-threads n # wait CLIENT_IO_PENDING_CRON ,load balancing @@ -181,12 +189,16 @@ start_server {overrides {}} { } r config set io-threads 2 assert_equal [get_info_field [r info threads] io_thread_scale_status] "down" + # ioThreadsScaleDownTryEnd() destroys the thread and decrements + # io_threads_num in one beforeSleep pass, but only transitions + # scale_status from DOWN to NONE in the *next* pass. Use + # wait_for_condition for both checks to avoid the one-iteration race. wait_for_condition 100 50 { - [get_info_field [r info threads] io_thread_2 ] eq "" + [get_info_field [r info threads] io_thread_2 ] eq "" && + [get_info_field [r info threads] io_thread_scale_status] eq "none" } else { fail "thread down n => 2 fail" } - assert_equal [get_info_field [r info threads] io_thread_scale_status] "none" if {!$::external} { verify_log_message 0 "*IO threads scale-down end*" $lines } diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index c77629edf03..009ee6a3234 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -1119,6 +1119,12 @@ foreach {pop} {BLPOP BLMPOP_LEFT} { } else { fail "exec did not arrive" } + if {$::swap} { + wait_for_condition 200 10 { + [regexp {cmd=exec} [r client list]] eq 0 + } else { + } + } # Blocked BLPOPLPUSH may create problems, unblock it. r lpush srclist{t} element set res [$watching_client read]