diff options
Diffstat (limited to 'tests/sentinel')
| -rw-r--r-- | tests/sentinel/run.tcl | 23 | ||||
| -rw-r--r-- | tests/sentinel/tests/00-base.tcl | 126 | ||||
| -rw-r--r-- | tests/sentinel/tests/01-conf-update.tcl | 39 | ||||
| -rw-r--r-- | tests/sentinel/tests/02-slaves-reconf.tcl | 84 | ||||
| -rw-r--r-- | tests/sentinel/tests/03-runtime-reconf.tcl | 1 | ||||
| -rw-r--r-- | tests/sentinel/tests/04-slave-selection.tcl | 5 | ||||
| -rw-r--r-- | tests/sentinel/tests/05-manual.tcl | 45 | ||||
| -rw-r--r-- | tests/sentinel/tests/06-ckquorum.tcl | 34 | ||||
| -rw-r--r-- | tests/sentinel/tests/07-down-conditions.tcl | 68 | ||||
| -rw-r--r-- | tests/sentinel/tests/includes/init-tests.tcl | 72 | ||||
| -rw-r--r-- | tests/sentinel/tmp/.gitignore | 2 |
11 files changed, 499 insertions, 0 deletions
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
new file mode 100644
index 0000000..9a2fcfb
--- /dev/null
+++ b/tests/sentinel/run.tcl
@@ -0,0 +1,23 @@
+# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo [email protected]
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/sentinel
+source ../instances.tcl
+
+set ::instances_count 5 ; # How many instances we use at max.
+
+proc main {} {
+    parse_options
+    spawn_instance sentinel $::sentinel_base_port $::instances_count
+    spawn_instance redis $::redis_base_port $::instances_count
+    run_tests
+    cleanup
+    end_tests
+}
+
+if {[catch main e]} {
+    puts $::errorInfo
+    cleanup
+    exit 1
+}
diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
new file mode 100644
index 0000000..a79d0c3
--- /dev/null
+++ b/tests/sentinel/tests/00-base.tcl
@@ -0,0 +1,126 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+if {$::simulate_error} {
+    test "This test will fail" {
+        fail "Simulated error"
+    }
+}
+
+test "Basic failover works if the master is down" {
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    kill_instance redis $master_id
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not received failover info"
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+    foreach_redis_id id {
+        if {$id != $master_id && $id != 0} {
+            wait_for_condition 1000 50 {
+                [RI $id master_port] == [lindex $addr 1]
+            } else {
+                fail "Redis ID $id not configured to replicate with new master"
+            }
+        }
+    }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 master_port] == [lindex $addr 1]
+    } else {
+        fail "Old master not reconfigured as slave of new master"
+    }
+}
+
+test "ODOWN is not possible without N (quorum) Sentinels reports" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum [expr {$sentinels+1}] ; # One more than the number of Sentinel instances.
+    }
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    kill_instance redis $master_id
+
+    # Make sure failover did not happen.
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    restart_instance redis $master_id
+}
+
+test "Failover is not possible without majority agreement" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $quorum
+    }
+
+    # Crash majority of sentinels
+    for {set id 0} {$id < $quorum} {incr id} {
+        kill_instance sentinel $id
+    }
+
+    # Kill the current master
+    kill_instance redis $master_id
+
+    # Make sure failover did not happen.
+    set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port} ; # old_port is still the value set by the previous test.
+    restart_instance redis $master_id
+
+    # Cleanup: restart Sentinels to monitor the master.
+    for {set id 0} {$id < $quorum} {incr id} {
+        restart_instance sentinel $id
+    }
+}
+
+test "Failover works if we configure for absolute agreement" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $sentinels
+    }
+
+    # Wait for Sentinels to monitor the master again
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] info-refresh] < 100000
+        } else {
+            fail "At least one Sentinel is not monitoring the master"
+        }
+    }
+
+    kill_instance redis $master_id
+
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not received failover info"
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+
+    # Set the min ODOWN agreement back to strict majority.
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $quorum
+    }
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/01-conf-update.tcl b/tests/sentinel/tests/01-conf-update.tcl
new file mode 100644
index 0000000..4998104
--- /dev/null
+++ b/tests/sentinel/tests/01-conf-update.tcl
@@ -0,0 +1,39 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+
+    # Crash Sentinel 1
+    kill_instance sentinel 1
+
+    kill_instance redis $master_id
+    foreach_sentinel_id id {
+        if {$id != 1} {
+            wait_for_condition 1000 50 {
+                [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+            } else {
+                fail "Sentinel $id did not received failover info"
+            }
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+    restart_instance sentinel 1
+    wait_for_condition 1000 50 {
+        [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+    } else {
+        fail "Restarted Sentinel did not received failover info"
+    }
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/02-slaves-reconf.tcl b/tests/sentinel/tests/02-slaves-reconf.tcl
new file mode 100644
index 0000000..fa15d2e
--- /dev/null
+++ b/tests/sentinel/tests/02-slaves-reconf.tcl
@@ -0,0 +1,84 @@
+# Check that slaves are reconfigured at a later time if they are partitioned.
+#
+# Here we should test:
+# 1) That slaves point to the new master after failover.
+# 2) That partitioned slaves point to new master when they are partitioned
+#    away during failover and return at a later time.
+
+source "../tests/includes/init-tests.tcl"
+
+proc 02_test_slaves_replication {} {
+    uplevel 1 {
+        test "Check that slaves replicate from current master" {
+            set master_port [RI $master_id tcp_port]
+            foreach_redis_id id {
+                if {$id == $master_id} continue
+                if {[instance_is_killed redis $id]} continue
+                wait_for_condition 1000 50 {
+                    ([RI $id master_port] == $master_port) &&
+                    ([RI $id master_link_status] eq {up})
+                } else {
+                    fail "Redis slave $id is replicating from wrong master"
+                }
+            }
+        }
+    }
+}
+
+proc 02_crash_and_failover {} {
+    uplevel 1 {
+        test "Crash the master and force a failover" {
+            set old_port [RI $master_id tcp_port]
+            set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+            assert {[lindex $addr 1] == $old_port}
+            kill_instance redis $master_id
+            foreach_sentinel_id id {
+                wait_for_condition 1000 50 {
+                    [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+                } else {
+                    fail "At least one Sentinel did not received failover info"
+                }
+            }
+            restart_instance redis $master_id
+            set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+            set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+        }
+    }
+}
+
+02_test_slaves_replication
+02_crash_and_failover
+02_test_slaves_replication
+
+test "Kill a slave instance" {
+    foreach_redis_id id {
+        if {$id == $master_id} continue
+        set killed_slave_id $id
+        kill_instance redis $id
+        break
+    }
+}
+
+02_crash_and_failover
+02_test_slaves_replication
+
+test "Wait for failover to end" {
+    set inprogress 1
+    while {$inprogress} {
+        set inprogress 0
+        foreach_sentinel_id id {
+            if {[dict exists [S $id SENTINEL MASTER mymaster] failover-state]} {
+                incr inprogress
+            }
+        }
+        if {$inprogress} {after 100}
+    }
+}
+
+test "Restart killed slave and test replication of slaves again..." {
+    restart_instance redis $killed_slave_id
+}
+
+# Now we check if the slave rejoining the partition is reconfigured even
+# if the failover finished.
+02_test_slaves_replication
diff --git a/tests/sentinel/tests/03-runtime-reconf.tcl b/tests/sentinel/tests/03-runtime-reconf.tcl
new file mode 100644
index 0000000..426596c
--- /dev/null
+++ b/tests/sentinel/tests/03-runtime-reconf.tcl
@@ -0,0 +1 @@
+# Test runtime reconfiguration command SENTINEL SET.
diff --git a/tests/sentinel/tests/04-slave-selection.tcl b/tests/sentinel/tests/04-slave-selection.tcl
new file mode 100644
index 0000000..3d2ca64
--- /dev/null
+++ b/tests/sentinel/tests/04-slave-selection.tcl
@@ -0,0 +1,5 @@
+# Test slave selection algorithm.
+#
+# This unit should test:
+# 1) That when there are no suitable slaves no failover is performed.
+# 2) That among the available slaves, the one with better offset is picked.
diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl
new file mode 100644
index 0000000..5214fdc
--- /dev/null
+++ b/tests/sentinel/tests/05-manual.tcl
@@ -0,0 +1,45 @@
+# Test manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Manual failover works" {
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    catch {S 0 SENTINEL FAILOVER mymaster} reply
+    assert {$reply eq "OK"}
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not received failover info"
+        }
+    }
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+    foreach_redis_id id {
+        if {$id != $master_id && $id != 0} {
+            wait_for_condition 1000 50 {
+                [RI $id master_port] == [lindex $addr 1]
+            } else {
+                fail "Redis ID $id not configured to replicate with new master"
+            }
+        }
+    }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 master_port] == [lindex $addr 1]
+    } else {
+        fail "Old master not reconfigured as slave of new master"
+    }
+}
+
diff --git a/tests/sentinel/tests/06-ckquorum.tcl b/tests/sentinel/tests/06-ckquorum.tcl
new file mode 100644
index 0000000..31e5fa2
--- /dev/null
+++ b/tests/sentinel/tests/06-ckquorum.tcl
@@ -0,0 +1,34 @@
+# Test for the SENTINEL CKQUORUM command
+
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+test "CKQUORUM reports OK and the right amount of Sentinels" {
+    foreach_sentinel_id id {
+        assert_match "*OK $num_sentinels usable*" [S $id SENTINEL CKQUORUM mymaster]
+    }
+}
+
+test "CKQUORUM detects quorum cannot be reached" {
+    set orig_quorum [expr {$num_sentinels/2+1}]
+    S 0 SENTINEL SET mymaster quorum [expr {$num_sentinels+1}]
+    catch {S 0 SENTINEL CKQUORUM mymaster} err ; # err holds the reply, or the error message if the command failed.
+    assert_match "*NOQUORUM*" $err
+    S 0 SENTINEL SET mymaster quorum $orig_quorum
+}
+
+test "CKQUORUM detects failover authorization cannot be reached" {
+    set orig_quorum [expr {$num_sentinels/2+1}]
+    S 0 SENTINEL SET mymaster quorum 1
+    kill_instance sentinel 1
+    kill_instance sentinel 2
+    kill_instance sentinel 3
+    after 5000 ; # NOTE(review): presumably gives Sentinel 0 time to notice the killed Sentinels - confirm.
+    catch {S 0 SENTINEL CKQUORUM mymaster} err ; # err holds the reply, or the error message if the command failed.
+    assert_match "*NOQUORUM*" $err
+    S 0 SENTINEL SET mymaster quorum $orig_quorum
+    restart_instance sentinel 1
+    restart_instance sentinel 2
+    restart_instance sentinel 3
+}
+
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
new file mode 100644
index 0000000..a60656e
--- /dev/null
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -0,0 +1,68 @@
+# Test conditions where an instance is considered to be down
+
+source "../tests/includes/init-tests.tcl"
+
+proc ensure_master_up {} {
+    wait_for_condition 1000 50 {
+        [dict get [S 4 sentinel master mymaster] flags] eq "master"
+    } else {
+        fail "Master flags are not just 'master'"
+    }
+}
+
+proc ensure_master_down {} {
+    wait_for_condition 1000 50 {
+        [string match *down* \
+            [dict get [S 4 sentinel master mymaster] flags]]
+    } else {
+        fail "Master is not flagged SDOWN"
+    }
+}
+
+test "Crash the majority of Sentinels to prevent failovers for this unit" {
+    for {set id 0} {$id < $quorum} {incr id} {
+        kill_instance sentinel $id
+    }
+}
+
+test "SDOWN is triggered by non-responding but not crashed instance" {
+    lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+    ensure_master_up
+    exec ../../../src/redis-cli -h $host -p $port debug sleep 10 > /dev/null &
+    ensure_master_down
+    ensure_master_up
+}
+
+test "SDOWN is triggered by crashed instance" {
+    lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+    ensure_master_up
+    kill_instance redis 0
+    ensure_master_down
+    restart_instance redis 0
+    ensure_master_up
+}
+
+test "SDOWN is triggered by masters advertising as slaves" {
+    ensure_master_up
+    R 0 slaveof 127.0.0.1 34567
+    ensure_master_down
+    R 0 slaveof no one
+    ensure_master_up
+}
+
+test "SDOWN is triggered by misconfigured instance repling with errors" {
+    ensure_master_up
+    set orig_dir [lindex [R 0 config get dir] 1]
+    set orig_save [lindex [R 0 config get save] 1]
+    # Set dir to / and filename to "tmp" to make sure it will fail.
+    R 0 config set dir /
+    R 0 config set dbfilename tmp
+    R 0 config set save "1000000 1000000"
+    R 0 bgsave
+    ensure_master_down
+    R 0 config set save $orig_save
+    R 0 config set dir $orig_dir
+    R 0 config set dbfilename dump.rdb
+    R 0 bgsave
+    ensure_master_up
+}
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
new file mode 100644
index 0000000..c8165dc
--- /dev/null
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -0,0 +1,72 @@
+# Initialization tests -- most units will start including this.
+
+test "(init) Restart killed instances" {
+    foreach type {redis sentinel} {
+        foreach_${type}_id id {
+            if {[get_instance_attrib $type $id pid] == -1} {
+                puts -nonewline "$type/$id "
+                flush stdout
+                restart_instance $type $id
+            }
+        }
+    }
+}
+
+test "(init) Remove old master entry from sentinels" {
+    foreach_sentinel_id id {
+        catch {S $id SENTINEL REMOVE mymaster}
+    }
+}
+
+set redis_slaves 4
+test "(init) Create a master-slaves cluster of [expr {$redis_slaves+1}] instances" {
+    create_redis_master_slave_cluster [expr {$redis_slaves+1}]
+}
+set master_id 0
+
+test "(init) Sentinels can start monitoring a master" {
+    set sentinels [llength $::sentinel_instances]
+    set quorum [expr {$sentinels/2+1}]
+    foreach_sentinel_id id {
+        S $id SENTINEL MONITOR mymaster \
+              [get_instance_attrib redis $master_id host] \
+              [get_instance_attrib redis $master_id port] $quorum
+    }
+    foreach_sentinel_id id {
+        assert {[S $id sentinel master mymaster] ne {}}
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
+        S $id SENTINEL SET mymaster failover-timeout 20000
+        S $id SENTINEL SET mymaster parallel-syncs 10
+    }
+}
+
+test "(init) Sentinels can talk with the master" {
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+        } else {
+            fail "Sentinel $id can't talk with the master."
+        }
+    }
+}
+
+test "(init) Sentinels are able to auto-discover other sentinels" {
+    set sentinels [llength $::sentinel_instances]
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == ($sentinels-1)
+        } else {
+            fail "At least some sentinel can't detect some other sentinel"
+        }
+    }
+}
+
+test "(init) Sentinels are able to auto-discover slaves" {
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] num-slaves] == $redis_slaves
+        } else {
+            fail "At least some sentinel can't detect some slave"
+        }
+    }
+}
diff --git a/tests/sentinel/tmp/.gitignore b/tests/sentinel/tmp/.gitignore
new file mode 100644
index 0000000..f581f73
--- /dev/null
+++ b/tests/sentinel/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*
