18 files changed, 1410 insertions, 0 deletions
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
new file mode 100644
index 0000000..0647914
--- /dev/null
+++ b/tests/cluster/cluster.tcl
@@ -0,0 +1,130 @@
+# Cluster-specific test functions.
+#
+# Copyright (C) 2014 Salvatore Sanfilippo [email protected]
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+# Returns a parsed CLUSTER NODES output as a list of dictionaries.
+# Keys: id addr flags slaveof ping_sent pong_recv config_epoch linkstate slots.
+# 'flags' is split on commas; 'slots' holds all the fields after linkstate.
+proc get_cluster_nodes id {
+    set nodes {}
+    foreach l [split [R $id cluster nodes] "\r\n"] {
+        set l [string trim $l]
+        if {$l eq {}} continue
+        set args [split $l]
+        lappend nodes [dict create \
+            id [lindex $args 0] \
+            addr [lindex $args 1] \
+            flags [split [lindex $args 2] ,] \
+            slaveof [lindex $args 3] \
+            ping_sent [lindex $args 4] \
+            pong_recv [lindex $args 5] \
+            config_epoch [lindex $args 6] \
+            linkstate [lindex $args 7] \
+            slots [lrange $args 8 end] \
+        ]
+    }
+    return $nodes
+}
+
+# Return 1 if 'flag' is one of the flags of the parsed 'node', 0 otherwise.
+proc has_flag {node flag} {
+    expr {$flag in [dict get $node flags]}
+}
+
+# Return the parsed CLUSTER NODES entry (a dictionary) that instance 'id'
+# flags as "myself", or an empty value when no entry carries that flag.
+proc get_myself id {
+    foreach entry [get_cluster_nodes $id] {
+        if {[has_flag $entry myself]} {return $entry}
+    }
+    return {}
+}
+
+# Fetch the value of 'field' from the CLUSTER INFO output of instance 'n'.
+proc CI {n field} {
+    return [get_info_field [R $n cluster info] $field]
+}
+
+# Assuming nodes are reset, this function performs slots allocation.
+# Only the first 'n' nodes are used: each of the slots from 16383 down to 0
+# is given to a random node, then every node gets its CLUSTER ADDSLOTS call.
+proc cluster_allocate_slots {n} {
+    for {set slot 16383} {$slot >= 0} {incr slot -1} {
+        # Per-node lists live in the dynamically named slots_<node> vars.
+        lappend slots_[randomInt $n] $slot
+    }
+    # Hand each node its whole list in one command.
+    for {set j 0} {$j < $n} {incr j} {
+        set owned [set slots_${j}]
+        R $j cluster addslots {*}$owned
+    }
+}
+
+# Wait for every non-killed node to report cluster_state 'state', or fail.
+proc assert_cluster_state {state} {
+    foreach_redis_id id {
+        if {[instance_is_killed redis $id]} continue
+        wait_for_condition 1000 50 {
+            [CI $id cluster_state] eq $state
+        } else {
+            fail "Cluster node $id cluster_state:[CI $id cluster_state]"
+        }
+    }
+}
+
+# Scan the instances with ID >= $first and return the ID of the first
+# live one whose "myself" entry has no master ("-" in the slaveof field),
+# that is, a node not yet configured as a slave. Instances that were
+# killed are skipped; the test fails when no candidate is left.
+proc cluster_find_available_slave {first} {
+    foreach_redis_id id {
+        if {$id < $first || [instance_is_killed redis $id]} continue
+        if {[dict get [get_myself $id] slaveof] eq {-}} {return $id}
+    }
+    fail "No available slaves"
+}
+
+# Attach 'slaves' replicas to a cluster made of 'masters' masters.
+# Masters are assumed to be the instances with ID 0 .. masters-1; replicas
+# are distributed among them in round-robin order.
+proc cluster_allocate_slaves {masters slaves} {
+    for {set j 0} {$j < $slaves} {incr j} {
+        set slave_id [cluster_find_available_slave $masters]
+        # CLUSTER REPLICATE wants the node ID of the master, not its index.
+        set master_node_id [dict get [get_myself [expr {$j % $masters}]] id]
+        R $slave_id cluster replicate $master_node_id
+    }
+}
+
+# Create a cluster with 'masters' masters and 'slaves' slaves; asserts state ok.
+proc create_cluster {masters slaves} {
+    cluster_allocate_slots $masters
+    if {$slaves} {
+        cluster_allocate_slaves $masters $slaves
+    }
+    assert_cluster_state ok
+}
+
+# Set the cluster node-timeout to all the reachable nodes, ignoring errors.
+proc set_cluster_node_timeout {to} {
+    foreach_redis_id id {
+        catch {R $id CONFIG SET cluster-node-timeout $to}
+    }
+}
+
+# Check if the cluster is writable and readable, using node "id" as the
+# entry point: 100 keys are set to values carrying a random prefix, then
+# read back and compared with what was written.
+proc cluster_write_test {id} {
+    set prefix [randstring 20 20 alpha]
+    set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis $id port]]
+    for {set j 0} {$j < 100} {incr j} {
+        $cluster set key.$j $prefix.$j
+    }
+    for {set j 0} {$j < 100} {incr j} {
+        assert {[$cluster get key.$j] eq "$prefix.$j"}
+    }
+    $cluster close
+}
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
new file mode 100644
index 0000000..93603dd
--- /dev/null
+++ b/tests/cluster/run.tcl
@@ -0,0 +1,28 @@
+# Cluster test suite. Copyright (C) 2014 Salvatore Sanfilippo [email protected]
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/cluster
+source cluster.tcl
+source ../instances.tcl
+source ../../support/cluster.tcl ; # Redis Cluster client.
+
+set ::instances_count 20 ; # How many instances we use at max.
+
+proc main {} {
+    parse_options
+    spawn_instance redis $::redis_base_port $::instances_count {
+        "cluster-enabled yes"
+        "appendonly yes"
+    }
+    run_tests
+    cleanup
+    end_tests
+}
+
+if {[catch main e]} {
+    puts $::errorInfo
+    if {$::pause_on_error} pause_on_error
+    cleanup
+    exit 1
+}
diff --git a/tests/cluster/tests/00-base.tcl b/tests/cluster/tests/00-base.tcl
new file mode 100644
index 0000000..280befb
--- /dev/null
+++ b/tests/cluster/tests/00-base.tcl
@@ -0,0 +1,59 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+if {$::simulate_error} {
+    test "This test will fail" {
+        fail "Simulated error"
+    }
+}
+
+test "Different nodes have different IDs" {
+    set ids {}
+    set numnodes 0
+    foreach_redis_id id {
+        incr numnodes
+        # Every node should just know itself.
+        set nodeid [dict get [get_myself $id] id]
+        assert {$nodeid ne {}}
+        lappend ids $nodeid
+    }
+    set numids [llength [lsort -unique $ids]]
+    assert {$numids == $numnodes}
+}
+
+test "It is possible to perform slot allocation" {
+    cluster_allocate_slots 5
+}
+
+test "After the join, every node gets a different config epoch" {
+    set trynum 60
+    while {[incr trynum -1] != 0} {
+        # We check that this condition is true for *all* the nodes.
+        set ok 1 ; # Will be set to 0 every time a node is not ok.
+        foreach_redis_id id {
+            set epochs {}
+            foreach n [get_cluster_nodes $id] {
+                lappend epochs [dict get $n config_epoch]
+            }
+            if {[lsort $epochs] != [lsort -unique $epochs]} {
+                set ok 0 ; # At least one collision!
+            }
+        }
+        if {$ok} break
+        after 1000
+        puts -nonewline .
+        flush stdout
+    }
+    if {$trynum == 0} {
+        fail "Config epoch conflict resolution is not working."
+    }
+}
+
+test "Nodes should report cluster_state is ok now" {
+    assert_cluster_state ok
+}
+
+test "It is possible to write and read from the cluster" {
+    cluster_write_test 0
+}
diff --git a/tests/cluster/tests/01-faildet.tcl b/tests/cluster/tests/01-faildet.tcl
new file mode 100644
index 0000000..8fe87c9
--- /dev/null
+++ b/tests/cluster/tests/01-faildet.tcl
@@ -0,0 +1,38 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster should start ok" {
+    assert_cluster_state ok
+}
+
+test "Killing two slave nodes" {
+    kill_instance redis 5
+    kill_instance redis 6
+}
+
+test "Cluster should be still up" {
+    assert_cluster_state ok
+}
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+# Note: the only slave of instance 0 is already down so no
+# failover is possible, that would change the state back to ok.
+test "Cluster should be down now" {
+    assert_cluster_state fail
+}
+
+test "Restarting master node" {
+    restart_instance redis 0
+}
+
+test "Cluster should be up again" {
+    assert_cluster_state ok
+}
diff --git a/tests/cluster/tests/02-failover.tcl b/tests/cluster/tests/02-failover.tcl
new file mode 100644
index 0000000..6b2fd09
--- /dev/null
+++ b/tests/cluster/tests/02-failover.tcl
@@ -0,0 +1,65 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Restarting the previously killed master node" {
+    restart_instance redis 0
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
diff --git a/tests/cluster/tests/03-failover-loop.tcl b/tests/cluster/tests/03-failover-loop.tcl
new file mode 100644
index 0000000..8e1bcd6
--- /dev/null
+++ b/tests/cluster/tests/03-failover-loop.tcl
@@ -0,0 +1,115 @@
+# Failover stress test.
+# In this test a different node is killed in a loop for N
+# iterations. The test checks that certain properties
+# are preserved across iterations.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+set iterations 20
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+while {[incr iterations -1]} {
+    set tokill [randomInt 10]
+    set other [expr {($tokill+1)%10}] ; # Some other instance.
+    set key [randstring 20 20 alpha]
+    set val [randstring 20 20 alpha]
+    set role [RI $tokill role]
+    if {$role eq {master}} {
+        set slave {}
+        set myid [dict get [get_myself $tokill] id]
+        foreach_redis_id id {
+            if {$id == $tokill} continue
+            if {[dict get [get_myself $id] slaveof] eq $myid} {
+                set slave $id
+            }
+        }
+        if {$slave eq {}} {
+            fail "Unable to retrieve slave's ID for master #$tokill"
+        }
+    }
+
+    puts "--- Iteration $iterations ---"
+
+    if {$role eq {master}} {
+        test "Wait for slave of #$tokill to sync" {
+            wait_for_condition 1000 50 {
+                [string match {*state=online*} [RI $tokill slave0]]
+            } else {
+                fail "Slave of node #$tokill is not ok"
+            }
+        }
+        set slave_config_epoch [CI $slave cluster_my_epoch]
+    }
+
+    test "Cluster is writable before failover" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster set $key:$i $val:$i} err
+            assert {$err eq {OK}}
+        }
+        # Wait for the write to propagate to the slave if we
+        # are going to kill a master.
+        if {$role eq {master}} {
+            R $tokill wait 1 20000
+        }
+    }
+
+    test "Killing node #$tokill" {
+        kill_instance redis $tokill
+    }
+
+    if {$role eq {master}} {
+        test "Wait failover by #$slave with old epoch $slave_config_epoch" {
+            wait_for_condition 1000 50 {
+                [CI $slave cluster_my_epoch] > $slave_config_epoch
+            } else {
+                fail "No failover detected, epoch is still [CI $slave cluster_my_epoch]"
+            }
+        }
+    }
+
+    test "Cluster should eventually be up again" {
+        assert_cluster_state ok
+    }
+
+    test "Cluster is writable again" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster set $key:$i:2 $val:$i:2} err
+            assert {$err eq {OK}}
+        }
+    }
+
+    test "Restarting node #$tokill" {
+        restart_instance redis $tokill
+    }
+
+    test "Instance #$tokill is now a slave" {
+        wait_for_condition 1000 50 {
+            [RI $tokill role] eq {slave}
+        } else {
+            fail "Restarted instance is not a slave"
+        }
+    }
+
+    test "We can read back the value we set before" {
+        for {set i 0} {$i < 100} {incr i} {
+            catch {$cluster get $key:$i} err
+            assert {$err eq "$val:$i"}
+            catch {$cluster get $key:$i:2} err
+            assert {$err eq "$val:$i:2"}
+        }
+    }
+}
+
+test "Post condition: current_epoch >= my_epoch everywhere" {
+    foreach_redis_id id {
+        assert {[CI $id cluster_current_epoch] >= [CI $id cluster_my_epoch]}
+    }
+}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
new file mode 100644
index 0000000..0ccbf71
--- /dev/null
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -0,0 +1,172 @@
+# Resharding stress test.
+# In this test slots are moved between nodes with redis-trib while the
+# cluster is under write load. The test checks that the written data is
+# preserved, also after a restart of all the instances and on the slaves.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Enable AOF in all the instances" {
+    foreach_redis_id id {
+        R $id config set appendonly yes
+        # We use "appendfsync no" because it's fast but also guarantees that
+        # write(2) is performed before replying to client.
+        R $id config set appendfsync no
+    }
+
+    foreach_redis_id id {
+        wait_for_condition 1000 500 {
+            [RI $id aof_rewrite_in_progress] == 0 &&
+            [RI $id aof_enabled] == 1
+        } else {
+            fail "Failed to enable AOF on instance #$id"
+        }
+    }
+}
+
+# Return non-zero if the specified PID is about a process still in execution,
+# otherwise 0 is returned.
+proc process_is_running {pid} {
+    # "ps -p" is expected to exit with an error for a non existing PID, in
+    # which case catch yields a non-zero code: the PID is alive on code 0.
+    set code [catch {exec ps -p $pid}]
+    expr {$code == 0}
+}
+
+# Our resharding test performs the following actions:
+#
+# - N commands are sent to the cluster in the course of the test.
+# - Every command selects a random key from key:0 to key:MAX-1.
+# - The operation RPUSH key <randomvalue> is performed.
+# - Tcl remembers into an array all the values pushed to each list.
+# - After N/2 commands, the resharding process is started in background.
+# - The test continues while the resharding is in progress.
+# - At the end of the test, we wait for the resharding process to stop.
+# - Finally the keys are checked to see if they contain the value they should.
+
+set numkeys 50000
+set numops 200000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+set tribpid {}
+
+test "Cluster consistency during live resharding" {
+    set ele 0
+    for {set j 0} {$j < $numops} {incr j} {
+        # Trigger the resharding once we execute half the ops.
+        if {$tribpid ne {} &&
+            ($j % 10000) == 0 &&
+            ![process_is_running $tribpid]} {
+            set tribpid {}
+        }
+
+        if {$j >= $numops/2 && $tribpid eq {}} {
+            puts -nonewline "...Starting resharding..."
+            flush stdout
+            set target [dict get [get_myself [randomInt 5]] id]
+            set tribpid [lindex [exec \
+                ../../../src/redis-trib.rb reshard \
+                --from all \
+                --to $target \
+                --slots 100 \
+                --yes \
+                127.0.0.1:[get_instance_attrib redis 0 port] \
+                | [info nameofexecutable] \
+                ../tests/helpers/onlydots.tcl \
+                &] 0]
+        }
+
+        # Write random data to random list.
+        set listid [randomInt $numkeys]
+        set key "key:$listid"
+        incr ele
+        # We write both with Lua scripts and with plain commands.
+        # This way we are able to stress Lua -> Redis command invocation
+        # as well, that has tests to prevent Lua to write into wrong
+        # hash slots.
+        if {$listid % 2} {
+            $cluster rpush $key $ele
+        } else {
+            $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+        }
+        lappend content($key) $ele
+
+        if {($j % 1000) == 0} {
+            puts -nonewline W; flush stdout
+        }
+    }
+
+    # Wait for the resharding process to end
+    wait_for_condition 1000 500 {
+        [process_is_running $tribpid] == 0
+    } else {
+        fail "Resharding is not terminating after some time."
+    }
+
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        if {[$cluster lrange $key 0 -1] ne $value} {
+            fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+        }
+    }
+}
+
+test "Crash and restart all the instances" {
+    foreach_redis_id id {
+        kill_instance redis $id
+        restart_instance redis $id
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Verify $numkeys keys after the crash & restart" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        if {[$cluster lrange $key 0 -1] ne $value} {
+            fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+        }
+    }
+}
+
+test "Disable AOF in all the instances" {
+    foreach_redis_id id {
+        R $id config set appendonly no
+    }
+}
+
+test "Verify slaves consistency" {
+    set verified_masters 0
+    foreach_redis_id id {
+        set role [R $id role]
+        lassign $role myrole myoffset slaves
+        if {$myrole eq {slave}} continue
+        set masterport [get_instance_attrib redis $id port]
+        set masterdigest [R $id debug digest]
+        foreach_redis_id sid {
+            set srole [R $sid role]
+            if {[lindex $srole 0] eq {master}} continue
+            if {[lindex $srole 2] != $masterport} continue
+            wait_for_condition 1000 500 {
+                [R $sid debug digest] eq $masterdigest
+            } else {
+                fail "Master and slave data digest are different"
+            }
+            incr verified_masters
+        }
+    }
+    assert {$verified_masters >= 5}
+}
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
new file mode 100644
index 0000000..6efedce
--- /dev/null
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -0,0 +1,94 @@
+# Slave selection test
+# Check the algorithm trying to pick the slave with the most complete history.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "The first master has actually two slaves" {
+    assert {[llength [lindex [R 0 role] 2]] == 2}
+}
+
+test {Slaves of #0 are instance #5 and #10 as expected} {
+    set port0 [get_instance_attrib redis 0 port]
+    assert {[lindex [R 5 role] 2] == $port0}
+    assert {[lindex [R 10 role] 2] == $port0}
+}
+
+test "Instance #5 and #10 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up} &&
+        [RI 10 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 or #10 master link status is not up"
+    }
+}
+
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+
+test "Slaves are both able to receive and acknowledge writes" {
+    for {set j 0} {$j < 100} {incr j} {
+        $cluster set $j $j
+    }
+    assert {[R 0 wait 2 60000] == 2}
+}
+
+test "Write data while slave #10 is paused and can't receive it" {
+    # Stop the slave with a multi/exec transaction so that the master will
+    # be killed as soon as it can accept writes again.
+    R 10 multi
+    R 10 debug sleep 10
+    R 10 client kill 127.0.0.1:$port0
+    R 10 deferred 1
+    R 10 exec
+
+    # Write some data the slave can't receive.
+    for {set j 0} {$j < 100} {incr j} {
+        $cluster set $j $j
+    }
+
+    # Prevent the master from accepting new slaves.
+    # Use a large pause value since we'll kill it anyway.
+    R 0 CLIENT PAUSE 60000
+
+    # Wait for the slave to return available again
+    R 10 deferred 0
+    assert {[R 10 read] eq {OK OK}}
+
+    # Kill the master so that a reconnection will not be possible.
+    kill_instance redis 0
+}
+
+test "Wait for instance #5 (and not #10) to turn into a master" {
+    wait_for_condition 1000 50 {
+        [RI 5 role] eq {master}
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Wait for the node #10 to return alive before ending the test" {
+    R 10 ping
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Node #10 should eventually replicate node #5" {
+    set port5 [get_instance_attrib redis 5 port]
+    wait_for_condition 1000 50 {
+        ([lindex [R 10 role] 2] == $port5) &&
+        ([lindex [R 10 role] 3] eq {connected})
+    } else {
+        fail "#10 didn't became slave of #5"
+    }
+}
diff --git a/tests/cluster/tests/06-slave-stop-cond.tcl b/tests/cluster/tests/06-slave-stop-cond.tcl
new file mode 100644
index 0000000..f2e6705
--- /dev/null
+++ b/tests/cluster/tests/06-slave-stop-cond.tcl
@@ -0,0 +1,73 @@
+# Slave stop condition test
+# Check that if there is a disconnection time limit, the slave will not try
+# to failover its master.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 5 slaves.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "The first master has actually one slave" {
+    assert {[llength [lindex [R 0 role] 2]] == 1}
+}
+
+test {Slaves of #0 is instance #5 as expected} {
+    set port0 [get_instance_attrib redis 0 port]
+    assert {[lindex [R 5 role] 2] == $port0}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Lower the slave validity factor of #5 to the value of 2" {
+    assert {[R 5 config set cluster-slave-validity-factor 2] eq {OK}}
+}
+
+test "Break master-slave link and prevent further reconnections" {
+    # Stop the slave with a multi/exec transaction so that the master will
+    # be killed as soon as it can accept writes again.
+    R 5 multi
+    R 5 client kill 127.0.0.1:$port0
+    # here we should sleep 6 or more seconds (node_timeout * slave_validity)
+    # but the actual validity time is actually incremented by the
+    # repl-ping-slave-period value which is 10 seconds by default. So we
+    # need to wait more than 16 seconds.
+    R 5 debug sleep 20
+    R 5 deferred 1
+    R 5 exec
+
+    # Prevent the master from accepting new slaves.
+    # Use a large pause value since we'll kill it anyway.
+    R 0 CLIENT PAUSE 60000
+
+    # Wait for the slave to return available again
+    R 5 deferred 0
+    assert {[R 5 read] eq {OK OK}}
+
+    # Kill the master so that a reconnection will not be possible.
+    kill_instance redis 0
+}
+
+test "Slave #5 is reachable and alive" {
+    assert {[R 5 ping] eq {PONG}}
+}
+
+test "Slave #5 should not be able to failover" {
+    after 10000
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Cluster should be down" {
+    assert_cluster_state fail
+}
diff --git a/tests/cluster/tests/07-replica-migration.tcl b/tests/cluster/tests/07-replica-migration.tcl
new file mode 100644
index 0000000..68231cd
--- /dev/null
+++ b/tests/cluster/tests/07-replica-migration.tcl
@@ -0,0 +1,103 @@
+# Replica migration test.
+# Check that orphaned masters are joined by replicas of masters having
+# multiple replicas attached, according to the migration barrier settings.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] == 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Killing all the slaves of master #0 and #1" {
+    kill_instance redis 5
+    kill_instance redis 10
+    kill_instance redis 6
+    kill_instance redis 11
+    after 4000
+}
+
+foreach_redis_id id {
+    if {$id < 5} {
+        test "Master #$id should have at least one replica" {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 1
+            } else {
+                fail "Master #$id has no replicas"
+            }
+        }
+    }
+}
+
+# Now test the migration to a master which used to be a slave, after
+# a failover.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 10
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Kill slave #7 of master #2. Only slave left is #12 now" {
+    kill_instance redis 7
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing master node #2, #12 should failover" {
+    kill_instance redis 2
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance 12 is now a master without slaves" {
+    assert {[RI 12 role] eq {master}}
+}
+
+# The remaining instance is now without slaves. Some other slave
+# should migrate to it.
+
+test "Master #12 should get at least one migrated replica" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 12 role] 2]] >= 1
+    } else {
+        fail "Master #12 has no replicas"
+    }
+}
diff --git a/tests/cluster/tests/08-update-msg.tcl b/tests/cluster/tests/08-update-msg.tcl
new file mode 100644
index 0000000..6f9661d
--- /dev/null
+++ b/tests/cluster/tests/08-update-msg.tcl
@@ -0,0 +1,90 @@
+# Test UPDATE messages sent by other nodes when the currently authoritative
+# master is unavailable. The test is performed in the following steps:
+#
+# 1) Master goes down.
+# 2) Slave failover and becomes new master.
+# 3) New master is partitioned away.
+# 4) Old master returns.
+# 5) At this point we expect the old master to turn into a slave ASAP because
+#    of the UPDATE messages it will receive from the other nodes when its
+#    configuration will be found to be outdated.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+    kill_instance redis 0
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Killing the new master #5" {
+    kill_instance redis 5
+}
+
+test "Cluster should be down now" {
+    assert_cluster_state fail
+}
+
+test "Restarting the old master node" {
+    restart_instance redis 0
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
+
+test "Restarting the new master node" {
+    restart_instance redis 5
+}
+
+test "Cluster is up again" {
+    assert_cluster_state ok
+}
diff --git a/tests/cluster/tests/09-pubsub.tcl b/tests/cluster/tests/09-pubsub.tcl
new file mode 100644
index 0000000..e62b91c
--- /dev/null
+++ b/tests/cluster/tests/09-pubsub.tcl
@@ -0,0 +1,40 @@
+# Test PUBLISH propagation across the cluster.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+proc test_cluster_publish {instance instances} {
+    # Every instance except the publisher becomes a subscriber: its link is
+    # put in deferred mode and the subscribe reply is read explicitly.
+    for {set j 0} {$j < $instances} {incr j} {
+        if {$j == $instance} continue
+        R $j deferred 1
+        R $j subscribe testchannel
+        R $j read; # Read the subscribe reply
+    }
+
+    set data [randomValue]
+    R $instance PUBLISH testchannel $data
+
+    # Each subscriber must receive the payload as third element of the
+    # message; afterwards it unsubscribes and leaves deferred mode.
+    for {set j 0} {$j < $instances} {incr j} {
+        if {$j == $instance} continue
+        set msg [R $j read]
+        assert {$data eq [lindex $msg 2]}
+        R $j unsubscribe testchannel
+        R $j read; # Read the unsubscribe reply
+        R $j deferred 0
+    }
+}
+
+test "Test publishing to master" {
+    test_cluster_publish 0 10
+}
+
+test "Test publishing to slave" {
+    test_cluster_publish 5 10
+}
diff --git a/tests/cluster/tests/10-manual-failover.tcl b/tests/cluster/tests/10-manual-failover.tcl
new file mode 100644
index 0000000..5441b79
--- /dev/null
+++ b/tests/cluster/tests/10-manual-failover.tcl
@@ -0,0 +1,192 @@
+# Check the manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+set numkeys 50000
+set numops 10000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+
+test "Send CLUSTER FAILOVER to #5, during load" {
+    for {set j 0} {$j < $numops} {incr j} {
+        # Append a random element to a randomly chosen list key.
+        set listid [randomInt $numkeys]
+        set key "key:$listid"
+        set ele [randomValue]
+        # Odd list ids are written with a plain RPUSH, even ones through a
+        # Lua script. This also stresses the Lua -> Redis command invocation
+        # path, which is meant to prevent Lua from writing into the wrong
+        # hash slots.
+        if {$listid % 2} {
+            $cluster rpush $key $ele
+        } else {
+           $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+        }
+        lappend content($key) $ele
+
+        if {($j % 1000) == 0} {
+            puts -nonewline W; flush stdout
+        }
+
+        if {$j == $numops/2} {R 5 cluster failover}
+    }
+}
+
+test "Wait for failover" {
+    wait_for_condition 1000 50 {
+        [CI 1 cluster_current_epoch] > $current_epoch
+    } else {
+        fail "No failover detected"
+    }
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+    assert {[RI 5 role] eq {master}}
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+    # Check that the Redis Cluster content matches our logical content.
+    foreach {key value} [array get content] {
+        assert {[$cluster lrange $key 0 -1] eq $value}
+    }
+}
+
+test "Instance #0 gets converted into a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave}
+    } else {
+        fail "Old master was not converted into slave"
+    }
+}
+
+## Check that manual failover does not happen if we can't talk with the master.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Make instance #0 unreachable without killing it" {
+    R 0 deferred 1
+    R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+    R 5 cluster failover
+}
+
+test "Instance #5 is still a slave after some time (no failover)" {
+    after 5000 ; # NOTE(review): the title expects a slave, yet the check
+    assert {[RI 5 role] eq {master}} ; # here requires master -- confirm intent.
+}
+
+test "Wait for instance #0 to return back alive" {
+    R 0 deferred 0
+    assert {[R 0 read] eq {OK}}
+}
+
+## Check that with "force" the failover happens anyway.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+    assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+    wait_for_condition 1000 50 {
+        [RI 5 master_link_status] eq {up}
+    } else {
+        fail "Instance #5 master link status is not up"
+    }
+}
+
+test "Make instance #0 unreachable without killing it" {
+    R 0 deferred 1
+    R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+    R 5 cluster failover force
+}
+
+test "Instance #5 is a master after some time" {
+    wait_for_condition 1000 50 {
+        [RI 5 role] eq {master}
+    } else {
+        fail "Instance #5 is not a master after some time regardless of FORCE"
+    }
+}
+
+test "Wait for instance #0 to return back alive" {
+    R 0 deferred 0
+    assert {[R 0 read] eq {OK}}
+}
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
new file mode 100644
index 0000000..f567c69
--- /dev/null
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -0,0 +1,59 @@
+# Manual takeover test
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+    create_cluster 5 5
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+test "Killing majority of master nodes" {
+    kill_instance redis 0
+    kill_instance redis 1
+    kill_instance redis 2
+}
+
+test "Cluster should eventually be down" {
+    assert_cluster_state fail
+}
+
+test "Use takeover to bring slaves back" {
+    R 5 cluster failover takeover
+    R 6 cluster failover takeover
+    R 7 cluster failover takeover
+}
+
+test "Cluster should eventually be up again" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 4
+}
+
+test "Instance #5, #6, #7 are now masters" {
+    assert {[RI 5 role] eq {master}}
+    assert {[RI 6 role] eq {master}}
+    assert {[RI 7 role] eq {master}}
+}
+
+test "Restarting the previously killed master nodes" {
+    restart_instance redis 0
+    restart_instance redis 1
+    restart_instance redis 2
+}
+
+test "Instance #0, #1, #2 gets converted into a slaves" {
+    wait_for_condition 1000 50 {
+        [RI 0 role] eq {slave} && [RI 1 role] eq {slave} && [RI 2 role] eq {slave}
+    } else {
+        fail "Old masters not converted into slaves"
+    }
+}
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
new file mode 100644
index 0000000..48ecd1d
--- /dev/null
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -0,0 +1,64 @@
+# Replica migration test #2.
+#
+# Check that the status of master that can be targeted by replica migration
+# is acquired again, after getting slots again, in a cluster where the
+# other masters have slaves.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 masters and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} { # Query the node being iterated, not always node 0.
+            wait_for_condition 1000 50 {
+                [llength [lindex [R $id role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+    set output [exec \
+        ../../../src/redis-trib.rb rebalance \
+        --weight ${master0_id}=0 \
+        127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout]
+}
+
+test "Master #0 should lose its replicas" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] == 0
+    } else {
+        fail "Master #0 still has replicas"
+    }
+}
+
+test "Resharding back some slot to master #0" {
+    # Wait for the cluster config to propagate before attempting a
+    # new resharding.
+    after 10000
+    set output [exec \
+        ../../../src/redis-trib.rb rebalance \
+        --weight ${master0_id}=.01 \
+        --use-empty-masters \
+        127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout]
+}
+
+test "Master #0 should re-acquire one or more replicas" {
+    wait_for_condition 1000 50 {
+        [llength [lindex [R 0 role] 2]] >= 1
+    } else {
+        fail "Master #0 has no replicas"
+    }
+}
diff --git a/tests/cluster/tests/helpers/onlydots.tcl b/tests/cluster/tests/helpers/onlydots.tcl
new file mode 100644
index 0000000..4a6d1ae
--- /dev/null
+++ b/tests/cluster/tests/helpers/onlydots.tcl
@@ -0,0 +1,16 @@
+# Reads the standard input and shows only the dots in the output, filtering
+# out all the other characters. Designed to avoid buffering so that when
+# we get the output of redis-trib and want to show just the dots, we'll see
+# the dots as soon as redis-trib outputs them.
+
+fconfigure stdin -buffering none
+
+# Consume stdin one character at a time until a read returns nothing (EOF),
+# echoing each dot immediately and dropping every other character.
+while {[set c [read stdin 1]] ne {}} {
+    if {$c eq {.}} {
+        puts -nonewline .
+        flush stdout
+    }
+}
+exit 0
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
new file mode 100644
index 0000000..466ab8f
--- /dev/null
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -0,0 +1,70 @@
+# Initialization tests -- most units will start including this.
+
+test "(init) Restart killed instances" {
+    foreach type {redis} {
+        foreach_${type}_id id {
+            if {[get_instance_attrib $type $id pid] == -1} {
+                puts -nonewline "$type/$id "
+                flush stdout
+                restart_instance $type $id
+            }
+        }
+    }
+}
+
+test "Cluster nodes are reachable" {
+    foreach_redis_id id {
+        # Every node should be reachable.
+        wait_for_condition 1000 50 {
+            ([catch {R $id ping} ping_reply] == 0) &&
+            ($ping_reply eq {PONG})
+        } else {
+            catch {R $id ping} err
+            fail "Node #$id keeps replying '$err' to PING."
+        }
+    }
+}
+
+test "Cluster nodes hard reset" {
+    # The node timeout does not depend on the node, so choose it once
+    # before the loop; a larger value is used when the valgrind flag is
+    # set.
+    set node_timeout [expr {$::valgrind ? 10000 : 3000}]
+    foreach_redis_id id {
+        catch {R $id flushall} ; # May fail for readonly slaves.
+        # Hard reset and per-node config epoch go in a single MULTI/EXEC.
+        R $id MULTI
+        R $id cluster reset hard
+        R $id cluster set-config-epoch [expr {$id+1}]
+        R $id EXEC
+        R $id config set cluster-node-timeout $node_timeout
+        R $id config set cluster-slave-validity-factor 10
+        R $id config rewrite
+    }
+}
+
+test "Cluster Join and auto-discovery test" {
+    # Join node 0 with 1, 1 with 2, ... and so forth, forming a chain.
+    # If auto-discovery works all nodes will know every other node
+    # eventually.
+    set ids {}
+    foreach_redis_id id {lappend ids $id}
+    for {set j 0} {$j < [llength $ids]-1} {incr j} {
+        set a [lindex $ids $j]
+        set b [lindex $ids [expr {$j+1}]]
+        set b_port [get_instance_attrib redis $b port]
+        R $a cluster meet 127.0.0.1 $b_port
+    }
+
+    foreach_redis_id id {
+        wait_for_condition 1000 50 {
+            [llength [get_cluster_nodes $id]] == [llength $ids]
+        } else {
+            fail "Cluster failed to join into a full mesh."
+        }
+    }
+}
+
+test "Before slots allocation, all nodes report cluster failure" {
+    assert_cluster_state fail
+}
diff --git a/tests/cluster/tmp/.gitignore b/tests/cluster/tmp/.gitignore
new file mode 100644
index 0000000..f581f73
--- /dev/null
+++ b/tests/cluster/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*