179 lines
5.9 KiB
Django/Jinja
179 lines
5.9 KiB
Django/Jinja
#!/usr/bin/env python
|
|
# {{ ansible_managed }}
|
|
# Author: Maurice Makaay, XS4ALL
|
|
|
|
from __future__ import print_function
|
|
import subprocess
|
|
import re
|
|
import json
|
|
from os import system
|
|
from sys import exit, stdin
|
|
|
|
|
|
# Names of the Galera cluster nodes. Filled in at template rendering time
# with the JSON-encoded contents of groups['galera'].
nodes = {{ groups['galera'] | to_json }}
|
|
# Per-node LXD information, filled in at template rendering time from the
# 'lxd_status' template variable. This script reads lxd_status[<node>]["host"]
# to build the "<host>:<node>" target that is passed to `lxc exec`.
lxd_status = {{ lxd_status | to_json }}
|
|
|
|
|
|
def propose(commands, exit_code):
    """Show the proposed shell commands and run them after confirmation.

    All commands are printed first, after which the operator is asked on
    stdin whether or not to execute them. This function never returns: it
    always terminates the script.

    Args:
        commands: Shell command strings. The collection is traversed twice
            (once to display, once to execute), so pass a list or tuple.
        exit_code: Exit status to use when the operator does not confirm.

    Exits:
        0 when the operator confirmed and the commands were run (the exit
        statuses of the individual commands are not inspected), otherwise
        ``exit_code``.
    """
    # Imported locally: the module only pulls ``exit`` and ``stdin`` from sys.
    from sys import stdout

    for command in commands:
        print("# %s" % command)
    print("")
    print("Execute now? [y/n]: ", end="")
    # The prompt has no trailing newline, so it can linger in the output
    # buffer. Flush to make sure it is visible before blocking on input.
    stdout.flush()

    # Only an explicit "y" or "yes" counts as confirmation. A substring test
    # would also accept replies like "nay" or "no way" and would then run
    # commands that restart or bootstrap database nodes.
    answer = stdin.readline().strip().lower()
    if answer not in ("y", "yes"):
        exit(exit_code)

    print("")
    for command in commands:
        print("EXECUTING> %s" % command)
        # Flush so this line shows up ahead of the command's own output.
        stdout.flush()
        system(command)
    print("")
    exit(0)
|
|
|
|
|
|
# Ask every node for its own view of the cluster. When the status command
# fails for a node, a placeholder record is stored instead, so the node still
# shows up in the overview and in the checks that follow.
print("Collecting Galera status information from nodes ...")
status = {}
for node in nodes:
    lxd_host = "%s:%s" % (lxd_status[node]["host"], node)
    status_command = ["lxc", "exec", lxd_host, "/root/galera_cluster_status"]
    try:
        raw_status = subprocess.check_output(status_command)
        node_status = json.loads(raw_status)
    except subprocess.CalledProcessError:
        node_status = {
            "cluster_size": 0,
            "cluster_status": 'Status Failed',
            "connected": "Unknown",
            "ready": "Unknown",
            "safe_to_bootstrap": 0,
            "seqno": -1,
            "uuid": None
        }
    # Remember how to reach the node, for the commands proposed later on.
    node_status['lxd_host'] = lxd_host
    node_status['node'] = node
    status[node] = node_status
|
|
|
|
def is_primary(s):
    """Return True when status record ``s`` has cluster status "Primary"."""
    cluster_status = s["cluster_status"]
    return cluster_status == "Primary"
|
|
|
|
def has_correct_cluster_size(s):
    """Return True when ``s`` reports a cluster size equal to len(nodes)."""
    reported_size = s["cluster_size"]
    return reported_size == len(nodes)
|
|
|
|
def is_connected(s):
    """Return True when status record ``s`` has its "connected" field "ON"."""
    connected = s["connected"]
    return connected == "ON"
|
|
|
|
def is_ready(s):
    """Return True when status record ``s`` has its "ready" field "ON"."""
    ready = s["ready"]
    return ready == "ON"
|
|
|
|
|
|
# Print an overview of the collected status: a header row, followed by one
# row per node in the order of the node list.
row_layout = "%-20s %-15s %-6s %-12s %-7s"
print("")
print(row_layout % ("Node", "Status", "Size", "Connected", "Ready"))
for node in nodes:
    s = status[node]
    columns = (
        node, s["cluster_status"], s["cluster_size"],
        s["connected"], s["ready"])
    print(row_layout % columns)

print("")
print("Checking cluster status ...")
print("")
|
|
|
|
# ----------------------------------------------------------------------------
# CASE: All cluster nodes are up and running, green lights galore!
# ----------------------------------------------------------------------------

# The cluster only counts as healthy when every node passes every check.
all_primary = all(is_primary(s) for s in status.values())
all_size_ok = all(has_correct_cluster_size(s) for s in status.values())
all_connected = all(is_connected(s) for s in status.values())
all_ready = all(is_ready(s) for s in status.values())

if all_primary and all_size_ok and all_connected and all_ready:
    print("There's no bootstrapping work to do here, all looks good!")
    print("")
    exit(0)
|
|
|
|
# ----------------------------------------------------------------------------
# CASE: The cluster is partially down, but some cluster hosts are still ok.
# ----------------------------------------------------------------------------

# Degraded means: at least one node is primary and at least one node is ready.
degraded = (
    any(is_primary(s) for s in status.values())
    and any(is_ready(s) for s in status.values()))
if degraded:
    print("The cluster seems to be in a degraded status.")
    print("Please investigate the cluster status.")
    print("- Can the cluster hosts reach each other over the network?")
    print("- Are all mariadb instances running?")
    print("")
    print("It might help to (re)start the database server on the degraded node(s):")
    print("")
    # Propose a restart for every node that is not in primary state.
    commands = []
    for node_status in status.values():
        if is_primary(node_status):
            continue
        commands.append(
            "lxc exec %s service mysql restart" % node_status["lxd_host"])
    propose(commands, 1)
|
|
|
|
# ----------------------------------------------------------------------------
# CASE: All cluster nodes are down, one cluster node is safe to bootstrap.
# ----------------------------------------------------------------------------

print("None of the cluster hosts is operational. A cluster bootup is required.")

safe_to_bootstrap = []
for node_status in status.values():
    if node_status["safe_to_bootstrap"] == 1:
        safe_to_bootstrap.append(node_status)

if safe_to_bootstrap:
    # Start a new cluster from the first flagged node; afterwards, start the
    # database server on all of the other nodes.
    bootstrap_node = safe_to_bootstrap[0]
    print("A node is marked as 'safe to bootstrap', so proposed strategy:")
    print("")
    start_cluster = "lxc exec %s galera_new_cluster" % bootstrap_node["lxd_host"]
    start_others = [
        "lxc exec %s service mysql start" % s["lxd_host"]
        for n, s in status.items()
        if n != bootstrap_node["node"]
    ]
    commands = [start_cluster] + start_others
    propose(commands, 2)
|
|
|
|
# ----------------------------------------------------------------------------
# CASE: All cluster nodes are down, no cluster node is safe to bootstrap.
# ----------------------------------------------------------------------------

print("Unfortunately, none of the nodes is marked as safe to bootstrap.")
print("Retrieving last recovered position for all cluster nodes ...")
print("")
print("%-20s %-15s %-40s" % ("Node", "Recovery pos", "UUID"))

# Ask every node for its recovered "<uuid>:<position>" value and store both
# parts in the status record of that node.
for n, s in status.items():
    try:
        result = subprocess.check_output([
            "lxc", "exec", s["lxd_host"],
            "/root/galera_wsrep_recovered_position"])
        uuid_and_pos = json.loads(result)
        uuid, pos = uuid_and_pos.split(":", 1)
        s["uuid"] = uuid
        s["pos"] = int(pos)
    except (subprocess.CalledProcessError, ValueError):
        # Either the command failed, or its output was not the expected
        # JSON-encoded "<uuid>:<integer>" string. In both cases, the position
        # of this node is unknown.
        s["uuid"] = "Unknown"
        s["pos"] = -1
    print("%-20s %-15d %-40s" % (n, s["pos"], s["uuid"]))

uuids = set(s["uuid"] for s in status.values())
if len(uuids) != 1:
    print("")
    print("Wow... now wait a minute... There are multiple UUID's in play!")
    print("That should never happen in a Galera cluster.")
    print("You will have to handle this one yourself I'm afraid.")
    print("")
    # Stop here: the recovered positions cannot be compared across different
    # (or unknown) UUIDs, so no bootstrap node must be proposed. Exit codes
    # 1 to 3 are already used by the propose() calls in this script.
    exit(4)


def get_pos_key(x):
    """Sort key: the recovered position stored in status record ``x``."""
    return x["pos"]


# The node with the highest recovered position has the most recent data, so
# that is the one to bootstrap the cluster from.
old_to_new = sorted(status.values(), key=get_pos_key)
bootstrap_node = old_to_new[-1]

print("")
print("Determined a node that is safe for bootstrapping, so proposed strategy:")
print("")
commands = [
    "lxc exec %s /root/galera_flag_as_safe_to_bootstrap" % bootstrap_node["lxd_host"],
    "lxc exec %s galera_new_cluster" % bootstrap_node["lxd_host"]
]
for n, s in status.items():
    if n == bootstrap_node["node"]:
        continue
    commands.append("lxc exec %s service mysql start" % s["lxd_host"])
propose(commands, 3)
|