view mod_http_host_status_check/mod_http_host_status_check.lua @ 2712:d89ab70808f6

mod_cloud_notify: fix bug when multiple resources are used This bug was triggered under the rare circumstances that a message arrived and one resource was smacks hibernated while the other one(s) were offline. Then only the hibernated resource but not the offline one(s) (or the other way round) got notified.
author tmolitor <thilo@eightysoft.de>
date Mon, 08 May 2017 18:24:29 +0200
parents 2a7e9d9e7339
children 85cf9a8b4020
line wrap: on
line source

local heartbeats = module:shared("/*/host_status_check/heartbeats");
local events = module:shared("/*/host_status_check/connection_events");
local host_status_ok = module:shared("host_status_ok");

local time = require "socket".gettime;
local template = require "util.interpolation".new("%b{}", function (s) return s end)

module:depends "http"

local threshold = module:get_option_number("status_check_heartbeat_threshold", 10);

local function status_string(status, duration, comment)
	local string_timestamp;
	if duration then
		string_timestamp = ("(%0.2fs%s)"):format(duration, comment or "");
	elseif comment then
		string_timestamp = ("(%s)"):format(comment);
	else
		return status and "UP" or "DOWN";
	end
	return (status and "UP " or "DOWN ")..string_timestamp;
end

local function string_pad(s, len)
	return s..(" "):rep(len-#s);
end

local status_page_template = [[
STATUS {status}
{host_statuses%HOST {item} {idx}
}]];

function status_page()
	local host_statuses = {};
	local current_time = time();

	local all_ok = true;

	for host in pairs(hosts) do
		local last_heartbeat_time = heartbeats[host];
		
		local ok, status_text = true;
		
		local is_component = hosts[host].type == "component" and hosts[host].modules.component;
		
		if is_component then
			local current_status = hosts[host].modules.component.connected;
			if events[host] then
				local tracked_status = events[host].connected;
				if tracked_status == current_status then
					status_text = status_string(current_status, time() - events[host].timestamp);
				else
					status_text = status_string(current_status, nil, "!");
				end
			else
				status_text = status_string(current_status, nil, "?");
			end
			if not current_status then
				ok = false;
			end
		else
			local event_info = events[host];
			local connected = true;
			if event_info then
				connected = event_info.connected;
			end
			status_text = status_string(connected, event_info and (time() - events[host].timestamp), not event_info and "?");
		end

		if last_heartbeat_time then
			local time_since_heartbeat = current_time - last_heartbeat_time;
			if ok then
				if time_since_heartbeat > threshold then
					status_text = ("TIMEOUT (%0.2fs)"):format(time_since_heartbeat);
					ok = false;
				else
					status_text = status_text:gsub("^%S+", "GOOD");
				end
			end
		end

		if not ok then
			all_ok = false;
		end
		
		if not ok or is_component or last_heartbeat_time then
			host_statuses[host] = string_pad(status_text, 20);
		end
		local last_ok = host_status_ok[host];
		if last_ok ~= ok then
			if last_ok ~= nil then
				module:log("warn", "Host status check %s (%s)", ok and "OK" or "FAILED", status_text);
			end
			host_status_ok[host] = ok;
		end
	end
	local page = template(status_page_template, {
		status = all_ok and "OK" or "FAIL";
		host_statuses = host_statuses;
	});
	return page;
end

module:provides("http", {
	route = {
		GET = status_page;
	};
})