fixed the offline checks
This commit is contained in:
parent
75d5c26559
commit
90b05a5c83
|
@ -1,7 +1,8 @@
|
|||
|
||||
const express = require("express");
|
||||
const ping = require("ping");
|
||||
const pm2 = require("pm2");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
|
@ -12,6 +13,22 @@ const REMOTE_SERVERS = [
|
|||
];
|
||||
|
||||
const CHECK_INTERVAL = 5 * 1000;
|
||||
const LOGS_DIR = path.join(__dirname, '../../logs');
|
||||
const ONLINE_LOGS_DIR = path.join(LOGS_DIR, 'online');
|
||||
const OFFLINE_LOGS_DIR = path.join(LOGS_DIR, 'offline');
|
||||
|
||||
// Create log directories if they don't exist
|
||||
/**
 * Makes sure the log directory tree (LOGS_DIR plus its online/offline
 * subdirectories) exists on disk.
 *
 * Uses synchronous `fs.mkdirSync` with `recursive: true`, which creates any
 * missing parent directories and does not fail when the directory is already
 * there, so no separate existence check is needed.
 *
 * @throws {Error} Whatever `fs.mkdirSync` throws (e.g. permission errors).
 */
function ensureLogDirectories() {
  for (const dir of [LOGS_DIR, ONLINE_LOGS_DIR, OFFLINE_LOGS_DIR]) {
    fs.mkdirSync(dir, { recursive: true });
  }
}
|
||||
|
||||
let serversStatus = {};
|
||||
REMOTE_SERVERS.forEach(server => {
|
||||
|
@ -27,6 +44,8 @@ let pm2ServicesStatus = {};
|
|||
|
||||
async function checkServers() {
|
||||
try {
|
||||
ensureLogDirectories();
|
||||
|
||||
for (const server of REMOTE_SERVERS) {
|
||||
try {
|
||||
const res = await ping.promise.probe(server.host, {
|
||||
|
@ -40,6 +59,25 @@ async function checkServers() {
|
|||
serversStatus[server.name].responseTime = null;
|
||||
}
|
||||
serversStatus[server.name].lastChecked = new Date().toISOString();
|
||||
|
||||
// Log server status to the appropriate folder
|
||||
const timestamp = new Date().toISOString();
|
||||
const serverStatus = serversStatus[server.name];
|
||||
const logFolder = serverStatus.online ? ONLINE_LOGS_DIR : OFFLINE_LOGS_DIR;
|
||||
const logFilePath = path.join(logFolder, `${server.name.replace(/\s+/g, '_')}.log`);
|
||||
|
||||
// Create a human-readable log entry
|
||||
const logEntry = `[${timestamp}] Server: ${server.name} (${server.host})\n` +
|
||||
`Status: ${serverStatus.online ? 'ONLINE' : 'OFFLINE'}\n` +
|
||||
`Response Time: ${serverStatus.responseTime ? serverStatus.responseTime + 'ms' : 'N/A'}\n` +
|
||||
`-----------------------------------\n`;
|
||||
|
||||
// Append to log file
|
||||
fs.appendFile(logFilePath, logEntry, (err) => {
|
||||
if (err) {
|
||||
console.error(`Error writing log file for ${server.name}:`, err);
|
||||
}
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error in checkServers function:", error);
|
||||
|
@ -101,6 +139,8 @@ async function checkAll() {
|
|||
|
||||
// Initial check with error handling
|
||||
try {
|
||||
// Ensure log directories exist at startup
|
||||
ensureLogDirectories();
|
||||
checkAll();
|
||||
} catch (error) {
|
||||
console.error("Error during initial check:", error);
|
||||
|
|
|
@ -5,7 +5,7 @@ class NotificationService {
|
|||
this.client = client;
|
||||
this.authorizedUserId = process.env.AUTHORIZED_USER_ID;
|
||||
this.statusChannel = null;
|
||||
this.checkInterval = options.checkInterval || 10000; // Changed to 10 seconds default
|
||||
this.checkInterval = options.checkInterval || 5000; // Changed to 5 seconds default
|
||||
this.statusEndpoint = options.statusEndpoint || 'https://blahaj.tr:2589/status';
|
||||
this.notificationChannelId = process.env.STATUS_NOTIFICATION_CHANNEL;
|
||||
|
||||
|
@ -20,6 +20,15 @@ class NotificationService {
|
|||
|
||||
// Indicate if the service is running
|
||||
this.isRunning = false;
|
||||
|
||||
// Add counters to track consecutive failures before marking as offline
|
||||
this.failureTracking = {
|
||||
servers: {},
|
||||
pm2Services: {}
|
||||
};
|
||||
|
||||
// Number of consecutive failures required before considering something truly offline
|
||||
this.failureThreshold = 3;
|
||||
}
|
||||
|
||||
async initialize() {
|
||||
|
@ -75,21 +84,89 @@ class NotificationService {
|
|||
async checkStatus() {
|
||||
try {
|
||||
const currentStatus = await this.fetchStatus();
|
||||
const changes = this.detectChanges(this.previousStatus, currentStatus);
|
||||
|
||||
// Process current status and apply failure thresholds
|
||||
const processedStatus = this.processStatusWithThreshold(currentStatus);
|
||||
|
||||
// Detect changes between previous status and processed status
|
||||
const changes = this.detectChanges(this.previousStatus, processedStatus);
|
||||
|
||||
// If changes detected and not the first check, send notifications
|
||||
if (changes.length > 0 && !this.isFirstCheck) {
|
||||
await this.sendNotifications(changes, currentStatus);
|
||||
await this.sendNotifications(changes, processedStatus);
|
||||
}
|
||||
|
||||
// Update previous status and set first check to false
|
||||
this.previousStatus = currentStatus;
|
||||
this.previousStatus = processedStatus;
|
||||
this.isFirstCheck = false;
|
||||
} catch (error) {
|
||||
console.error(`Status check failed: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
processStatusWithThreshold(currentStatus) {
|
||||
const processedStatus = {
|
||||
servers: {...currentStatus.servers},
|
||||
pm2Services: {...currentStatus.pm2Services}
|
||||
};
|
||||
|
||||
// Process servers
|
||||
for (const server in currentStatus.servers) {
|
||||
if (!currentStatus.servers[server].online) {
|
||||
// Initialize counter if it doesn't exist
|
||||
if (!this.failureTracking.servers[server]) {
|
||||
this.failureTracking.servers[server] = 0;
|
||||
}
|
||||
|
||||
// Increment failures counter
|
||||
this.failureTracking.servers[server]++;
|
||||
|
||||
// If failures haven't reached threshold, keep it as online in the processed status
|
||||
if (this.failureTracking.servers[server] < this.failureThreshold) {
|
||||
processedStatus.servers[server] = {
|
||||
...currentStatus.servers[server],
|
||||
online: true // Keep it as online until threshold reached
|
||||
};
|
||||
console.log(`Server ${server} failure count: ${this.failureTracking.servers[server]}/${this.failureThreshold}`);
|
||||
} else {
|
||||
console.log(`Server ${server} marked offline after ${this.failureThreshold} consecutive failures`);
|
||||
}
|
||||
} else {
|
||||
// Reset counter if the server is online
|
||||
this.failureTracking.servers[server] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Process PM2 services
|
||||
for (const service in currentStatus.pm2Services) {
|
||||
if (currentStatus.pm2Services[service].status !== 'online') {
|
||||
// Initialize counter if it doesn't exist
|
||||
if (!this.failureTracking.pm2Services[service]) {
|
||||
this.failureTracking.pm2Services[service] = 0;
|
||||
}
|
||||
|
||||
// Increment failures counter
|
||||
this.failureTracking.pm2Services[service]++;
|
||||
|
||||
// If failures haven't reached threshold, keep it as online in the processed status
|
||||
if (this.failureTracking.pm2Services[service] < this.failureThreshold) {
|
||||
processedStatus.pm2Services[service] = {
|
||||
...currentStatus.pm2Services[service],
|
||||
status: 'online' // Keep it as online until threshold reached
|
||||
};
|
||||
console.log(`Service ${service} failure count: ${this.failureTracking.pm2Services[service]}/${this.failureThreshold}`);
|
||||
} else {
|
||||
console.log(`Service ${service} marked as ${currentStatus.pm2Services[service].status} after ${this.failureThreshold} consecutive failures`);
|
||||
}
|
||||
} else {
|
||||
// Reset counter if the service is online
|
||||
this.failureTracking.pm2Services[service] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return processedStatus;
|
||||
}
|
||||
|
||||
detectChanges(previousStatus, currentStatus) {
|
||||
const changes = [];
|
||||
|
||||
|
|
Loading…
Reference in a new issue