liblog-bdm/util/logsupervisor.c

1247 lines
40 KiB
C
Raw Normal View History

2000-07-31 15:15:35 +02:00
#include <sys/types.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/times.h>
#include <signal.h>
#ifdef LINUX
#include <bits/siginfo.h>
#include <sys/wait.h>
#else
#include <sys/siginfo.h>
#include <wait.h>
#endif
#include <errno.h>
#include <ver.h>
#define ND_MODE 1
#include <node.h>
#define MSG_MODE 1
#include <msg.h>
#include <logagent.h>
extern int sigignore(int sig);
VER_INFO_EXPORT (logsupervisor, "$Revision: 1.1 $", "$Name: $", __FILE__, "$Author: smas $")
/*--------------------------------------------------------------------------------------------------*/
/* True when the timeval-like value has at least one non-zero field, i.e. a date has been stored in it. */
#define Date_IsSet(tvp) ((tvp).tv_sec || (tvp).tv_usec)
/* True when date tvp is strictly earlier than date uvp (seconds compared first, then microseconds). */
#define Date_Compare(tvp, uvp) ((tvp).tv_sec < (uvp).tv_sec || ((tvp).tv_sec == (uvp).tv_sec && (tvp).tv_usec < (uvp).tv_usec))
/* Table used to reference the agents tracked by the supervisor */
typedef struct {
pid_t Pid; /* Process identifier */
short int Restarted; /* Tells whether the agent has already crashed and been restarted */
short int GivenUp; /* Tells whether the agent has been abandoned (it crashed several times) */
} Agent_Ref;
/* Allocated by Supervisor_Start with Nb_Agent entries; agent number N is stored at index N - 1 */
Agent_Ref * Tab_Agent;
/* Requests to be handled by the supervisor (sorted by system message type) */
typedef struct {
struct timeval Request_Date; /* Start date of the request */
unsigned int Nb_Reply; /* Number of replies received from the agents */
Agent_Stat Reply_Stat [NB_MAX_AGENT]; /* Statistics of the agents that replied */
NDT_Root * Msg_List; /* Linked list of the messages waiting on this request */
} Waiting_Request;
/* One slot per request type; the slot index is derived from the message type in System_Msg_Process */
Waiting_Request Tab_Request [MSGD_NB_SYSTEM_MESSAGE];
/* Scratch buffer holding the text that Info_Trace prints */
char Debug_Trace [256];
/* Value passed to alarm() when a request is forwarded to the agents (seconds) */
unsigned int Timeout;
/*--------------------------------------------------------------------------------------------------*/
/* Program name handed to execlp() when an agent is started */
#define AGENT_PATH "logagent"
/* Values of the global Action selected on the command line by Parse_Arg */
#define SUPERVISOR_START 1
#define SUPERVISOR_PING 2
#define SUPERVISOR_STATUS_GET 3
#define SUPERVISOR_INFO_GET 4
#define SUPERVISOR_STOP_AGENT 5
#define SUPERVISOR_CONTINUE_AGENT 6
#define SUPERVISOR_TRACEON 7
#define SUPERVISOR_TRACEOFF 8
#define SUPERVISOR_SHUTDOWN 9
/* Forward declarations. NOTE(review): Supervisor_Ping, Supervisor_Status_Get, Supervisor_Info_Get,
   Supervisor_Agent_Stop, Supervisor_Agent_Continue and Supervisor_Shutdown have no definition in the
   part of the file reviewed here - confirm whether they are still needed. */
void Parse_Arg ( int, char ** );
void Supervisor_Start ( void );
void Supervisor_Request (unsigned int, size_t, int);
void Supervisor_Ping ( void );
void Supervisor_Status_Get ( void );
void Supervisor_Info_Get ( void );
void Supervisor_Agent_Stop ( void );
void Supervisor_Agent_Continue ( void );
void Supervisor_Shutdown ( void );
void System_Msg_Process ( MSGT_Message * );
int Agent_Msg_Send ( unsigned int, unsigned int, int );
int Agent_Start ( unsigned int );
void Agent_Restart ( int signum );
char * Agent_Status_Get ( unsigned int Agent_Status, unsigned int Agent_Debug );
void Request_Answer ( unsigned int Idx );
void Timeout_Handle ( int signum );
void Info_Trace ( void );
char * Status_Get ( void );
NDT_Status Request_List_Manager ( va_list );
/* Process-wide state: number of agents requested, number of abandoned agents, main-loop exit flag,
   supervisor status, trace flag and the action chosen on the command line */
unsigned int Nb_Agent, Nb_GivenUp, End_Supervisor, Status, Debug, Action;
/* Message ports: the supervisor's own port and the system port used by clients to receive answers */
MSGT_Port * Supervisor_Port, * System_Port;
/*--------------------------------------------------------------------------------------------------*/
/*
 * Entry point: parses the command line, then either starts the supervisor
 * or sends a single system request to an already running supervisor.
 * Always returns OK.
 */
int main ( int argc , char ** argv )
{
    unsigned int Request_Type;
    size_t Request_Size = ANSWER_SIZE;

    /* Read the requested action (and its options) from the command line */
    Parse_Arg (argc, argv);

    /* Starting the supervisor is the only action that is not a request sent to a running one */
    if (Action == SUPERVISOR_START)
    {
        Supervisor_Start ();
        return OK;
    }

    /* Map the action onto the system message type to send */
    switch (Action)
    {
        case SUPERVISOR_PING:
            Request_Type = MSGD_SYSTEM_PING_REQUEST;
            /* A ping carries timestamps instead of a textual answer */
            Request_Size = sizeof (MSGT_PingData);
            break;

        case SUPERVISOR_STATUS_GET:
            Request_Type = MSGD_SYSTEM_STATUS_REQUEST;
            break;

        case SUPERVISOR_INFO_GET:
            Request_Type = MSGD_SYSTEM_INFO_REQUEST;
            break;

        case SUPERVISOR_STOP_AGENT:
            Request_Type = MSGD_SYSTEM_STOP_REQUEST;
            break;

        case SUPERVISOR_CONTINUE_AGENT:
            Request_Type = MSGD_SYSTEM_CONTINUE_REQUEST;
            break;

        case SUPERVISOR_TRACEON:
            Request_Type = MSGD_SYSTEM_TRACEON_REQUEST;
            break;

        case SUPERVISOR_TRACEOFF:
            Request_Type = MSGD_SYSTEM_TRACEOFF_REQUEST;
            break;

        case SUPERVISOR_SHUTDOWN:
            Request_Type = MSGD_SYSTEM_SHUTDOWN_REQUEST;
            break;

        default:
            /* No action selected: nothing to do */
            return OK;
    }

    Supervisor_Request (Request_Type, Request_Size, MSGD_SYSTEM_HIGH_PRIORITY);

    return OK;
}
/*--------------------------------------------------------------------------------------------------*/
/* R<>cup<75>ration de la ligne de commande */
/*--------------------------------------------------------------------------------------------------*/
void Parse_Arg (int argc, char ** argv)
{
int i;
Debug = FALSE;
Nb_Agent = 0;
Timeout = 0;
for (i = 1; i < argc; i++)
{
if (!strcmp (argv[i], "--help") || !strcmp (argv[i], "-h"))
{
char Usage [1000];
sprintf (Usage,
"Usage : %s <options>\nOptions :\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n\t%-50s\t%s\n",
argv [0],
"--help | -h", "-> Affiche l'aide courante",
"--version [-v]", "-> Affiche le num<75>ro de version",
"--start <nb_agent> [--debug] [--timeout <nb_sec>]", "-> D<>marre le superviseur et <nb_agent> agents",
"--ping", "-> Ping le superviseur",
"--status", "-> Affiche le status de tous les agents et du superviseur",
"--info", "-> Affiche des informations statistiques de tous les agents",
"--stop", "-> Stoppe tous les agents",
"--continue", "-> Continue tous les agents stopp<70>s",
"--traceon", "-> Active le mode trace du superviseur et des agents",
"--traceoff", "-> D<>sactive le mode trace du superviseur et des agents",
"--shutdown", "-> Tue tous les agents et le superviseur");
fprintf (stderr, Usage);
exit (1);
}
if (!strcmp (argv[i], "--version"))
{
if (i+1 < argc && !strcmp (argv[i+1], "-v"))
{
VER_Object_Print (stdout, VERD_VERBOSE);
exit (0);
}
else
{
VER_Object_Print (stdout, VERD_MINIMAL);
exit (0);
}
}
if (!strcmp (argv[i], "--start"))
{
Action = SUPERVISOR_START;
i++;
if (i == argc)
{
fprintf (stderr, "Missing argument after %s\n", argv[i - 1]);
exit (0);
}
if ((Nb_Agent = atoi (argv[i])) <= 0)
{
fprintf (stderr, "Bad value (%s) after %s\n", argv[i], argv[i - 1]);
exit (0);
}
continue;
}
if (!strcmp (argv[i], "--ping"))
{
Action = SUPERVISOR_PING;
continue;
}
if (!strcmp (argv[i], "--status"))
{
Action = SUPERVISOR_STATUS_GET;
continue;
}
if (!strcmp (argv[i], "--info"))
{
Action = SUPERVISOR_INFO_GET;
continue;
}
if (!strcmp (argv[i], "--stop"))
{
Action = SUPERVISOR_STOP_AGENT;
continue;
}
if (!strcmp (argv[i], "--continue"))
{
Action = SUPERVISOR_CONTINUE_AGENT;
continue;
}
if (!strcmp (argv[i], "--traceon"))
{
Action = SUPERVISOR_TRACEON;
continue;
}
if (!strcmp (argv[i], "--traceoff"))
{
Action = SUPERVISOR_TRACEOFF;
continue;
}
if (!strcmp (argv[i], "--shutdown"))
{
Action = SUPERVISOR_SHUTDOWN;
continue;
}
if (!strcmp (argv[i], "--debug"))
{
Debug = TRUE;
continue;
}
if (!strcmp (argv[i], "--timeout"))
{
i++;
if (i == argc)
{
fprintf (stderr, "Missing argument after %s\n", argv[i - 1]);
exit (0);
}
if ((Timeout = atoi (argv[i])) <= 0)
{
fprintf (stderr, "Bad value (%s) after %s\n", argv[i], argv[i - 1]);
exit (0);
}
continue;
}
fprintf (stderr, "Option invalide \"%s\"\n", argv[i]);
exit (0);
}
if (Timeout == 0) Timeout = DEFAULT_TIMEOUT;
}
/*--------------------------------------------------------------------------------------------------*/
/*--------------------------------------- SUPERVISEUR ----------------------------------------------*/
/*--------------------------------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------------------------------*/
/* Envoie une requ<71>te au superviseur */
/*--------------------------------------------------------------------------------------------------*/
void Supervisor_Request (unsigned int Type, size_t Size, int Priority)
{
MSGT_Message * Msg;
Debug = TRUE;
/* Ouverture de la librairie LIBLOG */
if (MSG_Library_Open (0, NULL, MSGD_OPEN | MSGD_DEBUG_ALL) != MSGS_OK)
{
fprintf (stderr, "=> Impossible d'ouvrir la librairie LIBMSG\n");
return;
}
/* Ouverture du port de messages du superviseur */
if (MSG_Port_Open (MSGD_SUPERVISOR_PORT_NAME, &Supervisor_Port, MSGD_OPEN) != MSGS_OK)
{
fprintf (stderr, "=> Impossible d'ouvrir le port de messages du superviseur\n");
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Cr<43>ation / ouverture du port syst<73>me dont on se sert pour les accus<75>s r<>ception */
if (MSG_Port_Open (MSGD_SYSTEM_PORT_NAME, &System_Port, MSGD_OPEN | MSGD_CREATE) != MSGS_OK)
{
fprintf (stderr, "=> Impossible d'ouvrir ou de cr<63>er le port de messages syst<73>me\n");
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Cr<43>ation du message */
if (MSG_Message_Alloc (&Msg, Size) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de cr<63>er un message\n");
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Port_Close (System_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
memset (Msg->Data, 0, Msg->Size);
if (MSG_Message_Config (Msg, MSGD_CONFIG_TYPE, Type) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de configurer le type du message\n");
MSG_Message_Free (Msg);
MSG_Port_Close (System_Port, MSGD_CLOSE);
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
if (MSG_Message_Config (Msg, MSGD_CONFIG_PRIORITY, Priority) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de configurer la priorit<69> du message\n");
MSG_Message_Free (Msg);
MSG_Port_Close (System_Port, MSGD_CLOSE);
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Envoi du message avec demande d'accus<75> r<>ception dans le port syst<73>me */
if (MSG_Message_Send (MSGD_SYSTEM_PORT_NAME, Supervisor_Port, Msg) != MSGS_OK)
{
fprintf (stderr, "=> Impossible d'envoyer le message\n");
MSG_Port_Close (System_Port, MSGD_CLOSE);
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Fermeture du port de messages du superviseur */
if (MSG_Port_Close (Supervisor_Port, MSGD_CLOSE) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de fermer le port de messages du superviseur\n");
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Ecoute du port de messages syst<73>me */
if (MSG_Message_Receive (System_Port, MSGD_NO_TYPE, &Msg, MSGD_WAIT) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de r<>ceptionner un message dans le port syst<73>me\n");
MSG_Message_Free (Msg);
MSG_Library_Close (MSGD_CLOSE);
return;
}
if (Msg->Type == MSGD_SYSTEM_PING_REPLY)
{
double send_delay, rcv_delay, ping_delay;
struct timeval t1, t2, t3, t4;
t1 = ((MSGT_PingData *)(Msg->Data))->Snd1;
t2 = ((MSGT_PingData *)(Msg->Data))->Rcv1;
t3 = ((MSGT_PingData *)(Msg->Data))->Snd2;
t4 = ((MSGT_PingData *)(Msg->Data))->Rcv2;
send_delay = (double)(t2.tv_sec) - (double)(t1.tv_sec) + ((double)(t2.tv_usec) - (double)(t1.tv_usec)) / 1000000;
rcv_delay = (double)(t4.tv_sec) - (double)(t3.tv_sec) + ((double)(t4.tv_usec) - (double)(t3.tv_usec)) / 1000000;
ping_delay = (double)(t4.tv_sec) - (double)(t1.tv_sec) + ((double)(t4.tv_usec) - (double)(t1.tv_usec)) / 1000000;
fprintf (stdout, "Ping = %.4f sec (Aller = %.4f sec / Retour = %.4f sec)\n", ping_delay, send_delay, rcv_delay);
}
else
{
/* Autres messages syst<73>me */
fprintf (stdout, (char *)(Msg->Data));
}
/* D<>sallocation du message */
if (MSG_Message_Free (Msg) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de supprimer le message\n");
MSG_Port_Close (System_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Fermeture du port de messages syst<73>me */
if (MSG_Port_Close (System_Port, MSGD_CLOSE) != MSGS_OK)
{
fprintf (stderr, "=> Impossible de fermer le port de messages syst<73>me\n");
MSG_Library_Close (MSGD_CLOSE);
return ;
}
MSG_Library_Close (MSGD_CLOSE);
}
/*--------------------------------------------------------------------------------------------------*/
/* Proc<6F>dure de d<>marrage du superviseur */
/*--------------------------------------------------------------------------------------------------*/
void Supervisor_Start ( void )
{
MSGT_Message * Msg;
unsigned int i;
Nb_GivenUp = 0;
End_Supervisor = FALSE;
Status = ACTIVE;
/* Allocation d'un tableau pour r<>f<EFBFBD>rencer les agents */
Tab_Agent = (Agent_Ref *)malloc (Nb_Agent * sizeof (Agent_Ref));
if (!Tab_Agent)
{
strcpy (Debug_Trace, "impossible d'allouer de la m<>moire pour r<>f<EFBFBD>rencer les agents");
Info_Trace ();
return;
}
/* Initialisation du tableau des requ<71>tes soumises au superviseur */
for (i = 0; i < MSGD_NB_SYSTEM_MESSAGE; i++)
{
memset (&(Tab_Request [i]), 0, sizeof (Waiting_Request));
ND_DataStruct_Open (&(Tab_Request [i].Msg_List), NDD_DS_LIST | NDD_MN_FIFO, NULL, NULL, NULL, TRUE);
strcpy (Tab_Request [i].Msg_List->Manager, "Request_List_Manager");
}
/* Ouverture de la librairie LIBMSG */
if (MSG_Library_Open (0, NULL, MSGD_OPEN) != MSGS_OK)
{
strcpy (Debug_Trace, "impossible d'ouvrir la librairie LIBMSG");
Info_Trace ();
return;
}
/* Ouverture / cr<63>ation du port de messages du superviseur */
if (MSG_Port_Open (MSGD_SUPERVISOR_PORT_NAME, &Supervisor_Port, MSGD_OPEN | MSGD_CREATE) != MSGS_OK)
{
strcpy (Debug_Trace, "impossible d'ouvrir mon port de messages priv<69>");
Info_Trace ();
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* On trappe le signal SIGCHLD pour d<>tecter les agents qui tombent */
signal (SIGCHLD, Agent_Restart);
/* Lancement des agents */
for (i = 1; i <= Nb_Agent; i++) Agent_Start (i);
/* Boucle principale du superviseur */
while (End_Supervisor == FALSE)
{
/* Ecoute du port de messages du superviseur */
int rc = MSG_Message_Receive (Supervisor_Port, MSGD_NO_TYPE, &Msg, MSGD_WAIT);
switch (rc)
{
case MSGS_OK:
/* Traitement des messages envoy<6F>s au superviseur */
System_Msg_Process (Msg);
break;
case MSGS_ERRSIG:
/* On continue <20> boucler sur l'<27>coute */
break;
default:
strcpy (Debug_Trace, "impossible de r<>ceptionner un message dans mon port du messages priv<69>");
Info_Trace ();
MSG_Message_Free (Msg);
MSG_Port_Close (Supervisor_Port, MSGD_CLOSE);
MSG_Library_Close (MSGD_CLOSE);
return;
}
}
/* Fermeture du port de messages du superviseur */
if (MSG_Port_Close (Supervisor_Port, MSGD_CLOSE) != MSGS_OK)
{
strcpy (Debug_Trace, "impossible de fermer mon port de messages priv<69>");
Info_Trace ();
MSG_Library_Close (MSGD_CLOSE);
return;
}
/* Fermeture de la librairie LIBMSG */
MSG_Library_Close (MSGD_CLOSE);
/* D<>sallocation des ressources locales */
for (i = 0; i < MSGD_NB_SYSTEM_MESSAGE; i++)
{
if (Tab_Request [i].Msg_List) ND_DataStruct_Close (Tab_Request [i].Msg_List);
}
strcpy (Debug_Trace, "termin<EFBFBD>");
Info_Trace ();
}
/*--------------------------------------------------------------------------------------------------*/
/* Processing of a system message by the supervisor                                                 */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Handles one message received on the supervisor's port.
 *
 * (I) Msg : received message; this function takes it over (it is queued, freed or answered)
 *
 * - Request: the message is queued on the request of its type. The first queued
 *   message of a type starts the request: it is forwarded to every agent that has
 *   not been given up and the timeout alarm is armed (or it is answered at once
 *   when no agent is left).
 * - Reply from an agent: its statistics are stored; when every expected agent has
 *   replied, the request is answered. The reply message is then freed.
 * - Any other message type is discarded.
 */
void System_Msg_Process (MSGT_Message * Msg)
{
    unsigned int Idx;
    unsigned int i;

    /* Is it a REQUEST or a REPLY message? */
    if (MSGD_IS_SYSTEM_REQUEST (Msg->Type))
    {
        unsigned int Type = Msg->Type;

        Idx = (Type - 1) / 2;

        /* Is the message large enough to hold the answer? */
        if (Msg->Size < ANSWER_SIZE)
        {
            /* The sender name is read from the message: trace first, free afterwards */
            snprintf (Debug_Trace, sizeof (Debug_Trace), "la requ<71>te envoy<6F>e par \"%s\" ne sera pas prise en compte car le message est de taille insuffisante (%d octets minimum)", Msg->From, (int) ANSWER_SIZE);
            Info_Trace ();

            /* The message is dropped: too bad for the sender */
            MSG_Message_Free (Msg);

            return;
        }

        /*
         * The message is put on hold: it is answered once every agent has
         * replied, or when the timeout expires.
         */
        ND_Value_Add (Tab_Request [Idx].Msg_List, Msg);

        /* Nothing more to do when a request of the same type is already in progress */
        if (Tab_Request [Idx].Msg_List->Node_Number != 1) return;

        /* From a SHUTDOWN request on, SIGCHLD signals must be ignored */
        if (Type == MSGD_SYSTEM_SHUTDOWN_REQUEST) sigignore (SIGCHLD);

        /* TRACEON and TRACEOFF requests also apply to the supervisor itself */
        if (Type == MSGD_SYSTEM_TRACEON_REQUEST)
        {
            Debug = TRUE;
            sprintf (Debug_Trace, "activation du mode trace");
            Info_Trace ();
        }

        if (Type == MSGD_SYSTEM_TRACEOFF_REQUEST)
        {
            /* Traced before the flag is cleared, otherwise nothing would be printed */
            sprintf (Debug_Trace, "d<EFBFBD>sactivation du mode trace");
            Info_Trace ();
            Debug = FALSE;
        }

        /* Remember the date of the request */
        gettimeofday (&(Tab_Request [Idx].Request_Date), NULL);

        if (Nb_Agent - Nb_GivenUp == 0)
        {
            /* No valid agent is left: the request is answered at once */
            Request_Answer (Idx);
            return;
        }

        /* Otherwise an alarm is armed ... */
        signal (SIGALRM, Timeout_Handle);
        alarm (Timeout);

        /* ... and the request is forwarded to every agent, which must reply before the timeout */
        for (i = 1; i <= Nb_Agent; i++)
        {
            if (Tab_Agent [i - 1].GivenUp != FALSE) continue;

            if (Agent_Msg_Send (i, Type, MSGD_SYSTEM_HIGH_PRIORITY) != OK)
            {
                sprintf (Debug_Trace, "impossible de transmettre la requ<71>te <20> l'agent n<>%d", i);
                Info_Trace ();
            }
        }
    }
    else if (MSGD_IS_SYSTEM_REPLY (Msg->Type))   /* Reply from an agent */
    {
        Idx = (Msg->Type / 2) - 1;

        /* Is a request of this type in progress? */
        if (Tab_Request [Idx].Msg_List->Node_Number > 0)
        {
            Agent_Stat * Stat = (Agent_Stat *)(Msg->Data);
            int Num_Agent = 0;

            if (Msg->Size >= sizeof (Agent_Stat)) Num_Agent = Stat->Num_Agent;

            /* The agent number comes from the message and is used as an index: it must be checked */
            if (Num_Agent < 1 || Num_Agent > (int) NB_MAX_AGENT)
            {
                sprintf (Debug_Trace, "reponse ignoree : numero d'agent invalide (%d)", Num_Agent);
                Info_Trace ();
            }
            else
            {
                Agent_Stat * Slot = &(Tab_Request [Idx].Reply_Stat [Num_Agent - 1]);

                /* An agent is counted once only (Num_Agent is still 0 in the slot of an agent that has not replied) */
                if (Slot->Num_Agent == 0) Tab_Request [Idx].Nb_Reply++;

                /* Statistics update: the values sent by the agent are copied into our table */
                Slot->Num_Agent = Stat->Num_Agent;
                Slot->Pid = Stat->Pid;
                Slot->Cpt_Event = Stat->Cpt_Event;
                Slot->Cpt_System = Stat->Cpt_System;
                Slot->Status = Stat->Status;
                Slot->Debug = Stat->Debug;
                Slot->Answer = Stat->Answer;

                /* The request is answered as soon as every expected agent has replied */
                if (Tab_Request [Idx].Nb_Reply == Nb_Agent - Nb_GivenUp) Request_Answer (Idx);
            }
        }
        else
        {
            /* The agent replied too late: its reply is not taken into account */
        }

        /* The reply message is released */
        MSG_Message_Free (Msg);
    }
    else
    {
        /* Neither a system request nor a system reply: the message is discarded instead of being leaked */
        MSG_Message_Free (Msg);
    }
}
/*--------------------------------------------------------------------------------------------------*/
/* R<>ponse <20> une requ<71>te (proc<6F>dure ex<65>cut<75>e au timeout ou bien lorsque tous les agents ont r<>pondu)*/
/*--------------------------------------------------------------------------------------------------*/
void Request_Answer ( unsigned int Idx )
{
char Answer [ANSWER_SIZE];
unsigned int i, j;
unsigned int Type = 2 * Idx + 1;
NDT_Node * Node, * Next_Node;
NDT_Root * Msg_List;
char str [256];
Answer [0] = (char)0;
/* R<>ponse concernant le superviseur lui-m<>me */
switch (Type)
{
case MSGD_SYSTEM_TRACEON_REQUEST:
sprintf (str, "Superviseur : mode trace activ<69>\n");
strcat (Answer, str);
break;
case MSGD_SYSTEM_TRACEOFF_REQUEST:
sprintf (str, "Superviseur : mode trace d<>sactiv<69>\n");
strcat (Answer, str);
break;
case MSGD_SYSTEM_STATUS_REQUEST:
sprintf (str, "Superviseur : %s\n", Status_Get ());
strcat (Answer, str);
break;
case MSGD_SYSTEM_INFO_REQUEST:
/* Informations concernant les agents suivis */
sprintf (str, "Superviseur :\n\t- %d agent(s) suivi(s)\n\t- %d agent(s) abandonn<6E>(s)\n", Nb_Agent - Nb_GivenUp, Nb_GivenUp);
strcat (Answer, str);
/* On compte le nombre de requ<71>tes en attente */
j = 0;
for (i = 0; i < MSGD_NB_SYSTEM_MESSAGE; i++)
if (i != Idx && Tab_Request [i].Msg_List->Node_Number > 0) j++;
sprintf (str, "\t- %d requ<71>te(s) en attente de r<>ponse de la part des agents\n", j);
strcat (Answer, str);
/* On consolide les statistiques des agents concernant le nombre d'<27>v<EFBFBD>nements trait<69>s */
j = 0;
for (i = 0; i < MSGD_NB_SYSTEM_MESSAGE; i++)
j += Tab_Request [Idx].Reply_Stat [i].Cpt_Event;
sprintf (str, "\t- %d <20>v<EFBFBD>nement(s) trait<69>(s) par l'ensemble des agents ayant r<>pondu\n\n", j);
strcat (Answer, str);
break;
case MSGD_SYSTEM_SHUTDOWN_REQUEST:
End_Supervisor = TRUE;
strcpy (Debug_Trace, "terminaison en cours...");
Info_Trace ();
break;
default:
break;
}
/* R<>ponse concernant les agents auxquels la requ<71>te a <20>t<EFBFBD> transmise */
sprintf (str, "%d/%d agent(s) ont r<>pondu <20> la requ<71>te%s\n", Tab_Request [Idx].Nb_Reply, Nb_Agent - Nb_GivenUp, Tab_Request [Idx].Nb_Reply > 0 ? " :" : ".");
strcat (Answer, str);
for (i = 0; i < Nb_Agent; i++)
{
if (Tab_Agent [i].GivenUp == TRUE) continue;
sprintf (str, "\t- Agent n<>%d (process %ld) : ", i + 1, Tab_Agent [i].Pid);
strcat (Answer, str);
if (Tab_Request [Idx].Reply_Stat [i].Num_Agent == 0)
strcat (Answer, "pas de r<>ponse\n");
else
{
switch (Type)
{
case MSGD_SYSTEM_STOP_REQUEST:
sprintf (str, "%s\n", Tab_Request [Idx].Reply_Stat [i].Answer == TRUE ? "stopp<EFBFBD>" : "d<EFBFBD>j<EFBFBD> stopp<70>");
strcat (Answer, str);
break;
case MSGD_SYSTEM_CONTINUE_REQUEST:
sprintf (str, "%s\n", Tab_Request [Idx].Reply_Stat [i].Answer == TRUE ? "repris" : "d<EFBFBD>j<EFBFBD> actif");
strcat (Answer, str);
break;
case MSGD_SYSTEM_SHUTDOWN_REQUEST:
strcat (Answer, "arr<EFBFBD>t<EFBFBD>\n");
break;
case MSGD_SYSTEM_STATUS_REQUEST:
sprintf (str, "%s\n", Agent_Status_Get (Tab_Request [Idx].Reply_Stat [i].Status, Tab_Request [Idx].Reply_Stat [i].Debug));
strcat (Answer, str);
break;
case MSGD_SYSTEM_INFO_REQUEST:
sprintf (str, "%d <20>v<EFBFBD>nement(s) trait<69>(s) / %d message(s) syst<73>me(s) re<72>u(s)\n", Tab_Request [Idx].Reply_Stat [i].Cpt_Event, Tab_Request [Idx].Reply_Stat [i].Cpt_System);
strcat (Answer, str);
break;
case MSGD_SYSTEM_TRACEON_REQUEST:
strcat (Answer, "mode trace activ<69>\n");
break;
case MSGD_SYSTEM_TRACEOFF_REQUEST:
strcat (Answer, "mode trace d<>sactiv<69>\n");
break;
default:
break;
}
}
}
if (Type == MSGD_SYSTEM_SHUTDOWN_REQUEST) strcat (Answer, "Superviseur termin<69>.\n");
/* On r<>pond <20> tous les messages en attente sur cette requ<71>te */
Msg_List = Tab_Request [Idx].Msg_List;
ND_Node_First_Get (Msg_List, &Node);
while (Node)
{
MSGT_Message * Msg = (MSGT_Message *)(Node->Value);
char * Msg_Data = Msg->Data;
ND_Node_Next_Get (Node, &Next_Node);
ND_Node_Remove (Node);
/* On remplit le champ de donn<6E>es du message */
if (strlen (Answer) >= Msg->Size)
{
sprintf (Debug_Trace, "attention, le message (%d octets) n'est pas assez long pour contenir toute la r<>ponse (%d caract<63>res)\n", Msg->Size, strlen (Answer));
Info_Trace ();
strncpy (Msg_Data, Answer, Msg->Size - 1);
Msg_Data [Msg->Size - 1] = (char)0;
}
else strcpy (Msg_Data, Answer);
/* On renvoie le message <20> son envoyeur */
MSG_Message_Reply (Msg);
/* Message suivant */
Node = Next_Node;
}
/* On r<>initialise les donn<6E>es concernant la requ<71>te */
memset (&(Tab_Request [Idx]), 0, sizeof (Waiting_Request));
Tab_Request [Idx].Msg_List = Msg_List;
}
/*--------------------------------------------------------------------------------------------------*/
/* Proc<6F>dure appel<65>e lorsqu'un timeout expire */
/*--------------------------------------------------------------------------------------------------*/
void Timeout_Handle ( int signum )
{
unsigned int i, Older;
int Found = FALSE;
/* On recherche la requ<71>te la plus ancienne */
for (i = 0; i < MSGD_NB_SYSTEM_MESSAGE; i++)
{
if (Date_IsSet (Tab_Request [i].Request_Date) && (Found == FALSE || Date_Compare (Tab_Request [i].Request_Date, Tab_Request [Older].Request_Date)))
{
Found = TRUE;
Older = i;
}
}
/* On r<>pond <20> la requ<71>te */
if (Found == TRUE)
{
strcpy (Debug_Trace, "timeout expir<69> => r<>ponse <20> la requ<71>te la plus ancienne");
Info_Trace ();
Request_Answer (Older);
}
}
/*--------------------------------------------------------------------------------------------------*/
/* Red<65>marrage automatique d'un agent tomb<6D> */
/*--------------------------------------------------------------------------------------------------*/
void Agent_Restart ( int signum )
{
unsigned int Found, i;
pid_t Child_Pid;
#ifdef LINUX
int Child_Info;
#else
siginfo_t Child_Info;
#endif
unsigned int Restart;
/* On r<>initialise le trap du signal SIGCHLD */
signal (SIGCHLD, Agent_Restart);
/* On r<>cup<75>re les informations concernant le processus fils qui est tomb<6D> */
#ifdef LINUX
Child_Pid = waitpid (-1, &Child_Info, WNOHANG);
if (Child_Pid == -1)
{
sprintf (Debug_Trace, "impossible de r<>cup<75>rer les informations concernant un processus fils tomb<6D>");
Info_Trace ();
return;
}
#else
if (waitid (P_ALL, 0, &Child_Info, WEXITED) == -1)
{
sprintf (Debug_Trace, "impossible de r<>cup<75>rer les informations concernant un processus fils tomb<6D>");
Info_Trace ();
return;
}
Child_Pid = Child_Info.si_pid;
#endif
/* On recherche l'agent correspondant au process fils tomb<6D> */
Found = FALSE;
i = 0;
while (i < Nb_Agent && Found == FALSE)
{
if (Tab_Agent [i].GivenUp == FALSE)
{
if (Tab_Agent [i].Pid == Child_Pid) Found = TRUE;
else i++;
}
else i++;
}
if (Found == FALSE)
{
sprintf (Debug_Trace, "impossible de trouver l'agent correspondant au processus fils %ld tomb<6D>", Child_Pid);
Info_Trace ();
return;
}
/* Analyse de la raison pour laquelle le fils est tomb<6D> */
#ifdef LINUX
if (WIFEXITED(Child_Info))
#else
if (Child_Info.si_code == CLD_EXITED)
#endif
{
/* L'agent s'est termin<69> tout seul (comme un grand !!!) : on ne le relance pas */
Restart = FALSE;
#ifdef LINUX
sprintf (Debug_Trace, "d<EFBFBD>tection agent n<>%d termin<69> avec code retour %d", i + 1, WEXITSTATUS(Child_Info));
#else
sprintf (Debug_Trace, "d<EFBFBD>tection agent n<>%d termin<69> avec code retour %d", i + 1, Child_Info.si_status);
#endif
Info_Trace ();
}
#ifdef LINUX
else if (WIFSIGNALED(Child_Info) && WTERMSIG(Child_Info) == SIGKILL)
#else
else if (Child_Info.si_code == CLD_KILLED)
#endif
{
/* L'agent a <20> priori <20>t<EFBFBD> tu<74> volontairement : on ne le relance donc pas */
Restart = FALSE;
sprintf (Debug_Trace, "d<EFBFBD>tection agent n<>%d tu<74>", i + 1);
Info_Trace ();
}
#ifdef LINUX
else if WIFSIGNALED(Child_Info)
#else
else if (Child_Info.si_code == CLD_DUMPED)
#endif
{
/* L'agent s'est termin<69> anormallement : on essaie de relancer l'agent */
Restart = TRUE;
#ifdef LINUX
sprintf (Debug_Trace, "d<EFBFBD>tection agent n<>%d tomb<6D> suite <20> signal %d", i + 1, WTERMSIG(Child_Info));
#else
sprintf (Debug_Trace, "d<EFBFBD>tection agent n<>%d tomb<6D> suite <20> signal %d", i + 1, Child_Info.si_status);
#endif
Info_Trace ();
}
else Restart = TRUE;
if (Restart == TRUE)
{
/* Une tentative de relance a-t'elle d<>j<EFBFBD> <20>t<EFBFBD> lanc<6E>e ? */
if (Tab_Agent [i].Restarted == TRUE)
{
sprintf (Debug_Trace, "abandon de l'agent n<>%d", i + 1);
Info_Trace ();
Tab_Agent [i].GivenUp = TRUE;
Nb_GivenUp ++;
}
else
{
/* On red<65>marre l'agent */
sprintf (Debug_Trace, "relance de l'agent n<>%d", i + 1);
Info_Trace ();
Agent_Start (i + 1);
Tab_Agent [i].Restarted = TRUE;
}
}
else
{
sprintf (Debug_Trace, "abandon de l'agent n<>%d", i + 1);
Info_Trace ();
Tab_Agent [i].GivenUp = TRUE;
Nb_GivenUp ++;
}
}
/*--------------------------------------------------------------------------------------------------*/
/* Sending of a message to an agent                                                                 */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Sends a system message to the private port of an agent; the agent's answer
 * is requested on the supervisor's port.
 *
 * (I) Num_Agent : number of the agent (its port is named "Agent_<number>_port")
 * (I) Type      : message type
 * (I) Priority  : message priority
 *
 * Returns OK when the message has been sent and the port closed, KO otherwise
 * (the failure is reported through Info_Trace).
 */
int Agent_Msg_Send ( unsigned int Num_Agent, unsigned int Type, int Priority )
{
    MSGT_Message * Msg = NULL;   /* Message still owned by this function, if any */
    char Agent_Port_Name [256];
    MSGT_Port * Agent_Port;

    /* Open the private message port of the agent, whose number is known */
    sprintf (Agent_Port_Name, "Agent_%d_port", Num_Agent);

    if (MSG_Port_Open (Agent_Port_Name, &Agent_Port, MSGD_OPEN) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible d'ouvrir le port de messages \"%s\"", Agent_Port_Name);
        Info_Trace ();
        return KO;
    }

    /* Create the message (sized as a standard system message from the supervisor to the agents) */
    if (MSG_Message_Alloc (&Msg, sizeof (Agent_Stat)) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible de cr<63>er un message destin<69> <20> l'agent n<>%d", Num_Agent);
        Info_Trace ();
        Msg = NULL;
        goto failure;
    }

    /* Clear the message data */
    memset (Msg->Data, 0, Msg->Size);

    /* Set the message type */
    if (MSG_Message_Config (Msg, MSGD_CONFIG_TYPE, Type) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible de configurer le type du message destin<69> <20> l'agent n<>%d", Num_Agent);
        Info_Trace ();
        goto failure;
    }

    /* Set the message priority */
    if (MSG_Message_Config (Msg, MSGD_CONFIG_PRIORITY, Priority) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible de configurer la priorit<69> du message destin<69> <20> l'agent n<>%d", Num_Agent);
        Info_Trace ();
        goto failure;
    }

    /* Send the message, asking for the answer to be delivered to the supervisor's port */
    if (MSG_Message_Send (MSGD_SUPERVISOR_PORT_NAME, Agent_Port, Msg) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible d'envoyer le message destin<69> <20> l'agent n<>%d", Num_Agent);
        Info_Trace ();
        goto failure;
    }

    /* The message now sits in the agent's port: it must neither be freed nor touched any more */
    Msg = NULL;

    /* Close the port used for sending; a failed close is reported but not retried */
    if (MSG_Port_Close (Agent_Port, MSGD_CLOSE) != MSGS_OK)
    {
        sprintf (Debug_Trace, "impossible de fermer le port de messages de l'agent n<>%d", Num_Agent);
        Info_Trace ();
        return KO;
    }

    return OK;

failure:

    if (Msg) MSG_Message_Free (Msg);
    MSG_Port_Close (Agent_Port, MSGD_CLOSE);

    return KO;
}
/*--------------------------------------------------------------------------------------------------*/
/* Start of an agent                                                                                */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Forks a child process that executes the agent program, and references it
 * in Tab_Agent.
 *
 * (I) Num_Agent : number of the agent to start (1 .. NB_MAX_AGENT)
 *
 * Returns OK once the child process exists, KO when the number is out of
 * range or when the process could not be created. The child never returns
 * from this function: it either becomes the agent or terminates.
 */
int Agent_Start ( unsigned int Num_Agent )
{
    pid_t Agent_Pid;

    /* The agent number is used as an index (Num_Agent - 1): 0 is not a valid number */
    if (Num_Agent == 0)
    {
        strcpy (Debug_Trace, "numero d'agent invalide");
        Info_Trace ();
        return KO;
    }

    if (Num_Agent > NB_MAX_AGENT)
    {
        strcpy (Debug_Trace, "Limite maximale du nombre d'agents atteinte");
        Info_Trace ();
        return KO;
    }

    /* Create a child process */
#ifdef LINUX
    Agent_Pid = fork ();
#else
    /* On Solaris, fork1() must be used to support multi-threading */
    Agent_Pid = fork1 ();
#endif

    if (Agent_Pid == (pid_t) -1)
    {
        /* No process was created: nothing must be referenced as a running agent */
        sprintf (Debug_Trace, "impossible de creer un processus pour l'agent numero %d (fork: errno %d)", Num_Agent, errno);
        Info_Trace ();
        return KO;
    }

    if (Agent_Pid != 0)
    {
        /* Parent process (supervisor): the child process is referenced as a new agent */
        Tab_Agent [Num_Agent - 1].Pid = Agent_Pid;
        Tab_Agent [Num_Agent - 1].Restarted = FALSE;
        Tab_Agent [Num_Agent - 1].GivenUp = FALSE;

        sprintf (Debug_Trace, "lancement de l'agent n<>%d", Num_Agent);
        Info_Trace ();
    }
    else
    {
        /* Child process (agent) */
        char sNum_Agent [16];

        sprintf (sNum_Agent, "%u", Num_Agent);

        /* The argument list of execlp() must end with a null pointer of type char * */
        if (Debug == TRUE)
            execlp (AGENT_PATH, AGENT_PATH, "-id", sNum_Agent, "--debug", (char *) NULL);
        else
            execlp (AGENT_PATH, AGENT_PATH, "-id", sNum_Agent, (char *) NULL);

        /* execlp() only returns on failure */
        sprintf (Debug_Trace, "erreur lors du lancement de l'agent n<>%d (execl: errno %d)", Num_Agent, errno);
        Info_Trace ();

        /* _exit() leaves the exit handlers and stdio buffers inherited from the supervisor alone */
        _exit (KO);
    }

    return OK;
}
/*--------------------------------------------------------------------------------------------------*/
/* Prints on the standard error output a message generated by the supervisor                        */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Writes the content of Debug_Trace on stderr, prefixed with the current
 * local date and time. Does nothing when the trace mode (Debug) is off.
 */
void Info_Trace ( void )
{
    char Stamp [25];
    struct tm * Now;
    time_t Clock;

    /* Nothing is printed unless the trace mode is on */
    if (!Debug) return;

    /* Current date, broken down into local time */
    time (&Clock);
    Now = localtime (&Clock);

    sprintf (Stamp, "%02d/%02d/%04d %02d:%02d:%02d",
             Now->tm_mday, Now->tm_mon + 1, Now->tm_year + 1900,
             Now->tm_hour, Now->tm_min, Now->tm_sec);

    fprintf (stderr, "[%s] Superviseur : %s\n", Stamp, Debug_Trace);
}
/*--------------------------------------------------------------------------------------------------*/
/* Status of the supervisor                                                                         */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Returns a text describing the supervisor: its timeout and its trace mode.
 * The text lives in a static buffer that is overwritten by the next call.
 */
char * Status_Get ( void )
{
    static char Status_Str [256];
    const char * Trace_State = "Off";

    if (Debug == TRUE) Trace_State = "On";

    sprintf (Status_Str, "Timeout = %d sec / Trace = %s", Timeout, Trace_State);

    return Status_Str;
}
/*--------------------------------------------------------------------------------------------------*/
/* Status of an agent                                                                               */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Returns a text describing an agent.
 *
 * (I) Agent_Status : status reported by the agent (compared with ACTIVE)
 * (I) Agent_Debug  : trace mode reported by the agent (compared with TRUE)
 *
 * The text lives in a static buffer that is overwritten by the next call.
 */
char * Agent_Status_Get ( unsigned int Agent_Status, unsigned int Agent_Debug )
{
    static char Status_Str [256];
    const char * State_Label;
    const char * Trace_Label;

    if (Agent_Status == ACTIVE) State_Label = "actif";
    else State_Label = "stopp<EFBFBD>";

    if (Agent_Debug == TRUE) Trace_Label = "On";
    else Trace_Label = "Off";

    sprintf (Status_Str, "Statut = %s / Trace = %s", State_Label, Trace_Label);

    return Status_Str;
}
/*--------------------------------------------------------------------------------------------------*/
/* Manager of a list of messages waiting on a request                                               */
/*--------------------------------------------------------------------------------------------------*/
/*
 * Manager attached by name to the request lists in Supervisor_Start.
 * It ignores its arguments and always reports success.
 */
NDT_Status Request_List_Manager (va_list Args)
{
    NDT_Status Result = NDS_OK;

    return Result;
}