5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2005, 2007 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router);
26 SILC_TASK_CALLBACK(silc_server_backup_announce_watches);
28 static void silc_server_backup_connect_primary(SilcServer server,
29 SilcServerEntry server_entry,
33 /************************** Types and Definitions ***************************/
37 SilcServerEntry server;
41 } SilcServerBackupEntry;
43 /* Holds IP address and port of the primary router that was replaced
48 SilcServerEntry server; /* Backup router that replaced the primary */
49 } SilcServerBackupReplaced;
52 struct SilcServerBackupStruct {
53 SilcServerBackupEntry *servers;
54 SilcUInt32 servers_count;
55 SilcServerBackupReplaced **replaced;
56 SilcUInt32 replaced_count;
62 SilcServerEntry server_entry;
63 } SilcServerBackupProtocolSession;
65 /* Backup resuming protocol context */
68 SilcPacketStream sock;
71 SilcServerBackupProtocolSession *sessions;
72 SilcUInt32 sessions_count;
73 SilcUInt32 initiator_restart;
75 unsigned int responder : 1;
76 unsigned int received_failure : 1;
77 unsigned int timeout : 1;
78 unsigned int error : 1;
79 } *SilcServerBackupProtocolContext;
82 /********************* Backup Configuration Routines ************************/
84 /* Adds the `backup_server' to be one of our backup routers. This can be
85 called multiple times to set multiple backup routers. The `ip' and `port'
86 are the IP and port that the `backup_server' will replace if the `ip'
87 will become unresponsive. If `local' is TRUE then the `backup_server' is
88 in the local cell, if FALSE it is in some other cell. */
90 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
91 const char *ip, int port, SilcBool local)
/* Create the backup context on first use. */
98 if (!server->backup) {
99 server->backup = silc_calloc(1, sizeof(*server->backup));
104 /* See if already added */
105 for (i = 0; i < server->backup->servers_count; i++) {
106 if (server->backup->servers[i].server == backup_server)
/* NOTE(review): the format string below has two %s conversions but three
   arguments follow it, so `port' is never printed. */
110 SILC_LOG_DEBUG(("Backup router %s will replace %s",
111 backup_server->data.sconn->remote_host, ip, port));
/* First choice: reuse a slot whose server pointer is NULL, which is how
   silc_server_backup_del() leaves a removed entry. */
113 for (i = 0; i < server->backup->servers_count; i++) {
114 if (!server->backup->servers[i].server) {
115 server->backup->servers[i].server = backup_server;
116 server->backup->servers[i].local = local;
/* The port goes through SILC_SWAB_16() before being stored; presumably this
   matches the representation of the port in SilcServerID, which
   silc_server_backup_get() compares against -- TODO confirm. */
117 server->backup->servers[i].port = SILC_SWAB_16(port);
/* The address buffer is zeroed before the textual `ip' is converted into it. */
118 memset(server->backup->servers[i].ip.data, 0,
119 sizeof(server->backup->servers[i].ip.data));
120 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
121 sizeof(server->backup->servers[i].ip.data));
/* No free slot: reallocate the table and fill entry [servers_count] with the
   same four fields as above.
   NOTE(review): the silc_realloc() result is stored straight back into
   server->backup->servers and no failure check is visible here. */
126 i = server->backup->servers_count;
127 server->backup->servers = silc_realloc(server->backup->servers,
128 sizeof(*server->backup->servers) *
130 server->backup->servers[i].server = backup_server;
131 server->backup->servers[i].local = local;
132 server->backup->servers[i].port = SILC_SWAB_16(port);
133 memset(server->backup->servers[i].ip.data, 0,
134 sizeof(server->backup->servers[i].ip.data));
135 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
136 sizeof(server->backup->servers[i].ip.data));
137 server->backup->servers_count++;
140 /* Returns backup router for IP and port in `server_id' or NULL if there
141 does not exist backup router. */
143 SilcServerEntry silc_server_backup_get(SilcServer server,
144 SilcServerID *server_id)
/* A slot matches when it is occupied and both its stored port and its raw
   IP bytes equal the ones carried in `server_id'. */
151 for (i = 0; i < server->backup->servers_count; i++) {
152 if (server->backup->servers[i].server &&
153 server->backup->servers[i].port == server_id->port &&
154 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
155 sizeof(server_id->ip.data))) {
156 SILC_LOG_DEBUG(("Found backup router %s for %s",
157 server->backup->servers[i].server->server_name,
158 silc_id_render(server_id, SILC_ID_SERVER)));
/* The first matching slot is returned. */
159 return server->backup->servers[i].server;
166 /* Deletes the backup server `server_entry'. */
168 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
/* Look for the slot that holds `server_entry'.  In the visible code the slot
   is only cleared (server pointer set to NULL, address bytes zeroed); the
   table is not shrunk and servers_count is not changed, which is what lets
   silc_server_backup_add() reuse the slot later. */
175 for (i = 0; i < server->backup->servers_count; i++) {
176 if (server->backup->servers[i].server == server_entry) {
177 SILC_LOG_DEBUG(("Removing %s as backup router",
178 silc_id_render(server->backup->servers[i].server->id,
180 server->backup->servers[i].server = NULL;
181 memset(server->backup->servers[i].ip.data, 0,
182 sizeof(server->backup->servers[i].ip.data));
187 /* Frees all data allocated for backup routers. Call this after deleting
188 all backup routers and when new routers are added no more, for example
189 when shutting down the server. */
191 void silc_server_backup_free(SilcServer server)
198 /* Delete existing servers if caller didn't do it */
199 for (i = 0; i < server->backup->servers_count; i++) {
200 if (server->backup->servers[i].server)
201 silc_server_backup_del(server, server->backup->servers[i].server);
/* Release the server table and the context itself, then clear the pointer so
   that later `!server->backup' tests see that no context exists.
   NOTE(review): the `replaced' array and the records stored in it by
   silc_server_backup_replaced_add() are not released in the lines visible
   here -- possible leak, confirm. */
204 silc_free(server->backup->servers);
205 silc_free(server->backup);
206 server->backup = NULL;
209 /* Marks the IP address and port from the `server_id' as being replaced
210 by backup router indicated by the `server_entry'. If the router connects at
211 a later time we can check whether it has been replaced by a backup
214 void silc_server_backup_replaced_add(SilcServer server,
215 SilcServerID *server_id,
216 SilcServerEntry server_entry)
/* NOTE(review): the initializer below ends with a stray second `;' (an empty
   statement).  Also, `r' is written through further down and no NULL check
   on the silc_calloc() result is visible here -- confirm. */
219 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
222 server->backup = silc_calloc(1, sizeof(*server->backup));
/* First use of the replaced table: allocate it with one element and set the
   count to 1, so the slot search below has a slot to examine. */
223 if (!server->backup->replaced) {
224 server->backup->replaced =
225 silc_calloc(1, sizeof(*server->backup->replaced));
226 server->backup->replaced_count = 1;
229 SILC_LOG_DEBUG(("Replacing router %s with %s",
230 silc_id_render(server_id, SILC_ID_SERVER),
231 server_entry->server_name));
/* The record keeps the `ip' member of the ID and the replacing server entry.
   NOTE(review): no copy of the port is visible in this function. */
233 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
234 r->server = server_entry;
/* Store the record in the first NULL slot, if there is one. */
236 for (i = 0; i < server->backup->replaced_count; i++) {
237 if (!server->backup->replaced[i]) {
238 server->backup->replaced[i] = r;
/* No NULL slot: reallocate the pointer table and store the record at index
   [replaced_count]. */
243 i = server->backup->replaced_count;
244 server->backup->replaced = silc_realloc(server->backup->replaced,
245 sizeof(*server->backup->replaced) *
247 server->backup->replaced[i] = r;
248 server->backup->replaced_count++;
251 /* Checks whether the IP address and port from the `server_id' has been
252 replaced by a backup router. If it has been then this returns TRUE
253 and the backup router entry to the `server_entry' pointer if non-NULL. Returns
254 FALSE if the router is not replaced by backup router. */
256 SilcBool silc_server_backup_replaced_get(SilcServer server,
257 SilcServerID *server_id,
258 SilcServerEntry *server_entry)
/* Nothing can match when there is no backup context or no replaced table. */
262 if (!server->backup || !server->backup->replaced)
265 for (i = 0; i < server->backup->replaced_count; i++) {
/* NULL slots are holes left by silc_server_backup_replaced_del(). */
266 if (!server->backup->replaced[i])
/* The match is made on the raw IP bytes only; no port comparison is visible
   in this function. */
268 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
269 sizeof(server_id->ip.data))) {
271 *server_entry = server->backup->replaced[i]->server;
272 SILC_LOG_DEBUG(("Router %s is replaced by %s",
273 silc_id_render(server_id, SILC_ID_SERVER),
274 server->backup->replaced[i]->server->server_name));
279 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
280 silc_id_render(server_id, SILC_ID_SERVER)));
284 /* Deletes the replaced host that was set by `server_entry'. */
286 void silc_server_backup_replaced_del(SilcServer server,
287 SilcServerEntry server_entry)
/* Nothing to delete when there is no backup context or no replaced table. */
291 if (!server->backup || !server->backup->replaced)
294 for (i = 0; i < server->backup->replaced_count; i++) {
295 if (!server->backup->replaced[i])
/* Records are matched by the replacing server entry, not by address.  The
   record is freed and its slot set to NULL; replaced_count is not changed in
   the visible code, so silc_server_backup_replaced_add() can reuse the slot. */
297 if (server->backup->replaced[i]->server == server_entry) {
298 silc_free(server->backup->replaced[i]);
299 server->backup->replaced[i] = NULL;
304 /* Broadcast the received packet indicated by `packet' to all of our backup
305 routers. All router wide information is passed using broadcast packets.
306 That is why all backup routers need to get this data too. It is expected
307 that the caller already knows that the `packet' is broadcast packet. */
309 void silc_server_backup_broadcast(SilcServer server,
310 SilcPacketStream sender,
313 SilcServerEntry backup;
314 SilcPacketStream sock;
/* Tests for "no backup context" or "not a router"; the statement this
   controls is not visible here (presumably an early return). */
317 if (!server->backup || server->server_type != SILC_ROUTER)
320 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
322 for (i = 0; i < server->backup->servers_count; i++) {
323 backup = server->backup->servers[i].server;
/* Conditions tested per slot before routing: empty slot, the slot whose
   connection is the stream the packet came from, a slot not marked local,
   and the slot that is this server's own entry.  The controlled statements
   are not visible here (presumably `continue'). */
325 if (!backup || backup->connection == sender ||
326 server->backup->servers[i].local == FALSE)
328 if (server->backup->servers[i].server == server->id_entry)
/* Hand the packet to the router's own connection. */
331 sock = backup->connection;
332 silc_server_packet_route(server, sock, packet);
336 /* A generic routine to send data to all backup routers. If the `sender'
337 is provided it will indicate the original sender of the packet and the
338 packet won't be resent to that entity. The `data' is the data that will
339 be assembled to packet context before sending. The packet will be
340 encrypted by this function. If the `force_send' is TRUE the data is sent
341 immediately and not put to queue. If `local' is TRUE then the packet
342 will be sent only to local backup routers inside the cell. If FALSE the
343 packet can go from one cell to the other. This function has no effect
344 if there are no backup routers. */
346 void silc_server_backup_send(SilcServer server,
347 SilcServerEntry sender,
349 SilcPacketFlags flags,
355 SilcServerEntry backup;
356 SilcPacketStream sock;
/* Tests for "no backup context" or "not a router"; the statement this
   controls is not visible here (presumably an early return). */
359 if (!server->backup || server->server_type != SILC_ROUTER)
362 for (i = 0; i < server->backup->servers_count; i++) {
363 backup = server->backup->servers[i].server;
/* Per-slot tests: empty slot or the original sender; a non-local slot when
   `local' is set; and this server's own entry.  The controlled statements
   are not visible here (presumably `continue'). */
364 if (!backup || sender == backup)
366 if (local && server->backup->servers[i].local == FALSE)
368 if (server->backup->servers[i].server == server->id_entry)
/* NOTE(review): `sock' is assigned here, but the visible send call below
   passes backup->connection directly, so `sock' looks unused -- confirm. */
371 sock = backup->connection;
373 silc_server_packet_send(server, backup->connection, type, flags,
378 /* Same as silc_server_backup_send but sets a specific Destination ID to
379 the packet. The Destination ID is indicated by the `dst_id' and the
380 ID type `dst_id_type'. For example, packets destined to channels must
381 be sent using this function. */
383 void silc_server_backup_send_dest(SilcServer server,
384 SilcServerEntry sender,
386 SilcPacketFlags flags,
388 SilcIdType dst_id_type,
394 SilcServerEntry backup;
395 SilcPacketStream sock;
/* Tests for "no backup context" or "not a router"; the statement this
   controls is not visible here (presumably an early return). */
398 if (!server->backup || server->server_type != SILC_ROUTER)
401 for (i = 0; i < server->backup->servers_count; i++) {
402 backup = server->backup->servers[i].server;
/* Same per-slot tests as in silc_server_backup_send(): empty slot or the
   original sender; a non-local slot when `local' is set; this server's own
   entry. */
403 if (!backup || sender == backup)
405 if (local && server->backup->servers[i].local == FALSE)
407 if (server->backup->servers[i].server == server->id_entry)
/* NOTE(review): `sock' is assigned here, but the visible send call below
   passes backup->connection directly, so `sock' looks unused -- confirm. */
410 sock = backup->connection;
/* The only difference from silc_server_backup_send() visible here is that the
   explicit destination ID and its type are forwarded to the _dest sender. */
412 silc_server_packet_send_dest(server, backup->connection, type, flags,
413 dst_id, dst_id_type, data, data_len);
417 /* Send the START_USE indication to remote connection. If `failure' is
418 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
419 SILC_PACKET_RESUME_ROUTER. */
421 void silc_server_backup_send_start_use(SilcServer server,
422 SilcPacketStream sock,
425 unsigned char data[4];
427 SILC_LOG_DEBUG(("Sending START_USE (%s)",
428 failure ? "failure" : "success"));
/* Failure form: the START_USE code is written into `data' with
   SILC_PUT32_MSB() and sent as a SILC_PACKET_FAILURE packet. */
431 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
432 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
/* Success form: the START_USE code goes into the first payload byte of a
   SILC_PACKET_RESUME_ROUTER packet.  The remaining payload setup and the
   length argument are not visible here. */
435 data[0] = SILC_SERVER_BACKUP_START_USE;
437 silc_server_packet_send(server, sock,
438 SILC_PACKET_RESUME_ROUTER, 0,
443 /* Send the REPLACED indication to remote router. This is sent by the
444 primary router (remote router) of the primary router that came back
445 online. This is not sent by backup router or any other server. */
447 void silc_server_backup_send_replaced(SilcServer server,
448 SilcPacketStream sock)
450 unsigned char data[4];
452 SILC_LOG_DEBUG(("Sending REPLACED"));
/* The REPLACED code goes into the first payload byte of a
   SILC_PACKET_RESUME_ROUTER packet.  Only data[0] is assigned in the lines
   visible here; the rest of the payload setup and the length argument are
   not shown. */
454 data[0] = SILC_SERVER_BACKUP_REPLACED;
456 silc_server_packet_send(server, sock,
457 SILC_PACKET_RESUME_ROUTER, 0,
462 /************************ Backup Resuming Protocol **************************/
464 /* Timeout callback for protocol */
466 SILC_TASK_CALLBACK(silc_server_backup_timeout)
/* `context' is the protocol context and `app_context' is the server. */
468 SilcServerBackupProtocolContext ctx = context;
469 SilcServer server = app_context;
471 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
/* silc_server_protocol_backup_done is not called directly; it is queued on
   the scheduler with the same context. */
474 silc_schedule_task_add_timeout(server->schedule,
475 silc_server_protocol_backup_done, context,
479 /* Callback to start the protocol as responder */
481 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
/* `context' is the responder's protocol context; the connection data is
   looked up from the stream stored in it. */
483 SilcServerBackupProtocolContext proto_ctx = context;
484 SilcPacketStream sock = proto_ctx->sock;
485 SilcIDListData idata = silc_packet_get_context(sock);
486 SilcServer server = app_context;
488 /* If other protocol is executing at the same time, start with timeout. */
489 if (idata->sconn->op) {
490 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
/* Queue this same callback again so that the check above is repeated later. */
491 silc_schedule_task_add_timeout(server->schedule,
492 silc_server_backup_responder_start,
497 /* Register protocol timeout */
498 silc_schedule_task_add_timeout(server->schedule,
499 silc_server_backup_timeout,
502 /* Run the backup resuming protocol */
503 silc_schedule_task_add_timeout(server->schedule,
504 silc_server_protocol_backup,
508 /* Callback to send START_USE to backup to check whether using backup
511 SILC_TASK_CALLBACK(silc_server_backup_check_status)
/* `context' is the packet stream of the backup router and `app_context' is
   the server. */
513 SilcPacketStream sock = context;
514 SilcServer server = app_context;
516 /* Check whether we are still using backup */
517 if (!server->backup_primary)
/* Send a non-failure START_USE, then release one reference on the stream.
   Presumably this pairs with a reference taken by whoever scheduled the
   task -- confirm.
   NOTE(review): the statement controlled by the test above is not visible;
   if it returns early, check that the stream reference is still released. */
520 silc_server_backup_send_start_use(server, sock, FALSE);
521 silc_packet_stream_unref(sock);
526 SilcPacketStream sock;
528 } *SilcServerBackupPing;
530 /* PING command reply callback */
532 void silc_server_backup_ping_reply(void *context, void *reply)
/* `context' is the SilcServerBackupPing record prepared when the PING was
   sent; `reply' is the command reply context. */
534 SilcServerBackupPing pc = context;
535 SilcServerCommandReplyContext cmdr = reply;
537 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
538 /* Timeout error occurred, the primary is really down. */
539 SilcPacketStream primary = SILC_PRIMARY_ROUTE(pc->server);
541 SILC_LOG_DEBUG(("PING timeout, primary is down"));
/* Tear down the connection to the primary route before the stored
   RESUME_ROUTER packet is processed again. */
544 silc_server_free_sock_user_data(pc->server, primary, NULL);
545 silc_server_close_connection(pc->server, primary);
548 /* Reprocess the RESUME_ROUTER packet */
549 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
551 /* The primary is not down, refuse to serve the server as primary */
552 SILC_LOG_DEBUG(("PING received, primary is up"));
553 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
/* Release the stream reference taken when the PING was sent, and the stored
   packet.
   NOTE(review): silc_server_backup_resume_router() calls silc_packet_free()
   on its packet on the paths visible in this file, so after the timeout
   branch above pc->packet may be freed a second time here -- verify packet
   ownership. */
556 silc_packet_stream_unref(pc->sock);
557 silc_packet_free(pc->packet);
561 /* Processes incoming RESUME_ROUTER packet. This can give the packet
562 for processing to the protocol handler or allocate new protocol if
563 start command is received. */
565 void silc_server_backup_resume_router(SilcServer server,
566 SilcPacketStream sock,
569 SilcIDListData idata = silc_packet_get_context(sock);
570 SilcUInt8 type, session;
571 SilcServerBackupProtocolContext ctx;
574 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
/* The packet is rejected (logged and freed) when it arrives on a client
   connection or on a connection of unknown type. */
576 if (idata->conn_type == SILC_CONN_CLIENT ||
577 idata->conn_type == SILC_CONN_UNKNOWN) {
578 SILC_LOG_DEBUG(("Bad packet received"));
579 silc_packet_free(packet);
/* The payload starts with two SILC_STR_UI_CHAR fields: the sub-type and the
   session number. */
583 ret = silc_buffer_unformat(&packet->buffer,
584 SILC_STR_UI_CHAR(&type),
585 SILC_STR_UI_CHAR(&session),
588 SILC_LOG_ERROR(("Malformed resume router packet received"));
589 silc_packet_free(packet);
593 /* Check whether this packet is used to tell us that server will start
594 using us as primary router. */
595 if (type == SILC_SERVER_BACKUP_START_USE) {
597 SilcServerBackupPing pc;
599 /* If we are normal server then backup router has sent us back
600 this reply and we use the backup as primary router now. */
601 if (server->server_type == SILC_SERVER) {
602 /* Nothing to do here actually, since we have switched already. */
603 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
604 silc_packet_free(packet);
608 /* Backup router following. */
610 /* If we are marked as router then the primary is down and we send
611 success START_USE back to the server. */
612 if (server->server_type == SILC_ROUTER) {
613 SILC_LOG_DEBUG(("Sending success START_USE back"));
614 silc_server_backup_send_start_use(server, sock, FALSE);
615 silc_packet_free(packet);
619 /* We have just lost primary, send success START_USE back */
620 if (server->standalone) {
621 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back"));
622 silc_server_backup_send_start_use(server, sock, FALSE);
623 silc_packet_free(packet);
627 /* We are backup router. This server claims that our primary is down.
628 We will check this ourselves by sending PING command to the primary. */
629 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
/* The PING carries the ID of server->router as its single argument and is
   sent over the primary route with a freshly incremented command identifier. */
630 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
631 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
632 SILC_COMMAND_PING, ++server->cmd_ident, 1,
633 1, idp->data, silc_buffer_len(idp));
634 silc_buffer_free(idp);
636 /* Reprocess this packet after received reply from router */
637 pc = silc_calloc(1, sizeof(*pc));
/* The stream reference taken here is released in
   silc_server_backup_ping_reply().  The trailing 15 is presumably the reply
   timeout -- TODO confirm its unit against
   silc_server_command_pending_timed(). */
641 silc_packet_stream_ref(sock);
642 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
644 silc_server_backup_ping_reply, pc, 15);
649 /* Start the resuming protocol if requested. */
650 if (type == SILC_SERVER_BACKUP_START) {
651 /* We have received a start for resuming protocol. We are either
652 primary router that came back online or normal server. */
653 SilcServerBackupProtocolContext proto_ctx;
655 /* If backup had closed the connection earlier we won't allow resuming
656 since we (primary router) have never gone away. */
657 if (server->server_type == SILC_ROUTER && !server->backup_router &&
658 server->backup_closed) {
659 unsigned char data[4];
660 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
662 SILC_LOG_INFO(("Backup resuming not allowed since we are still "
/* The refusal is a SILC_PACKET_FAILURE whose payload is the START code
   written with SILC_PUT32_MSB(); backup_closed is reset afterwards. */
664 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
665 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
667 server->backup_closed = FALSE;
668 silc_packet_free(packet);
/* Build the responder-side protocol context.  A reference is taken on the
   stream because the context stores a pointer to it. */
672 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
673 proto_ctx->server = server;
674 proto_ctx->sock = sock;
675 proto_ctx->responder = TRUE;
676 proto_ctx->type = type;
677 proto_ctx->session = session;
678 proto_ctx->start = time(0);
679 silc_packet_stream_ref(sock);
681 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
682 SILC_LOG_INFO(("Starting backup resuming protocol"));
684 /* Start protocol immediately */
685 silc_schedule_task_add_timeout(server->schedule,
686 silc_server_backup_responder_start,
691 /* If we are router and the packet is coming from our primary router
692 then it means we have been replaced by an backup router in our cell. */
693 if (type == SILC_SERVER_BACKUP_REPLACED &&
694 server->server_type == SILC_ROUTER &&
695 idata->conn_type == SILC_CONN_ROUTER &&
696 SILC_PRIMARY_ROUTE(server) == sock) {
697 /* We have been replaced by an backup router in our cell. We must
698 mark our primary router connection disabled since we are not allowed
699 to use it at this moment. */
700 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
701 "wait until backup resuming protocol is executed"));
702 idata->status |= SILC_IDLIST_STATUS_DISABLED;
703 silc_packet_free(packet);
/* NOTE(review): from here to the end of the function the code reads
   sock->protocol and calls silc_socket_free() / silc_socket_dup(), although
   `sock' is declared as SilcPacketStream in the signature.  This looks like
   code written against an older socket API; whether it is compiled at all
   cannot be determined from the lines visible here -- confirm. */
708 /* Activate the shared protocol context for this socket connection
710 if (type == SILC_SERVER_BACKUP_RESUMED &&
711 idata->conn_type == SILC_CONN_ROUTER && !sock->protocol &&
712 idata->status & SILC_IDLIST_STATUS_DISABLED) {
713 SilcServerEntry backup_router;
715 if (silc_server_backup_replaced_get(server, ((SilcServerEntry)idata)->id,
717 SilcPacketStream bsock =
718 (SilcSocketConnection)backup_router->connection;
719 if (bsock->protocol && bsock->protocol->protocol &&
720 bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) {
721 sock->protocol = bsock->protocol;
722 ctx = sock->protocol->context;
724 silc_socket_free(ctx->sock); /* unref */
725 ctx->sock = silc_socket_dup(sock);
731 /* Call the resuming protocol if the protocol is active. */
732 if (SILC_SERVER_IS_BACKUP(sock)) {
733 ctx = sock->protocol->context;
/* The protocol is executed only for a session number that is already present
   in the context's session table. */
736 for (i = 0; i < ctx->sessions_count; i++) {
737 if (session == ctx->sessions[i].session) {
738 ctx->session = session;
739 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
740 silc_packet_free(packet);
745 /* If RESUMED received the session ID is zero, execute the protocol. */
746 if (type == SILC_SERVER_BACKUP_RESUMED) {
747 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
748 silc_packet_free(packet);
752 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
753 silc_packet_free(packet);
758 silc_packet_free(packet);
761 /* Task that is called after backup router has connected back to
762 primary router and we are starting the resuming protocol */
764 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
/* `context' is the initiator's protocol context; the server is taken from it
   rather than from app_context.
   NOTE(review): `sock' is initialized below but is not referenced in the
   lines visible here. */
766 SilcServerBackupProtocolContext proto_ctx =
767 (SilcServerBackupProtocolContext)context;
768 SilcServer server = proto_ctx->server;
769 SilcPacketStream sock = proto_ctx->sock;
771 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
772 SILC_LOG_INFO(("Starting backup resuming protocol"));
/* Two tasks are queued: the protocol timeout handler and the protocol
   itself.  Their context and delay arguments are not visible here. */
774 /* Register protocol timeout */
775 silc_schedule_task_add_timeout(server->schedule,
776 silc_server_backup_timeout,
779 /* Run the backup resuming protocol */
780 silc_schedule_task_add_timeout(server->schedule,
781 silc_server_protocol_backup,
/* Task callback that starts a new connection attempt to the configured
   primary router, with silc_server_backup_connected as the completion
   callback, unless a router connection to that host and port is already
   found. */
785 SILC_TASK_CALLBACK(silc_server_backup_connected_again)
787 SilcServer server = app_context;
788 SilcServerConfigRouter *primary;
790 primary = silc_server_config_get_primary_router(server);
/* A connection is created only when the lookup by host and port finds no
   existing router socket. */
792 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
793 primary->host, primary->port))
794 silc_server_create_connection(server, FALSE,
795 primary->host, primary->port,
796 silc_server_backup_connected,
801 /* Called when we've established connection back to our primary router
802 while we've been acting as backup router and have replaced the primary router
803 in the cell. This function will start the backup resuming protocol. */
805 void silc_server_backup_connected(SilcServer server,
806 SilcServerEntry server_entry,
809 SilcServerBackupProtocolContext proto_ctx;
810 SilcPacketStream sock;
/* Retry path: silc_server_backup_connected_again is queued on the scheduler.
   The condition guarding this is not visible here (presumably the connection
   attempt failed) -- confirm. */
814 silc_schedule_task_add_timeout(server->schedule,
815 silc_server_backup_connected_again,
/* Build the initiator-side protocol context (responder is FALSE, type is
   START) for the connection of `server_entry'.  A reference is taken on the
   stream because the context stores a pointer to it. */
820 sock = server_entry->connection;
821 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
822 proto_ctx->server = server;
823 proto_ctx->sock = sock;
824 proto_ctx->responder = FALSE;
825 proto_ctx->type = SILC_SERVER_BACKUP_START;
826 proto_ctx->start = time(0);
827 silc_packet_stream_ref(sock);
829 /* Start through scheduler */
830 silc_schedule_task_add_timeout(server->schedule,
831 silc_server_backup_connected_later,
/* Task callback that starts a new connection attempt to the configured
   primary router, with silc_server_backup_connect_primary as the completion
   callback, unless a router connection to that host and port is already
   found. */
835 SILC_TASK_CALLBACK(silc_server_backup_connect_primary_again)
837 SilcServer server = app_context;
838 SilcServerConfigRouter *primary;
840 primary = silc_server_config_get_primary_router(server);
/* A connection is created only when the lookup by host and port finds no
   existing router socket. */
842 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
843 primary->host, primary->port))
844 silc_server_create_connection(server, FALSE,
845 primary->host, primary->port,
846 silc_server_backup_connect_primary,
851 /* Called when normal server has connected to its primary router after
852 backup router has sent the START packet in resuming protocol. We will
853 move the protocol context from the backup router connection to the
856 static void silc_server_backup_connect_primary(SilcServer server,
857 SilcServerEntry server_entry,
/* `context' is the packet stream of the backup router. */
861 SilcPacketStream backup_router = context;
862 SilcServerBackupProtocolContext ctx;
863 SilcPacketStream sock;
864 SilcIDListData idata;
865 unsigned char data[2];
/* Retry path: silc_server_backup_connect_primary_again is queued on the
   scheduler.  The condition guarding this is not visible here. */
869 silc_schedule_task_add_timeout(server->schedule,
870 silc_server_backup_connect_primary_again,
/* NOTE(review): `backup_router' is still dereferenced below after this
   unref; that is only safe if another reference keeps the stream alive --
   confirm.  The ->protocol accesses and the silc_socket_free() /
   silc_socket_dup() calls further down also look like an older socket API
   applied to a SilcPacketStream. */
876 silc_packet_stream_unref(backup_router);
878 if (!backup_router->protocol)
880 if (!server_entry->connection)
883 ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
884 sock = (SilcSocketConnection)server_entry->connection;
885 idata = (SilcIDListData)server_entry;
887 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
888 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
/* The CONNECTED payload is two bytes: the type code and the session number
   taken from the protocol context. */
891 /* Send the CONNECTED packet back to the backup router. */
892 data[0] = SILC_SERVER_BACKUP_CONNECTED;
893 data[1] = ctx->session;
894 silc_server_packet_send(server, backup_router,
895 SILC_PACKET_RESUME_ROUTER, 0, data, 2, FALSE);
897 /* The primary connection is disabled until it sends the RESUMED packet
899 idata->status |= SILC_IDLIST_STATUS_DISABLED;
901 /* Move this protocol context from this backup router connection to
902 the primary router connection since it will send the subsequent
903 packets in this protocol. We don't talk with backup router
905 sock->protocol = backup_router->protocol;
907 silc_socket_free(ctx->sock); /* unref */
908 ctx->sock = silc_socket_dup(server_entry->connection);
/* The backup router connection no longer points at the protocol. */
909 backup_router->protocol = NULL;
913 /* Timeout callback used by the backup router to send the ENDING packet
914 to primary router to indicate that it can now resume as being primary
915 router. All CONNECTED packets have been received when we reach this. */
917 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
920 SilcServerBackupProtocolContext ctx = context;
921 SilcServer server = ctx->server;
922 unsigned char data[2];
925 SILC_LOG_DEBUG(("Start"));
/* Select the session whose server entry is the peer attached to ctx->sock.
   The loop has no early exit, so if several sessions match, the last one
   wins. */
927 for (i = 0; i < ctx->sessions_count; i++)
928 if (ctx->sessions[i].server_entry == ctx->sock->user_data)
929 ctx->session = ctx->sessions[i].session;
931 /* We've received all the CONNECTED packets and now we'll send the
932 ENDING packet to the new primary router. */
933 data[0] = SILC_SERVER_BACKUP_ENDING;
934 data[1] = ctx->session;
935 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
938 /* The protocol will go to END state. */
939 protocol->state = SILC_PROTOCOL_STATE_END;
943 /* Backup resuming protocol. This protocol is executed when the primary
944 router wants to resume its position as being primary router. */
946 SILC_TASK_CALLBACK(silc_server_protocol_backup)
949 SilcServerBackupProtocolContext ctx = context;
950 SilcServer server = ctx->server;
951 SilcServerEntry server_entry;
952 SilcPacketStream sock = NULL;
953 unsigned char data[2];
956 if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN)
957 protocol->state = SILC_PROTOCOL_STATE_START;
959 switch(protocol->state) {
960 case SILC_PROTOCOL_STATE_START:
961 if (ctx->responder == FALSE) {
963 * Initiator (backup router)
966 /* Send the START packet to primary router and normal servers. The
967 packet will indicate to the primary router that it has been replaced
968 by us. For normal servers it means that we will be resigning as
969 being primary router shortly. */
970 for (i = 0; i < server->config->param.connections_max; i++) {
971 sock = server->sockets[i];
972 if (!sock || !sock->user_data ||
973 sock->user_data == server->id_entry ||
974 (sock->type != SILC_CONN_ROUTER &&
975 sock->type != SILC_CONN_SERVER))
978 server_entry = sock->user_data;
979 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
982 ctx->sessions = silc_realloc(ctx->sessions,
983 sizeof(*ctx->sessions) *
984 (ctx->sessions_count + 1));
985 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
986 ctx->sessions[ctx->sessions_count].connected = FALSE;
987 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
989 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
990 server_entry->server_name, ctx->sessions_count));
991 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
992 server_entry->server_name, ctx->sessions_count));
994 /* This connection is performing this protocol too now */
995 sock->protocol = protocol;
997 data[0] = SILC_SERVER_BACKUP_START;
998 data[1] = ctx->sessions_count;
999 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1000 data, sizeof(data), FALSE);
1001 ctx->sessions_count++;
1004 /* Announce data to the new primary to be. */
1005 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1006 silc_server_announce_clients(server, 0, ctx->sock);
1007 silc_server_announce_channels(server, 0, ctx->sock);
1013 * Responder (all servers and routers)
1015 SilcServerConfigRouter *primary;
1017 /* We should have received START packet */
1018 if (ctx->type != SILC_SERVER_BACKUP_START) {
1019 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1023 /* Connect to the primary router that was down that is now supposed
1024 to be back online. We send the CONNECTED packet after we've
1025 established the connection to the primary router. */
1026 primary = silc_server_config_get_primary_router(server);
1027 if (primary && server->backup_primary &&
1028 !silc_server_num_sockets_by_remote(server,
1029 silc_net_is_ip(primary->host) ?
1030 primary->host : NULL,
1031 silc_net_is_ip(primary->host) ?
1032 NULL : primary->host,
1034 SILC_CONN_ROUTER)) {
1035 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1037 silc_server_create_connection(server, FALSE;
1038 primary->host, primary->port,
1039 silc_server_backup_connect_primary,
1040 silc_socket_dup(ctx->sock));
1042 /* Nowhere to connect just return the CONNECTED packet */
1043 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1045 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1048 /* Send the CONNECTED packet back to the backup router. */
1049 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1050 data[1] = ctx->session;
1051 silc_server_packet_send(server, ctx->sock,
1052 SILC_PACKET_RESUME_ROUTER, 0,
1053 data, sizeof(data), FALSE);
1056 /* Add this resuming session */
1057 ctx->sessions = silc_realloc(ctx->sessions,
1058 sizeof(*ctx->sessions) *
1059 (ctx->sessions_count + 1));
1060 ctx->sessions[ctx->sessions_count].session = ctx->session;
1061 ctx->sessions_count++;
1063 /* Normal server goes directly to the END state. */
1064 if (server->server_type == SILC_ROUTER &&
1066 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1069 protocol->state = SILC_PROTOCOL_STATE_END;
1074 if (ctx->responder == FALSE) {
1076 * Initiator (backup router)
1079 /* We should have received CONNECTED packet */
1080 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1081 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1085 for (i = 0; i < ctx->sessions_count; i++) {
1086 if (ctx->sessions[i].session == ctx->session) {
1087 ctx->sessions[i].connected = TRUE;
1088 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1089 ctx->sessions[i].server_entry->server_name,
1091 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1096 /* See if all returned CONNECTED, if not, then continue waiting. */
1097 for (i = 0; i < ctx->sessions_count; i++) {
1098 if (!ctx->sessions[i].connected)
1102 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1104 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1106 /* The ENDING is sent with timeout, and then we continue to the
1107 END state in the protocol. */
1108 silc_schedule_task_add(server->schedule, 0,
1109 silc_server_backup_send_resumed,
1110 protocol, 1, 0, SILC_TASK_TIMEOUT,
1111 SILC_TASK_PRI_NORMAL);
1116 * Responder (primary router)
1119 /* We should have been received ENDING packet */
1120 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1121 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1125 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1127 /* Switch announced informations to our primary router of using the
1129 silc_server_local_servers_toggle_enabled(server, TRUE);
1130 silc_server_update_servers_by_server(server, ctx->sock->user_data,
1132 silc_server_update_clients_by_server(server, ctx->sock->user_data,
1133 server->router, TRUE);
1135 /* We as primary router now must send RESUMED packets to all servers
1136 and routers so that they know we are back. For backup router we
1137 send the packet last so that we give the backup as much time as
1138 possible to deal with message routing at this critical moment. */
1139 for (i = 0; i < server->config->param.connections_max; i++) {
1140 sock = server->sockets[i];
1141 if (!sock || !sock->user_data ||
1142 sock->user_data == server->id_entry ||
1143 (sock->type != SILC_CONN_ROUTER &&
1144 sock->type != SILC_CONN_SERVER))
1147 /* Send to backup last */
1148 if (sock == ctx->sock)
1152 server_entry = sock->user_data;
1153 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1155 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1156 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1158 /* This connection is performing this protocol too now */
1159 sock->protocol = protocol;
1161 data[0] = SILC_SERVER_BACKUP_RESUMED;
1163 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1164 data, sizeof(data), FALSE);
1167 /* Now send the same packet to backup */
1168 if (sock != ctx->sock) {
1171 goto send_to_backup;
1174 /* We are now resumed and are back as primary router in the cell. */
1175 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1176 server->wait_backup = FALSE;
1178 /* Announce WATCH list a little later */
1179 silc_schedule_task_add(server->schedule, 0,
1180 silc_server_backup_announce_watches,
1181 silc_socket_dup(ctx->sock), 4, 0,
1182 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
1184 /* For us this is the end of this protocol. */
1185 if (protocol->final_callback)
1186 silc_protocol_execute_final(protocol, server->schedule);
1188 silc_protocol_free(protocol);
1192 case SILC_PROTOCOL_STATE_END:
1195 * Responder (backup router, servers, and remote router)
1197 SilcServerEntry router, backup_router;
1199 /* We should have received RESUMED from our primary router. */
1200 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1201 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1205 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1207 /* If we are the backup router, mark that we are no longer primary
1208 but are back to backup router status. */
1209 if (server->backup_router)
1210 server->server_type = SILC_BACKUP_ROUTER;
1212 /* We have now new primary router. All traffic goes there from now on. */
1213 router = ctx->sock->user_data;
1214 if (silc_server_backup_replaced_get(server, router->id,
1217 if (backup_router == server->router) {
1218 /* We have new primary router now */
1219 server->id_entry->router = router;
1220 server->router = router;
1221 SILC_LOG_INFO(("Switching back to primary router %s",
1222 server->router->server_name));
1224 /* We are connected to new primary and now continue using it */
1225 SILC_LOG_INFO(("Resuming the use of primary router %s",
1226 router->server_name));
1228 server->backup_primary = FALSE;
1229 sock = router->connection;
1231 /* Update the client entries of the backup router to the new
1233 silc_server_local_servers_toggle_enabled(server, FALSE);
1234 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1235 silc_server_update_servers_by_server(server, backup_router, router);
1236 silc_server_update_clients_by_server(
1237 server, NULL, router,
1238 server->server_type == SILC_BACKUP_ROUTER);
1239 if (server->server_type == SILC_SERVER)
1240 silc_server_update_channels_by_server(server, backup_router, router);
1241 silc_server_backup_replaced_del(server, backup_router);
1244 /* Send notify about resuming the use of the primary router to local operators */
1245 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1246 SILC_NOTIFY_TYPE_NONE,
1247 ("%s resumed the use of primary router %s",
1248 server->server_name,
1249 server->router->server_name));
1251 /* Protocol has ended, call the final callback */
1252 if (protocol->final_callback)
1253 silc_protocol_execute_final(protocol, server->schedule);
1255 silc_protocol_free(protocol);
1259 case SILC_PROTOCOL_STATE_ERROR:
1260 /* Protocol has ended, call the final callback */
1261 if (protocol->final_callback)
1262 silc_protocol_execute_final(protocol, server->schedule);
1264 silc_protocol_free(protocol);
1267 case SILC_PROTOCOL_STATE_FAILURE:
1268 /* Protocol has ended, call the final callback */
1269 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1270 ctx->received_failure = TRUE;
1271 if (protocol->final_callback)
1272 silc_protocol_execute_final(protocol, server->schedule);
1274 silc_protocol_free(protocol);
1277 case SILC_PROTOCOL_STATE_UNKNOWN:
1283 /* Final resuming protocol completion callback.

   Deletes the tasks that were scheduled with the protocol as their
   context and clears the protocol pointer from every router and server
   connection that shares it.  The log messages in the body show two
   outcomes: an error during resuming, and a successful end of the
   protocol.  When `ctx->type' is SILC_SERVER_BACKUP_RESUMED and a
   router is set, our servers (only if we are SILC_ROUTER), clients and
   channels are announced to that router and a WATCH list announcement
   is scheduled.  At the end the `ctx->sock' reference, the protocol and
   the session list are released. */
1285 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1288 SilcServerBackupProtocolContext ctx = context;
1289 SilcServer server = ctx->server;
1290 SilcServerEntry server_entry;
1291 SilcPacketStream sock;
/* Remove any tasks still scheduled with this protocol as their context. */
1295 silc_schedule_task_del_by_context(server->schedule, protocol);
/* NOTE(review): "protcool" in the message below is a typo for "protocol". */
1300 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
/* A normal server also removes its pending
   silc_server_backup_connect_to_router tasks. */
1301 if (server->server_type == SILC_SERVER)
1302 silc_schedule_task_del_by_callback(server->schedule,
1303 silc_server_backup_connect_to_router);
1306 if (server->server_shutdown)
1309 /* Remove this protocol from all server entries that have it */
1310 for (i = 0; i < server->config->param.connections_max; i++) {
1311 sock = server->sockets[i];
/* Only connections that have user data and are of type SILC_CONN_ROUTER
   or SILC_CONN_SERVER are of interest. */
1312 if (!sock || !sock->user_data ||
1313 (sock->type != SILC_CONN_ROUTER &&
1314 sock->type != SILC_CONN_SERVER))
1317 server_entry = sock->user_data;
1319 /* The SilcProtocol context was shared between all connections, clear
1320 it from all connections. */
1321 if (sock->protocol == protocol) {
1322 sock->protocol = NULL;
1326 if (server->server_type == SILC_SERVER &&
1327 server_entry->server_type == SILC_ROUTER)
/* The restart / fallback handling below applies to the primary route
   connection when `server->backup_router' is set. */
1331 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
/* Drop the reference held through `ctx->sock' when it is this
   connection. */
1332 if (ctx->sock == sock) {
1333 silc_socket_free(sock); /* unref */
1337 /* If the protocol has been restarted more than 10 times it won't work,
   give up and handle it the same way as a received failure */
1338 if (ctx->initiator_restart > 10)
1339 ctx->received_failure = TRUE;
1341 if (!ctx->received_failure) {
1342 /* Protocol error, probably timeout. Just restart the protocol. */
1343 SilcServerBackupProtocolContext proto_ctx;
1345 /* Restart the protocol with a fresh initiator context. The restart
   counter is carried over from the old context, incremented by one. */
1346 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1347 proto_ctx->server = server;
1348 proto_ctx->sock = silc_socket_dup(sock);
1349 proto_ctx->responder = FALSE;
1350 proto_ctx->type = SILC_SERVER_BACKUP_START;
1351 proto_ctx->start = time(0);
1352 proto_ctx->initiator_restart = ctx->initiator_restart + 1;
1354 /* Start through scheduler */
1355 silc_schedule_task_add(server->schedule, 0,
1356 silc_server_backup_connected_later,
1359 SILC_TASK_PRI_NORMAL);
1361 /* If failure was received, switch back to normal backup router.
1362 For some reason primary wouldn't accept that we were supposed
1363 to perform resuming protocol. */
1364 server->server_type = SILC_BACKUP_ROUTER;
1365 silc_server_local_servers_toggle_enabled(server, FALSE);
1366 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1367 silc_server_update_servers_by_server(server, server->id_entry,
1369 silc_server_update_clients_by_server(server, NULL,
1370 sock->user_data, TRUE);
1372 /* Announce our clients and channels to the router */
1373 silc_server_announce_clients(server, 0, sock);
1374 silc_server_announce_channels(server, 0, sock);
1376 /* Announce WATCH list a little later. The task receives its own
   duplicated reference to the connection. */
1377 silc_schedule_task_add(server->schedule, 0,
1378 silc_server_backup_announce_watches,
1379 silc_socket_dup(sock), 5, 0,
1381 SILC_TASK_PRI_NORMAL);
/* Clear the DISABLED status flag of the server entry. */
1388 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1393 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
1395 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1396 /* Announce all of our information to the router. */
1397 if (server->server_type == SILC_ROUTER)
1398 silc_server_announce_servers(server, FALSE, 0,
1399 server->router->connection);
1401 /* Announce our clients and channels to the router */
1402 silc_server_announce_clients(server, 0, server->router->connection);
1403 silc_server_announce_channels(server, 0, server->router->connection);
1405 /* Announce WATCH list a little later */
1406 silc_schedule_task_add(server->schedule, 0,
1407 silc_server_backup_announce_watches,
1408 silc_socket_dup(server->router->connection), 4, 0,
1409 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
1414 if (server->server_type == SILC_SERVER) {
1415 /* If we are still using the backup router, send confirmation to the
1416 backup that using it is still ok and continue sending traffic there.
1417 The backup will reply with error if it's not ok. */
1418 if (server->router && server->backup_primary) {
1419 /* Send START_USE just in case using backup wouldn't be ok. */
1420 silc_server_backup_send_start_use(server, server->router->connection,
1423 /* Check the same START_USE a couple of times just in case. Three
   status checks are scheduled with timeout arguments 5, 20 and 60
   (presumably seconds -- TODO confirm), each with its own duplicated
   reference to the router connection. */
1424 silc_schedule_task_add(server->schedule, 0,
1425 silc_server_backup_check_status,
1426 silc_socket_dup(server->router->connection),
1427 5, 1, SILC_TASK_TIMEOUT,
1428 SILC_TASK_PRI_NORMAL);
1429 silc_schedule_task_add(server->schedule, 0,
1430 silc_server_backup_check_status,
1431 silc_socket_dup(server->router->connection),
1432 20, 1, SILC_TASK_TIMEOUT,
1433 SILC_TASK_PRI_NORMAL);
1434 silc_schedule_task_add(server->schedule, 0,
1435 silc_server_backup_check_status,
1436 silc_socket_dup(server->router->connection),
1437 60, 1, SILC_TASK_TIMEOUT,
1438 SILC_TASK_PRI_NORMAL);
/* Detach the protocol from the protocol's own connection, then release
   the connection reference, the protocol and the session list. */
1443 if (ctx->sock && ctx->sock->protocol)
1444 ctx->sock->protocol = NULL;
1446 silc_socket_free(ctx->sock); /* unref */
1447 silc_protocol_free(protocol);
1448 silc_free(ctx->sessions);
/* Task callback that announces the WATCH list to the connection passed
   as `context'; the server is taken from `app_context'.  The
   announcement is made only when `sock->users' is greater than one
   (presumably meaning the stream is still referenced by someone other
   than this task -- TODO confirm).  The stream is released with
   silc_socket_free in every case; the callers in this file schedule
   this task with a silc_socket_dup'ed stream, so this release
   presumably drops that reference. */
1453 SILC_TASK_CALLBACK(silc_server_backup_announce_watches)
1456 SilcPacketStream sock = context;
1457 SilcServer server = app_context;
1458 if (sock->users > 1)
1459 silc_server_announce_watches(server, sock);
1460 silc_socket_free(sock);