5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2014 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 SILC_TASK_CALLBACK(silc_server_backup_announce_watches);
27 static void silc_server_backup_connect_primary(SilcServer server,
28 SilcServerEntry server_entry,
32 /************************** Types and Definitions ***************************/
36 SilcServerEntry server;
40 } SilcServerBackupEntry;
42 /* Holds IP address and port of the primary router that was replaced
47 SilcServerEntry server; /* Backup router that replaced the primary */
48 } SilcServerBackupReplaced;
51 struct SilcServerBackupStruct {
52 SilcServerBackupEntry *servers;
53 SilcUInt32 servers_count;
54 SilcServerBackupReplaced **replaced;
55 SilcUInt32 replaced_count;
61 SilcServerEntry server_entry;
62 } SilcServerBackupProtocolSession;
64 /* Backup resuming protocol context */
67 SilcPacketStream sock;
70 SilcServerBackupProtocolSession *sessions;
71 SilcUInt32 sessions_count;
72 SilcUInt32 initiator_restart;
75 unsigned int responder : 1;
76 unsigned int received_failure : 1;
77 unsigned int timeout : 1;
78 unsigned int error : 1;
79 } *SilcServerBackupProtocolContext;
82 /********************* Backup Configuration Routines ************************/
84 /* Adds the `backup_server' to be one of our backup routers. This can be
85 called multiple times to set multiple backup routers. The `ip' and `port'
86 are the IP and port that the `backup_server' will replace if the `ip'
87 becomes unresponsive. If `local' is TRUE then the `backup_server' is
88 in the local cell, if FALSE it is in some other cell. */
90 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
91 const char *ip, int port, SilcBool local)
98 if (!server->backup) {
99 server->backup = silc_calloc(1, sizeof(*server->backup));
104 /* See if already added */
105 for (i = 0; i < server->backup->servers_count; i++) {
106 if (server->backup->servers[i].server == backup_server)
110 SILC_LOG_DEBUG(("Backup router %s will replace %s",
111 backup_server->data.sconn ?
112 backup_server->data.sconn->remote_host : "(me)", ip));
114 for (i = 0; i < server->backup->servers_count; i++) {
115 if (!server->backup->servers[i].server) {
116 server->backup->servers[i].server = backup_server;
117 server->backup->servers[i].local = local;
118 server->backup->servers[i].port = SILC_SWAB_16(port);
119 memset(server->backup->servers[i].ip.data, 0,
120 sizeof(server->backup->servers[i].ip.data));
121 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
122 sizeof(server->backup->servers[i].ip.data));
127 i = server->backup->servers_count;
128 server->backup->servers = silc_realloc(server->backup->servers,
129 sizeof(*server->backup->servers) *
131 server->backup->servers[i].server = backup_server;
132 server->backup->servers[i].local = local;
133 server->backup->servers[i].port = SILC_SWAB_16(port);
134 memset(server->backup->servers[i].ip.data, 0,
135 sizeof(server->backup->servers[i].ip.data));
136 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
137 sizeof(server->backup->servers[i].ip.data));
138 server->backup->servers_count++;
141 /* Returns the backup router for the IP and port in `server_id', or NULL
142 if no such backup router exists. */
144 SilcServerEntry silc_server_backup_get(SilcServer server,
145 SilcServerID *server_id)
152 for (i = 0; i < server->backup->servers_count; i++) {
153 if (server->backup->servers[i].server &&
154 server->backup->servers[i].port == server_id->port &&
155 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
156 sizeof(server_id->ip.data))) {
157 SILC_LOG_DEBUG(("Found backup router %s for %s",
158 server->backup->servers[i].server->server_name,
159 silc_id_render(server_id, SILC_ID_SERVER)));
160 return server->backup->servers[i].server;
167 /* Deletes the backup server `server_entry'. */
169 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
176 for (i = 0; i < server->backup->servers_count; i++) {
177 if (server->backup->servers[i].server == server_entry) {
178 SILC_LOG_DEBUG(("Removing %s as backup router",
179 silc_id_render(server->backup->servers[i].server->id,
181 server->backup->servers[i].server = NULL;
182 memset(server->backup->servers[i].ip.data, 0,
183 sizeof(server->backup->servers[i].ip.data));
188 /* Frees all data allocated for backup routers. Call this after deleting
189 all backup routers and when no more routers will be added, for example
190 when shutting down the server. */
192 void silc_server_backup_free(SilcServer server)
199 /* Delete existing servers if caller didn't do it */
200 for (i = 0; i < server->backup->servers_count; i++) {
201 if (server->backup->servers[i].server)
202 silc_server_backup_del(server, server->backup->servers[i].server);
205 silc_free(server->backup->servers);
206 silc_free(server->backup);
207 server->backup = NULL;
210 /* Marks the IP address and port from the `server_id' as being replaced
211 by the backup router indicated by `server_entry'. If the router connects
212 at a later time we can check whether it has been replaced by a backup
215 void silc_server_backup_replaced_add(SilcServer server,
216 SilcServerID *server_id,
217 SilcServerEntry server_entry)
220 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
225 server->backup = silc_calloc(1, sizeof(*server->backup));
228 if (!server->backup->replaced) {
229 server->backup->replaced =
230 silc_calloc(1, sizeof(*server->backup->replaced));
231 server->backup->replaced_count = 1;
233 if (!server->backup->replaced)
236 SILC_LOG_DEBUG(("Replacing router %s with %s",
237 silc_id_render(server_id, SILC_ID_SERVER),
238 server_entry->server_name));
240 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
241 r->server = server_entry;
243 for (i = 0; i < server->backup->replaced_count; i++) {
244 if (!server->backup->replaced[i]) {
245 server->backup->replaced[i] = r;
250 i = server->backup->replaced_count;
251 server->backup->replaced = silc_realloc(server->backup->replaced,
252 sizeof(*server->backup->replaced) *
254 server->backup->replaced[i] = r;
255 server->backup->replaced_count++;
258 /* Checks whether the IP address and port from the `server_id' have been
259 replaced by a backup router. If they have, this returns TRUE and stores
260 the backup router entry in the `server_entry' pointer if non-NULL. Returns
261 FALSE if the router is not replaced by a backup router. */
263 SilcBool silc_server_backup_replaced_get(SilcServer server,
264 SilcServerID *server_id,
265 SilcServerEntry *server_entry)
269 if (!server->backup || !server->backup->replaced)
272 for (i = 0; i < server->backup->replaced_count; i++) {
273 if (!server->backup->replaced[i])
275 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
276 sizeof(server_id->ip.data))) {
278 *server_entry = server->backup->replaced[i]->server;
279 SILC_LOG_DEBUG(("Router %s is replaced by %s",
280 silc_id_render(server_id, SILC_ID_SERVER),
281 server->backup->replaced[i]->server->server_name));
286 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
287 silc_id_render(server_id, SILC_ID_SERVER)));
291 /* Deletes the replaced host that was set for `server_entry'. */
293 void silc_server_backup_replaced_del(SilcServer server,
294 SilcServerEntry server_entry)
298 if (!server->backup || !server->backup->replaced)
301 for (i = 0; i < server->backup->replaced_count; i++) {
302 if (!server->backup->replaced[i])
304 if (server->backup->replaced[i]->server == server_entry) {
305 silc_free(server->backup->replaced[i]);
306 server->backup->replaced[i] = NULL;
311 /* Broadcasts the received packet indicated by `packet' to all of our backup
312 routers. All router-wide information is passed using broadcast packets,
313 which is why all backup routers need to get this data too. It is expected
314 that the caller already knows that the `packet' is a broadcast packet. */
316 void silc_server_backup_broadcast(SilcServer server,
317 SilcPacketStream sender,
320 SilcServerEntry backup;
321 SilcPacketStream sock;
324 if (!server->backup || server->server_type != SILC_ROUTER)
327 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
329 for (i = 0; i < server->backup->servers_count; i++) {
330 backup = server->backup->servers[i].server;
332 if (!backup || backup->connection == sender ||
333 server->backup->servers[i].local == FALSE)
335 if (server->backup->servers[i].server == server->id_entry)
338 sock = backup->connection;
339 silc_server_packet_route(server, sock, packet);
343 /* A generic routine to send data to all backup routers. If the `sender'
344 is provided it will indicate the original sender of the packet and the
345 packet won't be resent to that entity. The `data' is the data that will
346 be assembled to packet context before sending. The packet will be
347 encrypted by this function. If the `force_send' is TRUE the data is sent
348 immediately and not put to queue. If `local' is TRUE then the packet
349 will be sent only to local backup routers inside the cell. If FALSE the
350 packet can go from one cell to the other. This function has no effect
351 if there are no backup routers. */
353 void silc_server_backup_send(SilcServer server,
354 SilcServerEntry sender,
356 SilcPacketFlags flags,
362 SilcServerEntry backup;
365 if (!server->backup || server->server_type != SILC_ROUTER)
368 for (i = 0; i < server->backup->servers_count; i++) {
369 backup = server->backup->servers[i].server;
370 if (!backup || sender == backup)
372 if (local && server->backup->servers[i].local == FALSE)
374 if (server->backup->servers[i].server == server->id_entry)
377 silc_server_packet_send(server, backup->connection, type, flags,
382 /* Same as silc_server_backup_send but sets a specific Destination ID to
383 the packet. The Destination ID is indicated by the `dst_id' and the
384 ID type `dst_id_type'. For example, packets destined to channels must
385 be sent using this function. */
387 void silc_server_backup_send_dest(SilcServer server,
388 SilcServerEntry sender,
390 SilcPacketFlags flags,
392 SilcIdType dst_id_type,
398 SilcServerEntry backup;
401 if (!server->backup || server->server_type != SILC_ROUTER)
404 for (i = 0; i < server->backup->servers_count; i++) {
405 backup = server->backup->servers[i].server;
406 if (!backup || sender == backup)
408 if (local && server->backup->servers[i].local == FALSE)
410 if (server->backup->servers[i].server == server->id_entry)
413 silc_server_packet_send_dest(server, backup->connection, type, flags,
414 dst_id, dst_id_type, data, data_len);
418 /* Send the START_USE indication to remote connection. If `failure' is
419 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
420 SILC_PACKET_RESUME_ROUTER. */
422 void silc_server_backup_send_start_use(SilcServer server,
423 SilcPacketStream sock,
426 unsigned char data[4];
428 SILC_LOG_DEBUG(("Sending START_USE (%s)",
429 failure ? "failure" : "success"));
432 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
433 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
436 data[0] = SILC_SERVER_BACKUP_START_USE;
438 silc_server_packet_send(server, sock,
439 SILC_PACKET_RESUME_ROUTER, 0,
444 /* Send the REPLACED indication to remote router. This is sent by the
445 primary router (remote router) of the primary router that came back
446 online. This is not sent by backup router or any other server. */
448 void silc_server_backup_send_replaced(SilcServer server,
449 SilcPacketStream sock)
451 unsigned char data[4];
453 SILC_LOG_DEBUG(("Sending REPLACED"));
455 data[0] = SILC_SERVER_BACKUP_REPLACED;
457 silc_server_packet_send(server, sock,
458 SILC_PACKET_RESUME_ROUTER, 0,
463 /************************ Backup Resuming Protocol **************************/
465 /* Timeout callback for protocol */
467 SILC_TASK_CALLBACK(silc_server_backup_timeout)
469 SilcServerBackupProtocolContext ctx = context;
470 SilcServer server = app_context;
472 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
475 silc_schedule_task_add_timeout(server->schedule,
476 silc_server_protocol_backup_done, context,
480 /* Callback to start the protocol as responder */
482 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
484 SilcServerBackupProtocolContext proto_ctx = context;
485 SilcPacketStream sock = proto_ctx->sock;
486 SilcIDListData idata = silc_packet_get_context(sock);
487 SilcServer server = app_context;
489 /* If other protocol is executing at the same time, start with timeout. */
490 if (idata->sconn->op) {
491 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
492 silc_schedule_task_add_timeout(server->schedule,
493 silc_server_backup_responder_start,
498 /* Register protocol timeout */
499 silc_schedule_task_add_timeout(server->schedule,
500 silc_server_backup_timeout,
503 /* Run the backup resuming protocol */
504 silc_schedule_task_add_timeout(server->schedule,
505 silc_server_protocol_backup,
509 /* Callback to send START_USE to backup to check whether using backup
512 SILC_TASK_CALLBACK(silc_server_backup_check_status)
514 SilcPacketStream sock = context;
515 SilcServer server = app_context;
517 /* Check whether we are still using backup */
518 if (!server->backup_primary)
521 silc_server_backup_send_start_use(server, sock, FALSE);
522 silc_packet_stream_unref(sock);
527 SilcPacketStream sock;
529 } *SilcServerBackupPing;
531 /* PING command reply callback */
533 void silc_server_backup_ping_reply(void *context, void *reply)
535 SilcServerBackupPing pc = context;
536 SilcServerCommandReplyContext cmdr = reply;
538 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
539 /* Timeout error occurred, the primary is really down. */
540 SilcPacketStream primary = SILC_PRIMARY_ROUTE(pc->server);
542 SILC_LOG_DEBUG(("PING timeout, primary is down"));
545 silc_server_free_sock_user_data(pc->server, primary, NULL);
546 silc_server_close_connection(pc->server, primary);
549 /* Reprocess the RESUME_ROUTER packet */
550 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
552 /* The primary is not down, refuse to serve the server as primary */
553 SILC_LOG_DEBUG(("PING received, primary is up"));
554 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
555 silc_packet_free(pc->packet);
558 silc_packet_stream_unref(pc->sock);
562 /* Processes incoming RESUME_ROUTER packet. This can give the packet
563 for processing to the protocol handler or allocate new protocol if
564 start command is received. */
566 void silc_server_backup_resume_router(SilcServer server,
567 SilcPacketStream sock,
570 SilcIDListData idata = silc_packet_get_context(sock);
571 SilcServerEntry router = (SilcServerEntry)idata;
572 SilcUInt8 type, session;
573 SilcServerBackupProtocolContext ctx;
576 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
578 if (idata->conn_type == SILC_CONN_CLIENT ||
579 idata->conn_type == SILC_CONN_UNKNOWN) {
580 SILC_LOG_DEBUG(("Bad packet received"));
581 silc_packet_free(packet);
585 ret = silc_buffer_unformat(&packet->buffer,
586 SILC_STR_UI_CHAR(&type),
587 SILC_STR_UI_CHAR(&session),
590 SILC_LOG_ERROR(("Malformed resume router packet received"));
591 silc_packet_free(packet);
595 /* Check whether this packet is used to tell us that server will start
596 using us as primary router. */
597 if (type == SILC_SERVER_BACKUP_START_USE) {
599 SilcServerBackupPing pc;
601 /* If we are normal server then backup router has sent us back
602 this reply and we use the backup as primary router now. */
603 if (server->server_type == SILC_SERVER) {
604 /* Nothing to do here actually, since we have switched already. */
605 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
606 silc_packet_free(packet);
610 /* Backup router following. */
612 /* If we are marked as router then the primary is down and we send
613 success START_USE back to the server. */
614 if (server->server_type == SILC_ROUTER) {
615 SILC_LOG_DEBUG(("Sending success START_USE back"));
616 silc_server_backup_send_start_use(server, sock, FALSE);
617 silc_packet_free(packet);
621 /* We have just lost primary, send success START_USE back */
622 if (server->standalone) {
623 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back"));
624 silc_server_backup_send_start_use(server, sock, FALSE);
625 silc_packet_free(packet);
629 /* We are backup router. This server claims that our primary is down.
630 We will check this ourselves by sending PING command to the primary. */
631 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
632 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
633 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
634 SILC_COMMAND_PING, ++server->cmd_ident, 1,
635 1, idp->data, silc_buffer_len(idp));
636 silc_buffer_free(idp);
638 /* Reprocess this packet after received reply from router */
639 pc = silc_calloc(1, sizeof(*pc));
641 silc_server_backup_send_start_use(server, sock, FALSE);
642 silc_packet_free(packet);
648 silc_packet_stream_ref(sock);
649 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
651 silc_server_backup_ping_reply, pc, 15);
655 /* Start the resuming protocol if requested. */
656 if (type == SILC_SERVER_BACKUP_START) {
657 /* We have received a start for resuming protocol. We are either
658 primary router that came back online or normal server. */
659 SilcServerBackupProtocolContext proto_ctx;
660 unsigned char data[4];
662 /* If backup had closed the connection earlier we won't allow resuming
663 since we (primary router) have never gone away. */
664 if (server->server_type == SILC_ROUTER && !server->backup_router &&
665 server->backup_closed) {
666 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
668 SILC_LOG_INFO(("Backup resuming not allowed since we are still "
670 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
671 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
673 server->backup_closed = FALSE;
674 silc_packet_free(packet);
678 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
680 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
681 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
683 server->backup_closed = FALSE;
684 silc_packet_free(packet);
687 proto_ctx->server = server;
688 proto_ctx->sock = sock;
689 proto_ctx->responder = TRUE;
690 proto_ctx->type = type;
691 proto_ctx->session = session;
692 proto_ctx->start = time(0);
693 silc_packet_stream_ref(sock);
694 router->backup = TRUE;
695 router->backup_proto = proto_ctx;
697 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
698 SILC_LOG_INFO(("Starting backup resuming protocol"));
700 /* Start protocol immediately */
701 silc_schedule_task_add_timeout(server->schedule,
702 silc_server_backup_responder_start,
707 /* If we are router and the packet is coming from our primary router
708 then it means we have been replaced by a backup router in our cell. */
709 if (type == SILC_SERVER_BACKUP_REPLACED &&
710 server->server_type == SILC_ROUTER &&
711 idata->conn_type == SILC_CONN_ROUTER &&
712 SILC_PRIMARY_ROUTE(server) == sock) {
713 /* We have been replaced by a backup router in our cell. We must
714 mark our primary router connection disabled since we are not allowed
715 to use it at this moment. */
716 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
717 "wait until backup resuming protocol is executed"));
718 idata->status |= SILC_IDLIST_STATUS_DISABLED;
719 silc_packet_free(packet);
723 /* Activate the shared protocol context for this socket connection
725 if (type == SILC_SERVER_BACKUP_RESUMED &&
726 idata->conn_type == SILC_CONN_ROUTER && !router->backup &&
727 idata->status & SILC_IDLIST_STATUS_DISABLED) {
728 SilcServerEntry backup_router;
730 if (silc_server_backup_replaced_get(server, router->id, &backup_router)) {
731 ctx = backup_router->backup_proto;
733 silc_packet_stream_unref(ctx->sock);
734 router->backup = TRUE;
735 router->backup_proto = ctx;
737 silc_packet_stream_ref(sock);
741 /* Call the resuming protocol if the protocol is active. */
742 if (router->backup) {
743 ctx = router->backup_proto;
746 for (i = 0; i < ctx->sessions_count; i++) {
747 if (session == ctx->sessions[i].session) {
748 ctx->session = session;
749 silc_schedule_task_add_timeout(server->schedule,
750 silc_server_protocol_backup,
752 silc_packet_free(packet);
757 /* If RESUMED received the session ID is zero, execute the protocol. */
758 if (type == SILC_SERVER_BACKUP_RESUMED) {
759 silc_schedule_task_add_timeout(server->schedule,
760 silc_server_protocol_backup,
762 silc_packet_free(packet);
766 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
767 silc_packet_free(packet);
771 silc_packet_free(packet);
774 /* Task that is called after backup router has connected back to
775 primary router and we are starting the resuming protocol */
777 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
779 SilcServerBackupProtocolContext proto_ctx =
780 (SilcServerBackupProtocolContext)context;
781 SilcServer server = proto_ctx->server;
783 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
784 SILC_LOG_INFO(("Starting backup resuming protocol"));
786 /* Register protocol timeout */
787 silc_schedule_task_add_timeout(server->schedule,
788 silc_server_backup_timeout,
791 /* Run the backup resuming protocol */
792 silc_schedule_task_add_timeout(server->schedule,
793 silc_server_protocol_backup,
797 SILC_TASK_CALLBACK(silc_server_backup_connected_again)
799 SilcServer server = app_context;
800 SilcServerConfigRouter *primary;
802 SILC_LOG_DEBUG(("Reconnecting"));
804 if (server->server_shutdown)
807 primary = silc_server_config_get_primary_router(server);
809 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
810 primary->host, primary->port))
811 silc_server_create_connection(server, TRUE, FALSE,
812 primary->host, primary->port,
813 silc_server_backup_connected,
818 /* Called when we've established connection back to our primary router
819 after we've been acting as backup router and have replaced the primary
820 router in the cell. This function will start the backup resuming protocol. */
822 void silc_server_backup_connected(SilcServer server,
823 SilcServerEntry server_entry,
826 SilcServerBackupProtocolContext proto_ctx;
827 SilcPacketStream sock;
831 SILC_LOG_DEBUG(("Connecting failed"));
832 silc_schedule_task_add_timeout(server->schedule,
833 silc_server_backup_connected_again,
838 sock = server_entry->connection;
839 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
842 proto_ctx->server = server;
843 proto_ctx->sock = sock;
844 proto_ctx->responder = FALSE;
845 proto_ctx->type = SILC_SERVER_BACKUP_START;
846 proto_ctx->start = time(0);
847 silc_packet_stream_ref(sock);
849 /* Start through scheduler */
850 silc_schedule_task_add_timeout(server->schedule,
851 silc_server_backup_connected_later,
855 SILC_TASK_CALLBACK(silc_server_backup_connect_primary_again)
857 SilcServer server = app_context;
858 SilcServerConfigRouter *primary;
860 primary = silc_server_config_get_primary_router(server);
862 if (!silc_server_find_socket_by_host(server, SILC_CONN_ROUTER,
863 primary->host, primary->port))
864 silc_server_create_connection(server, TRUE, FALSE,
865 primary->host, primary->port,
866 silc_server_backup_connect_primary,
871 /* Called when normal server has connected to its primary router after
872 backup router has sent the START packet in resuming protocol. We will
873 move the protocol context from the backup router connection to the
876 static void silc_server_backup_connect_primary(SilcServer server,
877 SilcServerEntry server_entry,
880 SilcPacketStream backup_router = context;
881 SilcIDListData idata = silc_packet_get_context(backup_router);
882 SilcServerEntry router = (SilcServerEntry)idata;
883 SilcServerBackupProtocolContext ctx;
884 SilcPacketStream sock;
885 unsigned char data[2];
889 silc_schedule_task_add_timeout(server->schedule,
890 silc_server_backup_connect_primary_again,
895 if (!router->backup || !server_entry->connection) {
896 silc_packet_stream_unref(backup_router);
900 ctx = router->backup_proto;
901 sock = server_entry->connection;
902 idata = (SilcIDListData)server_entry;
904 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
905 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
908 /* Send the CONNECTED packet back to the backup router. */
909 data[0] = SILC_SERVER_BACKUP_CONNECTED;
910 data[1] = ctx->session;
911 silc_server_packet_send(server, backup_router,
912 SILC_PACKET_RESUME_ROUTER, 0, data, 2);
914 /* The primary connection is disabled until it sends the RESUMED packet
916 idata->status |= SILC_IDLIST_STATUS_DISABLED;
918 /* Move this protocol context from this backup router connection to
919 the primary router connection since it will send the subsequent
920 packets in this protocol. We don't talk with backup router
923 silc_packet_stream_unref(ctx->sock);
925 silc_packet_stream_ref(sock);
926 server_entry->backup = TRUE;
927 server_entry->backup_proto = ctx;
928 router->backup = FALSE;
929 router->backup_proto = NULL;
932 silc_packet_stream_unref(backup_router);
935 /* Timeout callback used by the backup router to send the ENDING packet
936 to primary router to indicate that it can now resume as being primary
937 router. All CONNECTED packets have been received when we reach this. */
939 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
941 SilcServerBackupProtocolContext ctx = context;
942 SilcServer server = ctx->server;
943 unsigned char data[2];
946 SILC_LOG_DEBUG(("Start"));
948 for (i = 0; i < ctx->sessions_count; i++)
949 if (ctx->sessions[i].server_entry == silc_packet_get_context(ctx->sock))
950 ctx->session = ctx->sessions[i].session;
952 /* We've received all the CONNECTED packets and now we'll send the
953 ENDING packet to the new primary router. */
954 data[0] = SILC_SERVER_BACKUP_ENDING;
955 data[1] = ctx->session;
956 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
959 /* The protocol will go to END state. */
963 /* Backup resuming protocol. This protocol is executed when the primary
964 router wants to resume its position as being primary router. */
966 SILC_TASK_CALLBACK(silc_server_protocol_backup)
968 SilcServerBackupProtocolContext ctx = context;
969 SilcServer server = ctx->server;
970 SilcServerEntry server_entry = NULL;
971 SilcPacketStream sock = NULL;
972 unsigned char data[2];
981 if (ctx->responder == FALSE) {
983 * Initiator (backup router)
986 /* Send the START packet to primary router and normal servers. The
987 packet will indicate to the primary router that it has been replaced
988 by us. For normal servers it means that we will be resigning as
989 being primary router shortly. */
990 list = silc_packet_engine_get_streams(server->packet_engine);
994 silc_dlist_start(list);
995 while ((sock = silc_dlist_get(list))) {
996 server_entry = silc_packet_get_context(sock);
998 if (!server_entry || server_entry == server->id_entry ||
999 (server_entry->data.conn_type != SILC_CONN_ROUTER &&
1000 server_entry->data.conn_type != SILC_CONN_SERVER))
1003 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
1006 ctx->sessions = silc_realloc(ctx->sessions,
1007 sizeof(*ctx->sessions) *
1008 (ctx->sessions_count + 1));
1009 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
1010 ctx->sessions[ctx->sessions_count].connected = FALSE;
1011 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
1013 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
1014 server_entry->server_name, ctx->sessions_count));
1015 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1016 server_entry->server_name, ctx->sessions_count));
1018 /* This connection is performing this protocol too now */
1019 server_entry->backup = TRUE;
1020 server_entry->backup_proto = ctx;
1022 data[0] = SILC_SERVER_BACKUP_START;
1023 data[1] = ctx->sessions_count;
1024 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1025 data, sizeof(data));
1026 ctx->sessions_count++;
1028 silc_packet_engine_free_streams_list(list);
1030 /* Announce data to the new primary to be. */
1031 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1032 silc_server_announce_clients(server, 0, ctx->sock);
1033 silc_server_announce_channels(server, 0, ctx->sock);
1039 * Responder (all servers and routers)
1041 SilcServerConfigRouter *primary;
1043 /* We should have received START packet */
1044 if (ctx->type != SILC_SERVER_BACKUP_START) {
1045 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1049 /* Connect to the primary router that was down that is now supposed
1050 to be back online. We send the CONNECTED packet after we've
1051 established the connection to the primary router. */
1052 primary = silc_server_config_get_primary_router(server);
1053 if (primary && server->backup_primary &&
1054 !silc_server_num_sockets_by_remote(server,
1055 silc_net_is_ip(primary->host) ?
1056 primary->host : NULL,
1057 silc_net_is_ip(primary->host) ?
1058 NULL : primary->host,
1060 SILC_CONN_ROUTER)) {
1061 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1063 silc_packet_stream_ref(ctx->sock);
1064 silc_server_create_connection(server, TRUE, FALSE,
1065 primary->host, primary->port,
1066 silc_server_backup_connect_primary,
1069 /* Nowhere to connect just return the CONNECTED packet */
1070 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1072 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1075 /* Send the CONNECTED packet back to the backup router. */
1076 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1077 data[1] = ctx->session;
1078 silc_server_packet_send(server, ctx->sock,
1079 SILC_PACKET_RESUME_ROUTER, 0,
1080 data, sizeof(data));
1083 /* Add this resuming session */
1084 ctx->sessions = silc_realloc(ctx->sessions,
1085 sizeof(*ctx->sessions) *
1086 (ctx->sessions_count + 1));
1087 ctx->sessions[ctx->sessions_count].session = ctx->session;
1088 ctx->sessions_count++;
1090 /* Normal server goes directly to the END state. */
1091 if (server->server_type == SILC_ROUTER &&
1093 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1101 if (ctx->responder == FALSE) {
1103 * Initiator (backup router)
1106 /* We should have received CONNECTED packet */
1107 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1108 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1112 for (i = 0; i < ctx->sessions_count; i++) {
1113 if (ctx->sessions[i].session == ctx->session) {
1114 ctx->sessions[i].connected = TRUE;
1115 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1116 ctx->sessions[i].server_entry->server_name,
1118 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1123 /* See if all returned CONNECTED, if not, then continue waiting. */
1124 for (i = 0; i < ctx->sessions_count; i++) {
1125 if (!ctx->sessions[i].connected)
1129 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1131 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1133 /* The ENDING is sent with timeout, and then we continue to the
1134 END state in the protocol. */
1135 silc_schedule_task_add_timeout(server->schedule,
1136 silc_server_backup_send_resumed,
1142 * Responder (primary router)
1145 /* We should have received the ENDING packet */
1146 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1147 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1151 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1153 /* Switch announced informations to our primary router of using the
1155 silc_server_local_servers_toggle_enabled(server, TRUE);
1156 silc_server_update_servers_by_server(server,
1157 silc_packet_get_context(ctx->sock),
1159 silc_server_update_clients_by_server(server,
1160 silc_packet_get_context(ctx->sock),
1161 server->router, TRUE);
1163 /* We as primary router now must send RESUMED packets to all servers
1164 and routers so that they know we are back. For backup router we
1165 send the packet last so that we give the backup as much time as
1166 possible to deal with message routing at this critical moment. */
1167 list = silc_packet_engine_get_streams(server->packet_engine);
1171 silc_dlist_start(list);
1172 while ((sock = silc_dlist_get(list))) {
1173 server_entry = silc_packet_get_context(sock);
1175 if (!server_entry || server_entry == server->id_entry ||
1176 (server_entry->data.conn_type != SILC_CONN_ROUTER &&
1177 server_entry->data.conn_type != SILC_CONN_SERVER))
1180 /* Send to backup last */
1181 if (sock == ctx->sock)
1184 server_entry = silc_packet_get_context(sock);
1185 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1187 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1188 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1190 /* This connection is performing this protocol too now */
1191 server_entry->backup = TRUE;
1192 server_entry->backup_proto = ctx;
1194 data[0] = SILC_SERVER_BACKUP_RESUMED;
1196 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1197 data, sizeof(data));
1200 /* Now send the same packet to backup */
1201 if (sock != ctx->sock) {
1204 server_entry = silc_packet_get_context(sock);
1205 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1207 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1208 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1210 /* This connection is performing this protocol too now */
1211 server_entry->backup = TRUE;
1212 server_entry->backup_proto = ctx;
1214 data[0] = SILC_SERVER_BACKUP_RESUMED;
1216 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1217 data, sizeof(data));
1219 silc_packet_engine_free_streams_list(list);
1221 /* We are now resumed and are back as primary router in the cell. */
1222 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1223 server->wait_backup = FALSE;
1225 /* Announce WATCH list a little later */
1226 silc_packet_stream_ref(ctx->sock);
1227 silc_schedule_task_add_timeout(server->schedule,
1228 silc_server_backup_announce_watches,
1231 /* For us this is the end of this protocol. */
1232 silc_schedule_task_add_timeout(server->schedule,
1233 silc_server_protocol_backup_done,
1241 * Responder (backup router, servers, and remote router)
1243 SilcServerEntry router, backup_router;
1245 /* We should have received RESUMED from our primary router. */
1246 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1247 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1251 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1253 /* If we are the backup router, mark that we are no longer primary
1254 but are back to backup router status. */
1255 if (server->backup_router)
1256 server->server_type = SILC_BACKUP_ROUTER;
1258 /* We have now new primary router. All traffic goes there from now on. */
1259 router = silc_packet_get_context(ctx->sock);
1260 if (silc_server_backup_replaced_get(server, router->id,
1263 if (backup_router == server->router) {
1264 /* We have new primary router now */
1265 server->id_entry->router = router;
1266 server->router = router;
1267 SILC_LOG_INFO(("Switching back to primary router %s",
1268 server->router->server_name));
1270 /* We are connected to new primary and now continue using it */
1271 SILC_LOG_INFO(("Resuming the use of primary router %s",
1272 router->server_name));
1274 server->backup_primary = FALSE;
1275 sock = router->connection;
1277 /* Update the client entries of the backup router to the new
1279 silc_server_local_servers_toggle_enabled(server, FALSE);
1280 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1281 silc_server_update_servers_by_server(server, backup_router, router);
1282 silc_server_update_clients_by_server(
1283 server, NULL, router,
1284 server->server_type == SILC_BACKUP_ROUTER);
1285 if (server->server_type == SILC_SERVER)
1286 silc_server_update_channels_by_server(server, backup_router, router);
1287 silc_server_backup_replaced_del(server, backup_router);
1290 /* Notify local operators that we resumed the use of the primary router */
1291 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1292 SILC_NOTIFY_TYPE_NONE,
1293 ("%s resumed the use of primary router %s",
1294 server->server_name,
1295 server->router->server_name));
1297 /* Protocol has ended, call the final callback */
1298 silc_schedule_task_add_timeout(server->schedule,
1299 silc_server_protocol_backup_done,
1305 /* Protocol has ended, call the final callback */
1306 silc_schedule_task_add_timeout(server->schedule,
1307 silc_server_protocol_backup_done,
1312 /* Protocol has ended, call the final callback */
1313 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1314 ctx->received_failure = TRUE;
1315 silc_schedule_task_add_timeout(server->schedule,
1316 silc_server_protocol_backup_done,
/* NOTE(review): this chunk carries stray leading line numbers and appears to
   have lost lines during extraction (braces, some declarations, guard
   bodies and trailing call arguments).  The comments added below describe
   only what the visible lines establish. */
1325 /* Final resuming protocol completion callback.  It is scheduled as a
   timeout task with the SilcServerBackupProtocolContext as `context'.
   Visible steps: unbind this protocol context from the server entries that
   carry it, either restart the protocol or fall back to backup router
   status on the error path, announce our information to the router on the
   success path, and finally release what the context holds. */
1327 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1329 SilcServerBackupProtocolContext ctx = context;
1330 SilcServer server = ctx->server;
1332 SilcServerEntry server_entry;
1333 SilcPacketStream sock;
/* Remove scheduler tasks registered with this protocol context. */
1336 silc_schedule_task_del_by_context(server->schedule, ctx);
/* NOTE(review): "protcool" in the message below is a typo for "protocol";
   it is a runtime string and is therefore left unchanged here.  The
   condition guarding this log call is not visible in this chunk. */
1341 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
/* NOTE(review): the statement guarded by this shutdown check is not visible
   in this chunk. */
1343 if (server->server_shutdown)
1346 /* Remove this protocol from all server entries that have it */
1347 list = silc_packet_engine_get_streams(server->packet_engine);
1351 silc_dlist_start(list);
1352 while ((sock = silc_dlist_get(list))) {
/* The stream's context is treated as the server entry for that stream. */
1353 server_entry = silc_packet_get_context(sock);
/* Test for connections that are neither router nor server connections;
   NOTE(review): the guarded statement is not visible, presumably they are
   skipped -- confirm. */
1357 if (server_entry->data.conn_type != SILC_CONN_ROUTER &&
1358 server_entry->data.conn_type != SILC_CONN_SERVER)
/* Only entries bound to this very protocol context are handled below. */
1361 if (server_entry->backup_proto == ctx) {
/* Case: we are a normal server and the entry is a router. */
1364 if (server->server_type == SILC_SERVER &&
1365 server_entry->server_type == SILC_ROUTER)
/* Case: this stream is our primary route and server->backup_router is set. */
1369 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
/* The context's own stream is this one: release a reference on it. */
1370 if (ctx->sock == sock) {
1371 silc_packet_stream_unref(sock);
1375 /* If failed after 10 attempts, it won't work, give up */
1376 if (ctx->initiator_restart > 10)
1377 ctx->received_failure = TRUE;
1379 if (!ctx->received_failure) {
1380 /* Protocol error, probably timeout. Just restart the protocol. */
1381 SilcServerBackupProtocolContext proto_ctx;
1383 /* Restart the protocol. */
1384 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
/* Fill a fresh initiator context in START state for the same stream.  The
   restart counter is carried forward (+1) so that the 10-attempt limit
   above can eventually trigger. */
1387 proto_ctx->server = server;
1388 proto_ctx->sock = sock;
1389 proto_ctx->responder = FALSE;
1390 proto_ctx->type = SILC_SERVER_BACKUP_START;
1391 proto_ctx->start = time(0);
1392 proto_ctx->initiator_restart = ctx->initiator_restart + 1;
/* Take a stream reference for the new context, which stores `sock'. */
1393 silc_packet_stream_ref(sock);
1395 /* Start through scheduler */
1396 silc_schedule_task_add_timeout(server->schedule,
1397 silc_server_backup_connected_later,
1400 /* If failure was received, switch back to normal backup router.
1401 For some reason primary wouldn't accept that we were supposed
1402 to perform resuming protocol. */
1403 server->server_type = SILC_BACKUP_ROUTER;
1404 silc_server_local_servers_toggle_enabled(server, FALSE);
/* Clear the DISABLED status bit on the entry before updating the lists. */
1405 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1406 silc_server_update_servers_by_server(server, server->id_entry,
1407 silc_packet_get_context(sock));
1408 silc_server_update_clients_by_server(server, NULL,
1409 silc_packet_get_context(sock),
1412 /* Announce our clients and channels to the router */
1413 silc_server_announce_clients(server, 0, sock);
1414 silc_server_announce_channels(server, 0, sock);
1416 /* Announce WATCH list a little later.  A stream reference is taken for
   the scheduled task; silc_server_backup_announce_watches releases it. */
1417 silc_packet_stream_ref(sock);
1418 silc_schedule_task_add_timeout(server->schedule,
1419 silc_server_backup_announce_watches,
/* Clear the DISABLED status bit on the entry. */
1427 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
/* Done iterating; release the stream list obtained above. */
1430 silc_packet_engine_free_streams_list(list);
1433 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
/* Announcements are sent only when the protocol ended in the RESUMED state
   and we have a router. */
1435 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1436 /* Announce all of our information to the router. */
1437 if (server->server_type == SILC_ROUTER)
1438 silc_server_announce_servers(server, FALSE, 0,
1439 server->router->connection);
1441 /* Announce our clients and channels to the router */
1442 silc_server_announce_clients(server, 0, server->router->connection);
1443 silc_server_announce_channels(server, 0, server->router->connection);
1445 /* Announce WATCH list a little later.  A stream reference is taken for
   the scheduled task; silc_server_backup_announce_watches releases it. */
1446 silc_packet_stream_ref(server->router->connection);
1447 silc_schedule_task_add_timeout(server->schedule,
1448 silc_server_backup_announce_watches,
1449 server->router->connection, 4, 0);
/* Handling specific to a normal server.  NOTE(review): the enclosing branch
   is not visible in this chunk. */
1454 if (server->server_type == SILC_SERVER) {
1455 /* If we are still using the backup router, send confirmation to backup
1456 that using it is still ok and continue sending traffic there.
1457 The backup will reply with error if it's not ok. */
1458 if (server->router && server->backup_primary) {
1459 /* Send START_USE just in case using backup wouldn't be ok. */
1460 silc_server_backup_send_start_use(server, server->router->connection,
1463 /* Check couple of times same START_USE just in case.  The status check
   is scheduled three times, and a stream reference is taken before each
   scheduling call; the timeout values are not visible in this chunk. */
1464 silc_packet_stream_ref(server->router->connection);
1465 silc_schedule_task_add_timeout(server->schedule,
1466 silc_server_backup_check_status,
1467 server->router->connection,
1469 silc_packet_stream_ref(server->router->connection);
1470 silc_schedule_task_add_timeout(server->schedule,
1471 silc_server_backup_check_status,
1472 server->router->connection,
1474 silc_packet_stream_ref(server->router->connection);
1475 silc_schedule_task_add_timeout(server->schedule,
1476 silc_server_backup_check_status,
1477 server->router->connection,
/* Final cleanup: unbind the protocol from the entry behind the context's
   stream, release a reference on that stream and free the session array.
   NOTE(review): any guards around these statements are not visible here. */
1484 SilcServerEntry r = silc_packet_get_context(ctx->sock);
1487 r->backup_proto = NULL;
1489 silc_packet_stream_unref(ctx->sock);
1491 silc_free(ctx->sessions);
/* Timeout task callback that announces our WATCH list on the packet stream
   passed as `context'; the server is passed as `app_context'.  The sites
   that schedule this task take a reference on the stream first, and that
   reference is released here whether or not the announcement is sent.
   NOTE(review): the braces of this definition are not visible in this
   chunk. */
1495 SILC_TASK_CALLBACK(silc_server_backup_announce_watches)
1497 SilcPacketStream sock = context;
1498 SilcServer server = app_context;
/* Announce only if the stream is still valid when the timeout fires. */
1499 if (silc_packet_stream_is_valid(sock))
1500 silc_server_announce_watches(server, sock);
/* Release the reference that was taken when the task was scheduled. */
1501 silc_packet_stream_unref(sock);