5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2003 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 static void silc_server_backup_connect_primary(SilcServer server,
26 SilcServerEntry server_entry,
30 /************************** Types and Definitions ***************************/
/* Entry describing one configured backup router.  Besides `server', the
   routines in this file read and write `ip', `port' and `local' members
   of this type (their declarations are not visible here). */
34 SilcServerEntry server;
38 } SilcServerBackupEntry;
40 /* Holds IP address and port of the primary router that was replaced
45 SilcServerEntry server; /* Backup router that replaced the primary; set by
                              silc_server_backup_replaced_add() */
46 } SilcServerBackupReplaced;
/* Backup router bookkeeping; the routines in this file reach these
   members through server->backup.  `servers' is an array of
   `servers_count' entries, where an entry whose `server' member is NULL
   is free and gets reused by silc_server_backup_add().  `replaced' is an
   array of `replaced_count' pointers, where a NULL pointer marks a free
   slot that silc_server_backup_replaced_add() reuses. */
49 struct SilcServerBackupStruct {
50 SilcServerBackupEntry *servers;
51 SilcUInt32 servers_count;
52 SilcServerBackupReplaced **replaced;
53 SilcUInt32 replaced_count;
/* One session of the backup resuming protocol.  Only `server_entry' is
   visible here; the protocol code in this file also uses `session' and
   `connected' members of this type. */
59 SilcServerEntry server_entry;
60 } SilcServerBackupProtocolSession;
62 /* Backup resuming protocol context.  `sessions' holds `sessions_count'
   entries and is grown with silc_realloc() by the protocol code.  The
   code in this file also uses `server', `type', `session' and `start'
   members whose declarations are not visible here. */
65 SilcSocketConnection sock;
68 SilcServerBackupProtocolSession *sessions;
69 SilcUInt32 sessions_count;
71 unsigned int responder : 1; /* TRUE when started from a received START
                                  packet, FALSE when we initiate */
72 unsigned int received_failure : 1;
73 unsigned int timeout : 1;
74 } *SilcServerBackupProtocolContext;
77 /********************* Backup Configuration Routines ************************/
79 /* Adds the `backup_server' to be one of our backup routers. This can be
80 called multiple times to set multiple backup routers. The `ip' and `port'
81 are the IP and port that the `backup_server' will replace if the `ip'
82 becomes unresponsive. If `local' is TRUE then the `backup_server' is
83 in the local cell, if FALSE it is in some other cell.

   The backup context (server->backup) is allocated on first use.  A free
   slot (one whose `server' is NULL) is reused when one exists, otherwise
   the `servers' array is grown with silc_realloc().  The port is stored
   as SILC_SWAB_16(port) and the address in the binary form written by
   silc_net_addr2bin().

   NOTE(review): no check of the silc_calloc()/silc_realloc() results is
   visible here, and the realloc result overwrites the only pointer to
   the old array -- confirm how allocation failure is handled.  The
   return value of silc_net_addr2bin() is not checked either. */
85 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
86 const char *ip, int port, bool local)
93 if (!server->backup) {
94 server->backup = silc_calloc(1, sizeof(*server->backup));
99 /* See if already added */
100 for (i = 0; i < server->backup->servers_count; i++) {
101 if (server->backup->servers[i].server == backup_server)
105 SILC_LOG_DEBUG(("Backup router %s will replace %s",
106 ((SilcSocketConnection)backup_server->connection)->ip,
/* Reuse a slot released by silc_server_backup_del(), if there is one */
109 for (i = 0; i < server->backup->servers_count; i++) {
110 if (!server->backup->servers[i].server) {
111 server->backup->servers[i].server = backup_server;
112 server->backup->servers[i].local = local;
113 server->backup->servers[i].port = SILC_SWAB_16(port);
114 memset(server->backup->servers[i].ip.data, 0,
115 sizeof(server->backup->servers[i].ip.data));
116 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
117 sizeof(server->backup->servers[i].ip.data));
/* No free slot: append a new entry at index `servers_count' */
122 i = server->backup->servers_count;
123 server->backup->servers = silc_realloc(server->backup->servers,
124 sizeof(*server->backup->servers) *
126 server->backup->servers[i].server = backup_server;
127 server->backup->servers[i].local = local;
128 server->backup->servers[i].port = SILC_SWAB_16(port);
129 memset(server->backup->servers[i].ip.data, 0,
130 sizeof(server->backup->servers[i].ip.data));
131 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
132 sizeof(server->backup->servers[i].ip.data));
133 server->backup->servers_count++;
136 /* Returns backup router for IP and port in `server_id' or NULL if there
137 does not exist backup router.  An entry matches when its `server' is
   set and both its stored port and its binary IP data equal those of
   `server_id'.  The stored port was written as SILC_SWAB_16(port) by
   silc_server_backup_add() and is compared with server_id->port as is. */
139 SilcServerEntry silc_server_backup_get(SilcServer server,
140 SilcServerID *server_id)
147 for (i = 0; i < server->backup->servers_count; i++) {
148 if (server->backup->servers[i].server &&
149 server->backup->servers[i].port == server_id->port &&
150 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
151 sizeof(server_id->ip.data))) {
152 SILC_LOG_DEBUG(("Found backup router %s for %s",
153 server->backup->servers[i].server->server_name,
154 silc_id_render(server_id, SILC_ID_SERVER)));
155 return server->backup->servers[i].server;
162 /* Deletes the backup server `server_entry'.  The matching slot is
   released by setting its `server' to NULL and zeroing its IP data so
   that silc_server_backup_add() can reuse it.  In the visible lines the
   `servers' array is not shrunk and `servers_count' is not changed. */
164 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
171 for (i = 0; i < server->backup->servers_count; i++) {
172 if (server->backup->servers[i].server == server_entry) {
173 SILC_LOG_DEBUG(("Removing %s as backup router",
174 silc_id_render(server->backup->servers[i].server->id,
176 server->backup->servers[i].server = NULL;
177 memset(server->backup->servers[i].ip.data, 0,
178 sizeof(server->backup->servers[i].ip.data));
183 /* Frees all data allocated for backup routers. Call this after deleting
184 all backup routers and when new routers are added no more, for example
185 when shutting down the server.  Leaves server->backup set to NULL.

   NOTE(review): the visible lines free only the `servers' array and the
   context itself; no release of the `replaced' array or of its entries
   is visible -- confirm that they are freed elsewhere. */
187 void silc_server_backup_free(SilcServer server)
194 /* Delete existing servers if caller didn't do it */
195 for (i = 0; i < server->backup->servers_count; i++) {
196 if (server->backup->servers[i].server)
197 silc_server_backup_del(server, server->backup->servers[i].server);
200 silc_free(server->backup->servers);
201 silc_free(server->backup);
202 server->backup = NULL;
/* NOTE(review) for silc_server_backup_replaced_add() below:
   - the initializer of `r' ends in a stray second semicolon;
   - `r' is allocated with silc_calloc() and used without a visible NULL
     check, and silc_realloc() overwrites the `replaced' pointer
     directly -- confirm the allocation failure policy;
   - in the visible lines only the `ip' member of `server_id' is copied
     into the new entry, although the description speaks of IP address
     and port -- confirm whether the port is meant to be recorded.
   A free (NULL) slot of the `replaced' array is reused when one exists,
   otherwise the array is grown by one pointer. */
205 /* Marks the IP address and port from the `server_id' as being replaced
206 by backup router indicated by the `server_entry'. If the router connects at
207 a later time we can check whether it has been replaced by a backup
210 void silc_server_backup_replaced_add(SilcServer server,
211 SilcServerID *server_id,
212 SilcServerEntry server_entry)
215 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
218 server->backup = silc_calloc(1, sizeof(*server->backup));
219 if (!server->backup->replaced) {
220 server->backup->replaced =
221 silc_calloc(1, sizeof(*server->backup->replaced));
222 server->backup->replaced_count = 1;
225 SILC_LOG_DEBUG(("Replacing router %s with %s",
226 silc_id_render(server_id, SILC_ID_SERVER),
227 server_entry->server_name));
229 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
230 r->server = server_entry;
232 for (i = 0; i < server->backup->replaced_count; i++) {
233 if (!server->backup->replaced[i]) {
234 server->backup->replaced[i] = r;
239 i = server->backup->replaced_count;
240 server->backup->replaced = silc_realloc(server->backup->replaced,
241 sizeof(*server->backup->replaced) *
243 server->backup->replaced[i] = r;
244 server->backup->replaced_count++;
247 /* Checks whether the IP address and port from the `server_id' has been
248 replaced by a backup router. If it has been then this returns TRUE
249 and the backup router entry to the `server_entry' pointer if non-NULL. Returns
250 FALSE if the router is not replaced by backup router.
   NOTE(review): the visible comparison is a memcmp() of the `ip' data
   only; no port comparison is visible -- confirm this is intended. */
252 bool silc_server_backup_replaced_get(SilcServer server,
253 SilcServerID *server_id,
254 SilcServerEntry *server_entry)
258 if (!server->backup || !server->backup->replaced)
261 for (i = 0; i < server->backup->replaced_count; i++) {
262 if (!server->backup->replaced[i])
264 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
265 sizeof(server_id->ip.data))) {
267 *server_entry = server->backup->replaced[i]->server;
268 SILC_LOG_DEBUG(("Router %s is replaced by %s",
269 silc_id_render(server_id, SILC_ID_SERVER),
270 server->backup->replaced[i]->server->server_name));
275 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
276 silc_id_render(server_id, SILC_ID_SERVER)));
280 /* Deletes a replaced host by the set `server_entry'.  A matching entry
   is freed with silc_free() and its slot is set to NULL, which lets
   silc_server_backup_replaced_add() reuse the slot.  Does nothing when
   there is no backup context or no `replaced' array. */
282 void silc_server_backup_replaced_del(SilcServer server,
283 SilcServerEntry server_entry)
287 if (!server->backup || !server->backup->replaced)
290 for (i = 0; i < server->backup->replaced_count; i++) {
291 if (!server->backup->replaced[i])
293 if (server->backup->replaced[i]->server == server_entry) {
294 silc_free(server->backup->replaced[i]);
295 server->backup->replaced[i] = NULL;
301 /* Broadcast the received packet indicated by `packet' to all of our backup
302 routers. All router wide information is passed using broadcast packets.
303 That is why all backup routers need to get this data too. It is expected
304 that the caller already knows that the `packet' is broadcast packet.

   Only acts when a backup context exists and we are SILC_ROUTER.  The
   tests inside the loop single out entries that are unset, whose
   connection is `sender', that are not local, or that are our own
   id_entry (presumably these are skipped; the statements following the
   tests are not visible here).  For each remaining entry the packet
   data is copied into the outgoing buffer prepared for that connection,
   encrypted with the entry's send key and HMAC, and sent. */
306 void silc_server_backup_broadcast(SilcServer server,
307 SilcSocketConnection sender,
308 SilcPacketContext *packet)
310 SilcServerEntry backup;
311 SilcSocketConnection sock;
313 const SilcBufferStruct p; /* NOTE(review): declared const, yet written
                                through the (SilcBuffer)&p casts below */
314 SilcIDListData idata;
317 if (!server->backup || server->server_type != SILC_ROUTER)
320 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
322 buffer = packet->buffer;
/* NOTE(review): this adjusts the buffer taken from the caller's `packet';
   presumably it moves the data area back to the buffer head -- confirm. */
323 silc_buffer_push(buffer, buffer->data - buffer->head);
325 for (i = 0; i < server->backup->servers_count; i++) {
326 backup = server->backup->servers[i].server;
328 if (!backup || backup->connection == sender ||
329 server->backup->servers[i].local == FALSE)
331 if (server->backup->servers[i].server == server->id_entry)
334 idata = (SilcIDListData)backup;
335 sock = backup->connection;
337 if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send,
338 (const SilcBuffer)&p)) {
339 SILC_LOG_ERROR(("Cannot send packet"));
342 silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len);
343 silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++,
344 (SilcBuffer)&p, p.len);
346 SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len);
348 /* Now actually send the packet */
349 silc_server_packet_send_real(server, sock, FALSE);
351 /* Check for mandatory rekey.
   NOTE(review): the threshold is tested on the backup router's `idata',
   but the rekey task is registered with `sender' and sender->sock
   rather than with `sock' -- this looks like it should be `sock';
   confirm. */
352 if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
353 silc_schedule_task_add(server->schedule, sender->sock,
354 silc_server_rekey_callback, sender, 0, 1,
355 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
359 /* A generic routine to send data to all backup routers. If the `sender'
360 is provided it will indicate the original sender of the packet and the
361 packet won't be resent to that entity. The `data' is the data that will
362 be assembled to packet context before sending. The packet will be
363 encrypted in this function. If the `force_send' is TRUE the data is sent
364 immediately and not put to queue. If `local' is TRUE then the packet
365 will be sent only to local backup routers inside the cell. If false the
366 packet can go from one cell to the other. This function has no effect
367 if there are no backup routers.  The entry that is our own id_entry is
   tested for separately inside the loop.  The actual sending is done by
   silc_server_packet_send() on each backup router's connection. */
369 void silc_server_backup_send(SilcServer server,
370 SilcServerEntry sender,
372 SilcPacketFlags flags,
378 SilcServerEntry backup;
379 SilcSocketConnection sock;
382 if (!server->backup || server->server_type != SILC_ROUTER)
385 for (i = 0; i < server->backup->servers_count; i++) {
386 backup = server->backup->servers[i].server;
387 if (!backup || sender == backup)
389 if (local && server->backup->servers[i].local == FALSE)
391 if (server->backup->servers[i].server == server->id_entry)
/* `sock' is used for the debug message only */
394 sock = backup->connection;
396 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
397 silc_get_packet_name(type), sock->hostname, sock->ip));
399 silc_server_packet_send(server, backup->connection, type, flags,
400 data, data_len, force_send);
404 /* Same as silc_server_backup_send but sets a specific Destination ID to
405 the packet. The Destination ID is indicated by the `dst_id' and the
406 ID type `dst_id_type'. For example, packets destined to channels must
407 be sent using this function.  The loop applies the same tests on each
   entry as silc_server_backup_send and hands the data to
   silc_server_packet_send_dest() for each backup router's connection. */
409 void silc_server_backup_send_dest(SilcServer server,
410 SilcServerEntry sender,
412 SilcPacketFlags flags,
414 SilcIdType dst_id_type,
420 SilcServerEntry backup;
421 SilcSocketConnection sock;
424 if (!server->backup || server->server_type != SILC_ROUTER)
427 for (i = 0; i < server->backup->servers_count; i++) {
428 backup = server->backup->servers[i].server;
429 if (!backup || sender == backup)
431 if (local && server->backup->servers[i].local == FALSE)
433 if (server->backup->servers[i].server == server->id_entry)
/* `sock' is used for the debug message only */
436 sock = backup->connection;
438 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
439 silc_get_packet_name(type), sock->hostname, sock->ip));
441 silc_server_packet_send_dest(server, backup->connection, type, flags,
442 dst_id, dst_id_type, data, data_len,
447 /* Send the START_USE indication to remote connection. If `failure' is
448 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
449 SILC_PACKET_RESUME_ROUTER.  The two payloads are built differently:
   the FAILURE payload is SILC_SERVER_BACKUP_START_USE written with
   SILC_PUT32_MSB, while for RESUME_ROUTER the value is stored in
   data[0].  The length arguments of both sends are not visible here. */
451 void silc_server_backup_send_start_use(SilcServer server,
452 SilcSocketConnection sock,
455 unsigned char data[4];
457 SILC_LOG_DEBUG(("Sending START_USE (%s) to %s",
458 failure ? "failure" : "success", sock->ip));
461 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
462 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
465 data[0] = SILC_SERVER_BACKUP_START_USE;
467 silc_server_packet_send(server, sock,
468 SILC_PACKET_RESUME_ROUTER, 0,
473 /* Send the REPLACED indication to remote router. This is sent by the
474 primary router (remote router) of the primary router that came back
475 online. This is not sent by backup router or any other server.  The
   indication is a SILC_PACKET_RESUME_ROUTER packet whose first data byte
   is SILC_SERVER_BACKUP_REPLACED. */
477 void silc_server_backup_send_replaced(SilcServer server,
478 SilcSocketConnection sock)
480 unsigned char data[4];
/* NOTE(review): the format string below has two %s conversions but only
   one argument (sock->ip) is passed.  If the debug macro forwards these
   to a printf-style formatter this reads a missing argument, which is
   undefined behavior -- drop the "(%s)" or supply the missing value. */
482 SILC_LOG_DEBUG(("Sending REPLACED (%s) to %s", sock->ip));
484 data[0] = SILC_SERVER_BACKUP_REPLACED;
486 silc_server_packet_send(server, sock,
487 SILC_PACKET_RESUME_ROUTER, 0,
492 /************************ Backup Resuming Protocol **************************/
494 /* Timeout callback for protocol.  `context' is the SilcProtocol that
   timed out and `app_context' is the server.  The protocol is cancelled,
   put into SILC_PROTOCOL_STATE_ERROR and its final callback is run. */
496 SILC_TASK_CALLBACK(silc_server_backup_timeout)
498 SilcProtocol protocol = context;
499 SilcServerBackupProtocolContext ctx = protocol->context;
500 SilcServer server = app_context;
502 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
504 silc_protocol_cancel(protocol, server->schedule);
505 protocol->state = SILC_PROTOCOL_STATE_ERROR;
506 silc_protocol_execute_final(protocol, server->schedule);
509 /* Callback to start the protocol as responder.  `context' is the
   protocol context created in silc_server_backup_resume_router().  If
   another protocol is already attached to the socket this task is
   scheduled again; otherwise the backup resuming protocol is allocated
   on the socket, executed, and silc_server_backup_timeout() is
   registered as a timeout task for it (arguments 30, 0 -- presumably
   seconds and microseconds; confirm against silc_schedule_task_add). */
511 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
513 SilcServerBackupProtocolContext proto_ctx = context;
514 SilcSocketConnection sock = proto_ctx->sock;
515 SilcServer server = app_context;
517 /* If other protocol is executing at the same time, start with timeout. */
518 if (sock->protocol) {
519 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
520 silc_schedule_task_add(server->schedule, sock->sock,
521 silc_server_backup_responder_start,
523 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
527 /* Run the backup resuming protocol */
528 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
529 &sock->protocol, proto_ctx,
530 silc_server_protocol_backup_done);
531 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
532 silc_schedule_task_add(server->schedule, sock->sock,
533 silc_server_backup_timeout,
534 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
535 SILC_TASK_PRI_NORMAL);
/* Task callback silc_server_backup_check_status: `context' is a socket
   connection and `app_context' is the server.  After testing
   server->backup_primary, a success START_USE is sent to the socket and
   one socket reference is dropped with silc_socket_free().
   NOTE(review): the statement taken when backup_primary is not set is
   not visible here -- confirm that the socket reference is released on
   that path as well. */
538 /* Callback to send START_USE to backup to check whether using backup
541 SILC_TASK_CALLBACK(silc_server_backup_check_status)
543 SilcSocketConnection sock = context;
544 SilcServer server = app_context;
546 /* Check whether we are still using backup */
547 if (!server->backup_primary)
550 silc_server_backup_send_start_use(server, sock, FALSE);
551 silc_socket_free(sock); /* unref */
/* Context handed from silc_server_backup_resume_router() to the PING
   reply callback.  `sock' and `packet' are the duplicated socket and
   packet of the START_USE request being held.  The callback also reads
   a `server' member of this type that is not visible here. */
556 SilcSocketConnection sock;
557 SilcPacketContext *packet;
558 } *SilcServerBackupPing;
560 /* PING command reply callback.  `context' is the SilcServerBackupPing
   saved when the PING was sent and `reply' is the command reply
   context.  When a reply context is given and
   silc_command_get_status() returns false the primary router is treated
   as down: its connection is closed and the held RESUME_ROUTER packet
   is processed again.  The other visible path refuses the request by
   sending a failure START_USE.  The socket and packet references taken
   for the context are released at the end. */
562 void silc_server_backup_ping_reply(void *context, void *reply)
564 SilcServerBackupPing pc = context;
565 SilcServerCommandReplyContext cmdr = reply;
567 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
568 /* Timeout error occurred, the primary is really down. */
569 SilcSocketConnection primary = SILC_PRIMARY_ROUTE(pc->server);
571 SILC_LOG_DEBUG(("PING timeout, primary is down"));
574 if (primary->user_data)
575 silc_server_free_sock_user_data(pc->server, primary, NULL);
576 SILC_SET_DISCONNECTING(primary);
577 silc_server_close_connection(pc->server, primary);
580 /* Reprocess the RESUME_ROUTER packet */
581 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
583 /* The primary is not down, refuse to serve the server as primary */
584 SILC_LOG_DEBUG(("PING received, primary is up"));
585 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
588 silc_socket_free(pc->sock);
589 silc_packet_context_free(pc->packet);
593 /* Processes incoming RESUME_ROUTER packet. This can give the packet
594 for processing to the protocol handler or allocate new protocol if
595 start command is received.

   The packet starts with two bytes, a type and a session number.  The
   visible handling, by type:
   - START_USE: a normal server only logs it; a router or a standalone
     server answers with a success START_USE; otherwise a PING is sent
     to the primary route and the packet is held in a
     SilcServerBackupPing until silc_server_backup_ping_reply() runs.
   - START: a protocol context is created and
     silc_server_backup_responder_start() is scheduled, unless we are a
     router whose backup closed the connection earlier, in which case a
     FAILURE packet is sent.
   - REPLACED from our primary route: the connection's ID data is marked
     SILC_IDLIST_STATUS_DISABLED.
   - RESUMED on a disabled router connection without a protocol: the
     protocol of the replacing backup router's connection is shared
     with this socket.
   Finally, if the backup protocol is active on the socket, it is
   executed when the session number is known or the type is RESUMED.

   NOTE(review): `idata' is taken from sock->user_data without a visible
   NULL check and is dereferenced in the REPLACED and RESUMED paths.
   One debug message below spells "stanalone". */
597 void silc_server_backup_resume_router(SilcServer server,
598 SilcSocketConnection sock,
599 SilcPacketContext *packet)
601 SilcUInt8 type, session;
602 SilcServerBackupProtocolContext ctx;
603 SilcIDListData idata;
606 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
608 if (sock->type == SILC_SOCKET_TYPE_CLIENT ||
609 sock->type == SILC_SOCKET_TYPE_UNKNOWN) {
610 SILC_LOG_DEBUG(("Bad packet received"));
614 idata = (SilcIDListData)sock->user_data;
616 ret = silc_buffer_unformat(packet->buffer,
617 SILC_STR_UI_CHAR(&type),
618 SILC_STR_UI_CHAR(&session),
621 SILC_LOG_ERROR(("Malformed resume router packet received"));
625 /* Check whether this packet is used to tell us that server will start
626 using us as primary router. */
627 if (type == SILC_SERVER_BACKUP_START_USE) {
629 SilcServerBackupPing pc;
631 /* If we are normal server then backup router has sent us back
632 this reply and we use the backup as primary router now. */
633 if (server->server_type == SILC_SERVER) {
634 /* Nothing to do here actually, since we have switched already. */
635 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
639 /* Backup router following. */
641 /* If we are marked as router then the primary is down and we send
642 success START_USE back to the server. */
643 if (server->server_type == SILC_ROUTER) {
644 SILC_LOG_DEBUG(("Sending success START_USE back to %s", sock->ip));
645 silc_server_backup_send_start_use(server, sock, FALSE);
649 /* We have just lost primary, send success START_USE back */
650 if (server->standalone) {
651 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back to %s",
653 silc_server_backup_send_start_use(server, sock, FALSE);
657 /* We are backup router. This server claims that our primary is down.
658 We will check this ourselves by sending PING command to the primary. */
659 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
660 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
661 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
662 SILC_COMMAND_PING, ++server->cmd_ident, 1,
663 1, idp->data, idp->len);
664 silc_buffer_free(idp);
666 /* Reprocess this packet after received reply from router */
667 pc = silc_calloc(1, sizeof(*pc));
/* The socket and the packet are duplicated here; the reply callback
   releases both */
669 pc->sock = silc_socket_dup(sock);
670 pc->packet = silc_packet_context_dup(packet);
671 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
673 silc_server_backup_ping_reply, pc, 15);
678 /* Start the resuming protocol if requested. */
679 if (type == SILC_SERVER_BACKUP_START) {
680 /* We have received a start for resuming protocol. We are either
681 primary router that came back online or normal server. */
682 SilcServerBackupProtocolContext proto_ctx;
684 /* If backup had closed the connection earlier we won't allow resuming
685 since we (primary router) have never gone away. */
686 if (server->server_type == SILC_ROUTER && !server->backup_router &&
687 server->backup_closed) {
688 unsigned char data[4];
689 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
691 SILC_LOG_INFO(("Backup resuming not allowed since we are still "
693 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
694 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
696 server->backup_closed = FALSE;
700 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
701 proto_ctx->server = server;
702 proto_ctx->sock = silc_socket_dup(sock);
703 proto_ctx->responder = TRUE;
704 proto_ctx->type = type;
705 proto_ctx->session = session;
706 proto_ctx->start = time(0);
708 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
709 SILC_LOG_INFO(("Starting backup resuming protocol"));
711 /* Start protocol immediately */
712 silc_schedule_task_add(server->schedule, sock->sock,
713 silc_server_backup_responder_start,
715 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
720 /* If we are router and the packet is coming from our primary router
721 then it means we have been replaced by an backup router in our cell. */
722 if (type == SILC_SERVER_BACKUP_REPLACED &&
723 server->server_type == SILC_ROUTER &&
724 sock->type == SILC_SOCKET_TYPE_ROUTER &&
725 SILC_PRIMARY_ROUTE(server) == sock) {
726 /* We have been replaced by an backup router in our cell. We must
727 mark our primary router connection disabled since we are not allowed
728 to use it at this moment. */
729 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
730 "wait until backup resuming protocol is executed"));
731 idata->status |= SILC_IDLIST_STATUS_DISABLED;
736 /* Activate the shared protocol context for this socket connection
738 if (type == SILC_SERVER_BACKUP_RESUMED &&
739 sock->type == SILC_SOCKET_TYPE_ROUTER && !sock->protocol &&
740 idata->status & SILC_IDLIST_STATUS_DISABLED) {
741 SilcServerEntry backup_router;
743 if (silc_server_backup_replaced_get(server, ((SilcServerEntry)idata)->id,
745 SilcSocketConnection bsock =
746 (SilcSocketConnection)backup_router->connection;
747 if (bsock->protocol && bsock->protocol->protocol &&
748 bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) {
749 sock->protocol = bsock->protocol;
750 ctx = sock->protocol->context;
752 silc_socket_free(ctx->sock); /* unref */
753 ctx->sock = silc_socket_dup(sock);
759 /* Call the resuming protocol if the protocol is active. */
760 if (SILC_SERVER_IS_BACKUP(sock)) {
761 ctx = sock->protocol->context;
764 for (i = 0; i < ctx->sessions_count; i++) {
765 if (session == ctx->sessions[i].session) {
766 ctx->session = session;
767 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
772 /* If RESUMED received the session ID is zero, execute the protocol. */
773 if (type == SILC_SERVER_BACKUP_RESUMED) {
774 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
778 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
783 /* Timeout task callback to connect to remote router.  `context' is the
   SilcServerConnection prepared by silc_server_backup_reconnect().  The
   local address for the connection is the configured primary server_ip
   when one is configured, otherwise NULL.  In the visible retry path a
   normal server counts attempts in sconn->retry_count and frees
   sconn->remote_host once the count exceeds 3; otherwise this task is
   scheduled again (arguments 10, 0).  On the remaining path the key
   exchange is started on the new socket. */
785 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
787 SilcServer server = app_context;
788 SilcServerConnection sconn = (SilcServerConnection)context;
790 const char *server_ip;
792 SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host,
793 sconn->remote_port));
795 /* Connect to remote host */
796 server_ip = server->config->server_info->primary == NULL ? NULL :
797 server->config->server_info->primary->server_ip;
798 sock = silc_net_create_connection(server_ip, sconn->remote_port,
801 if (server->server_type == SILC_SERVER) {
802 sconn->retry_count++;
803 if (sconn->retry_count > 3) {
804 silc_free(sconn->remote_host);
809 silc_schedule_task_add(server->schedule, 0,
810 silc_server_backup_connect_to_router,
811 context, 10, 0, SILC_TASK_TIMEOUT,
812 SILC_TASK_PRI_NORMAL);
816 /* Continue with key exchange protocol */
817 silc_server_start_key_exchange(server, sconn, sock);
820 /* Constantly tries to reconnect to a primary router indicated by the
821 `ip' and `port'. The `callback' will be called when the
822 connection is created.  A SilcServerConnection is allocated, `ip' is
   copied with strdup(), and silc_server_backup_connect_to_router() is
   scheduled as a timeout task (arguments 1, 0) to do the connecting.
   NOTE(review): the silc_calloc() and strdup() results are used without
   a visible check. */
824 void silc_server_backup_reconnect(SilcServer server,
825 const char *ip, SilcUInt16 port,
826 SilcServerConnectRouterCallback callback,
829 SilcServerConnection sconn;
831 SILC_LOG_INFO(("Attempting to reconnect to primary router"));
833 sconn = silc_calloc(1, sizeof(*sconn));
834 sconn->remote_host = strdup(ip);
835 sconn->remote_port = port;
836 sconn->callback = callback;
837 sconn->callback_context = context;
838 sconn->no_reconnect = TRUE;
839 sconn->retry_count = 0;
840 silc_schedule_task_add(server->schedule, 0,
841 silc_server_backup_connect_to_router,
842 sconn, 1, 0, SILC_TASK_TIMEOUT,
843 SILC_TASK_PRI_NORMAL);
846 /* Task that is called after backup router has connected back to
847 primary router and we are starting the resuming protocol.  `context' is
   the protocol context built by silc_server_backup_connected().  If
   another protocol is attached to the socket this task is scheduled
   again; otherwise the backup resuming protocol is allocated on the
   socket, executed as initiator, and silc_server_backup_timeout() is
   registered as a timeout task for it (arguments 30, 0). */
849 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
851 SilcServerBackupProtocolContext proto_ctx =
852 (SilcServerBackupProtocolContext)context;
853 SilcServer server = proto_ctx->server;
854 SilcSocketConnection sock = proto_ctx->sock;
856 /* If running other protocol already run this one a bit later. */
857 if (sock->protocol) {
858 SILC_LOG_DEBUG(("Other protocol is running, wait for it to finish"));
859 silc_schedule_task_add(server->schedule, 0,
860 silc_server_backup_connected_later,
863 SILC_TASK_PRI_NORMAL);
867 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
868 SILC_LOG_INFO(("Starting backup resuming protocol"));
870 /* Run the backup resuming protocol */
871 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
872 &sock->protocol, proto_ctx,
873 silc_server_protocol_backup_done);
874 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
876 silc_schedule_task_add(server->schedule, sock->sock,
877 silc_server_backup_timeout,
878 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
879 SILC_TASK_PRI_NORMAL);
882 /* Called when we've established connection back to our primary router
883 when we've been acting as backup router and have replaced the primary router
884 in the cell. This function will start the backup resuming protocol.

   One visible path looks up the configured primary router and, when no
   router socket to its host and port is found, starts another
   reconnect with this function as the callback.  The other path builds
   an initiator protocol context (type SILC_SERVER_BACKUP_START) for the
   connection of `server_entry' and schedules
   silc_server_backup_connected_later().
   NOTE(review): `primary' is dereferenced without a visible NULL check,
   while the protocol code elsewhere in this file tests the result of
   silc_server_config_get_primary_router() for NULL -- confirm. */
886 void silc_server_backup_connected(SilcServer server,
887 SilcServerEntry server_entry,
890 SilcServerBackupProtocolContext proto_ctx;
891 SilcSocketConnection sock;
895 SilcServerConfigRouter *primary;
896 primary = silc_server_config_get_primary_router(server);
898 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
899 primary->host, primary->port))
900 silc_server_backup_reconnect(server,
901 primary->host, primary->port,
902 silc_server_backup_connected,
908 sock = (SilcSocketConnection)server_entry->connection;
909 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
910 proto_ctx->server = server;
911 proto_ctx->sock = silc_socket_dup(sock);
912 proto_ctx->responder = FALSE;
913 proto_ctx->type = SILC_SERVER_BACKUP_START;
914 proto_ctx->start = time(0);
916 /* Start through scheduler */
917 silc_schedule_task_add(server->schedule, 0,
918 silc_server_backup_connected_later,
921 SILC_TASK_PRI_NORMAL);
/* Notes on silc_server_backup_connect_primary() below.  `context' is
   the backup router's socket connection; the protocol code passes a
   silc_socket_dup()'ed reference to silc_server_backup_reconnect(),
   which stores it as the callback context.  The visible steps are: drop
   that reference if the backup connection is disconnecting or
   disconnected; on one path retry the reconnect to the configured
   primary; otherwise send CONNECTED with the session number to the
   backup router, mark the primary's ID data
   SILC_IDLIST_STATUS_DISABLED, and move the protocol from the backup
   router's socket to the primary's socket.
   NOTE(review): silc_socket_free(backup_router) is called before
   backup_router->protocol is read and before backup_router is used for
   sending and cleared; this is only safe while another reference keeps
   the socket alive -- confirm.  `primary' is also dereferenced without
   a visible NULL check. */
924 /* Called when normal server has connected to its primary router after
925 backup router has sent the START packet in resuming protocol. We will
926 move the protocol context from the backup router connection to the
929 static void silc_server_backup_connect_primary(SilcServer server,
930 SilcServerEntry server_entry,
933 SilcSocketConnection backup_router = (SilcSocketConnection)context;
934 SilcServerBackupProtocolContext ctx;
935 SilcSocketConnection sock;
936 SilcIDListData idata;
937 unsigned char data[2];
939 if (SILC_IS_DISCONNECTING(backup_router) ||
940 SILC_IS_DISCONNECTED(backup_router)) {
941 silc_socket_free(backup_router);
947 SilcServerConfigRouter *primary;
948 primary = silc_server_config_get_primary_router(server);
950 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
951 primary->host, primary->port))
952 silc_server_backup_reconnect(server,
953 primary->host, primary->port,
954 silc_server_backup_connect_primary,
960 silc_socket_free(backup_router);
962 if (!backup_router->protocol)
964 if (!server_entry->connection)
967 ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
968 sock = (SilcSocketConnection)server_entry->connection;
969 idata = (SilcIDListData)server_entry;
971 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
972 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
975 /* Send the CONNECTED packet back to the backup router. */
976 data[0] = SILC_SERVER_BACKUP_CONNECTED;
977 data[1] = ctx->session;
978 silc_server_packet_send(server, backup_router,
979 SILC_PACKET_RESUME_ROUTER, 0, data, 2, FALSE);
981 /* The primary connection is disabled until it sends the RESUMED packet
983 idata->status |= SILC_IDLIST_STATUS_DISABLED;
985 /* Move this protocol context from this backup router connection to
986 the primary router connection since it will send the subsequent
987 packets in this protocol. We don't talk with backup router
989 sock->protocol = backup_router->protocol;
991 silc_socket_free(ctx->sock); /* unref */
992 ctx->sock = silc_socket_dup(server_entry->connection);
993 backup_router->protocol = NULL;
996 /* Timeout callback used by the backup router to send the ENDING packet
997 to primary router to indicate that it can now resume as being primary
998 router. All CONNECTED packets has been received when we reach this.

   `context' is the SilcProtocol.  The session number sent is the one
   recorded for the server entry attached to ctx->sock (its user_data);
   if no session matches, ctx->session keeps its previous value. */
1000 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
1002 SilcProtocol protocol = (SilcProtocol)context;
1003 SilcServerBackupProtocolContext ctx = protocol->context;
1004 SilcServer server = ctx->server;
1005 unsigned char data[2];
1008 SILC_LOG_DEBUG(("Start"));
1010 for (i = 0; i < ctx->sessions_count; i++)
1011 if (ctx->sessions[i].server_entry == ctx->sock->user_data)
1012 ctx->session = ctx->sessions[i].session;
1014 /* We've received all the CONNECTED packets and now we'll send the
1015 ENDING packet to the new primary router. */
1016 data[0] = SILC_SERVER_BACKUP_ENDING;
1017 data[1] = ctx->session;
1018 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
1019 data, sizeof(data), FALSE);
1021 /* The protocol will go to END state. */
1022 protocol->state = SILC_PROTOCOL_STATE_END;
1025 /* Backup resuming protocol. This protocol is executed when the primary
1026 router wants to resume its position as being primary router. */
1028 SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
1030 SilcProtocol protocol = (SilcProtocol)context;
1031 SilcServerBackupProtocolContext ctx = protocol->context;
1032 SilcServer server = ctx->server;
1033 SilcServerEntry server_entry;
1034 SilcSocketConnection sock = NULL;
1035 unsigned char data[2];
1038 if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN)
1039 protocol->state = SILC_PROTOCOL_STATE_START;
1041 switch(protocol->state) {
1042 case SILC_PROTOCOL_STATE_START:
1043 if (ctx->responder == FALSE) {
1045 * Initiator (backup router)
1048 /* Send the START packet to primary router and normal servers. The
1049 packet will indicate to the primary router that it has been replaced
1050 by us. For normal servers it means that we will be resigning as
1051 being primary router shortly. */
1052 for (i = 0; i < server->config->param.connections_max; i++) {
1053 sock = server->sockets[i];
1054 if (!sock || !sock->user_data ||
1055 sock->user_data == server->id_entry ||
1056 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1057 sock->type != SILC_SOCKET_TYPE_SERVER))
1060 server_entry = sock->user_data;
1061 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
1064 ctx->sessions = silc_realloc(ctx->sessions,
1065 sizeof(*ctx->sessions) *
1066 (ctx->sessions_count + 1));
1067 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
1068 ctx->sessions[ctx->sessions_count].connected = FALSE;
1069 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
1071 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
1072 server_entry->server_name, ctx->sessions_count));
1073 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1074 server_entry->server_name, ctx->sessions_count));
1076 /* This connection is performing this protocol too now */
1077 sock->protocol = protocol;
1079 data[0] = SILC_SERVER_BACKUP_START;
1080 data[1] = ctx->sessions_count;
1081 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1082 data, sizeof(data), FALSE);
1083 ctx->sessions_count++;
1086 /* Announce data to the new primary to be. */
1087 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1088 silc_server_announce_clients(server, 0, ctx->sock);
1089 silc_server_announce_channels(server, 0, ctx->sock);
1095 * Responder (all servers and routers)
1097 SilcServerConfigRouter *primary;
1099 /* We should have received START packet */
1100 if (ctx->type != SILC_SERVER_BACKUP_START) {
1101 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1105 /* Connect to the primary router that was down that is now supposed
1106 to be back online. We send the CONNECTED packet after we've
1107 established the connection to the primary router. */
1108 primary = silc_server_config_get_primary_router(server);
1109 if (primary && server->backup_primary &&
1110 !silc_server_num_sockets_by_remote(server,
1111 silc_net_is_ip(primary->host) ?
1112 primary->host : NULL,
1113 silc_net_is_ip(primary->host) ?
1114 NULL : primary->host,
1116 SILC_SOCKET_TYPE_ROUTER)) {
1117 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1119 silc_server_backup_reconnect(server,
1120 primary->host, primary->port,
1121 silc_server_backup_connect_primary,
1122 silc_socket_dup(ctx->sock));
1124 /* Nowhere to connect just return the CONNECTED packet */
1125 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1127 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1130 /* Send the CONNECTED packet back to the backup router. */
1131 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1132 data[1] = ctx->session;
1133 silc_server_packet_send(server, ctx->sock,
1134 SILC_PACKET_RESUME_ROUTER, 0,
1135 data, sizeof(data), FALSE);
1138 /* Add this resuming session */
1139 ctx->sessions = silc_realloc(ctx->sessions,
1140 sizeof(*ctx->sessions) *
1141 (ctx->sessions_count + 1));
1142 ctx->sessions[ctx->sessions_count].session = ctx->session;
1143 ctx->sessions_count++;
1145 /* Normal server goes directly to the END state. */
1146 if (server->server_type == SILC_ROUTER &&
1148 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1151 protocol->state = SILC_PROTOCOL_STATE_END;
1156 if (ctx->responder == FALSE) {
1158 * Initiator (backup router)
1161 /* We should have received CONNECTED packet */
1162 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1163 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1167 for (i = 0; i < ctx->sessions_count; i++) {
1168 if (ctx->sessions[i].session == ctx->session) {
1169 ctx->sessions[i].connected = TRUE;
1170 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1171 ctx->sessions[i].server_entry->server_name,
1173 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1178 /* See if all returned CONNECTED, if not, then continue waiting. */
1179 for (i = 0; i < ctx->sessions_count; i++) {
1180 if (!ctx->sessions[i].connected)
1184 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1186 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1188 /* The ENDING is sent with timeout, and then we continue to the
1189 END state in the protocol. */
1190 silc_schedule_task_add(server->schedule, 0,
1191 silc_server_backup_send_resumed,
1192 protocol, 1, 0, SILC_TASK_TIMEOUT,
1193 SILC_TASK_PRI_NORMAL);
1198 * Responder (primary router)
1201 /* We should have received the ENDING packet */
1202 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1203 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1207 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1209 /* Switch announced informations to our primary router of using the
1211 silc_server_local_servers_toggle_enabled(server, TRUE);
1212 silc_server_update_servers_by_server(server, ctx->sock->user_data,
1214 silc_server_update_clients_by_server(server, ctx->sock->user_data,
1215 server->router, TRUE);
1217 /* We as primary router now must send RESUMED packets to all servers
1218 and routers so that they know we are back. For backup router we
1219 send the packet last so that we give the backup as much time as
1220 possible to deal with message routing at this critical moment. */
1221 for (i = 0; i < server->config->param.connections_max; i++) {
1222 sock = server->sockets[i];
1223 if (!sock || !sock->user_data ||
1224 sock->user_data == server->id_entry ||
1225 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1226 sock->type != SILC_SOCKET_TYPE_SERVER))
1229 /* Send to backup last */
1230 if (sock == ctx->sock)
1234 server_entry = sock->user_data;
1235 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1237 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1238 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1240 /* This connection is performing this protocol too now */
1241 sock->protocol = protocol;
1243 data[0] = SILC_SERVER_BACKUP_RESUMED;
1245 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1246 data, sizeof(data), FALSE);
1247 silc_server_packet_queue_purge(server, sock);
1250 /* Now send the same packet to backup */
1251 if (sock != ctx->sock) {
1254 goto send_to_backup;
1257 /* We are now resumed and are back as primary router in the cell. */
1258 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1259 server->wait_backup = FALSE;
1261 /* For us this is the end of this protocol. */
1262 if (protocol->final_callback)
1263 silc_protocol_execute_final(protocol, server->schedule);
1265 silc_protocol_free(protocol);
1269 case SILC_PROTOCOL_STATE_END:
1272 * Responder (backup router, servers, and remote router)
1274 SilcServerEntry router, backup_router;
1276 /* We should have received RESUMED from our primary router. */
1277 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1278 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1282 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1284 /* If we are the backup router, mark that we are no longer primary
1285 but are back to backup router status. */
1286 if (server->backup_router)
1287 server->server_type = SILC_BACKUP_ROUTER;
1289 /* We have now new primary router. All traffic goes there from now on. */
1290 router = ctx->sock->user_data;
1291 if (silc_server_backup_replaced_get(server, router->id,
1294 if (backup_router == server->router) {
1295 /* We have new primary router now */
1296 server->id_entry->router = router;
1297 server->router = router;
1298 SILC_LOG_INFO(("Switching back to primary router %s",
1299 server->router->server_name));
1301 /* We are connected to new primary and now continue using it */
1302 SILC_LOG_INFO(("Resuming the use of primary router %s",
1303 router->server_name));
1305 server->backup_primary = FALSE;
1306 sock = router->connection;
1308 /* Update the client entries of the backup router to the new
1310 silc_server_local_servers_toggle_enabled(server, FALSE);
1311 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1312 silc_server_update_servers_by_server(server, backup_router, router);
1313 silc_server_update_clients_by_server(
1314 server, NULL, router,
1315 server->server_type == SILC_BACKUP_ROUTER);
1316 if (server->server_type == SILC_SERVER)
1317 silc_server_update_channels_by_server(server, backup_router, router);
1318 silc_server_backup_replaced_del(server, backup_router);
1321 /* Send notify about resuming the use of the primary router to local operators */
1322 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1323 SILC_NOTIFY_TYPE_NONE,
1324 ("%s resumed the use of primary router %s",
1325 server->server_name,
1326 server->router->server_name));
1328 /* Protocol has ended, call the final callback */
1329 if (protocol->final_callback)
1330 silc_protocol_execute_final(protocol, server->schedule);
1332 silc_protocol_free(protocol);
1336 case SILC_PROTOCOL_STATE_ERROR:
1337 /* Protocol has ended, call the final callback */
1338 if (protocol->final_callback)
1339 silc_protocol_execute_final(protocol, server->schedule);
1341 silc_protocol_free(protocol);
1344 case SILC_PROTOCOL_STATE_FAILURE:
1345 /* Protocol has ended, call the final callback */
1346 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1347 ctx->received_failure = TRUE;
1348 if (protocol->final_callback)
1349 silc_protocol_execute_final(protocol, server->schedule);
1351 silc_protocol_free(protocol);
1354 case SILC_PROTOCOL_STATE_UNKNOWN:
1359 /* Final resuming protocol completion callback */
1361 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1363 SilcProtocol protocol = (SilcProtocol)context;
1364 SilcServerBackupProtocolContext ctx = protocol->context;
1365 SilcServer server = ctx->server;
1366 SilcServerEntry server_entry;
1367 SilcSocketConnection sock;
1371 silc_schedule_task_del_by_context(server->schedule, protocol);
1373 error = (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
1374 protocol->state == SILC_PROTOCOL_STATE_FAILURE);
1377 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
1378 if (server->server_type == SILC_SERVER)
1379 silc_schedule_task_del_by_callback(server->schedule,
1380 silc_server_backup_connect_to_router);
1383 if (server->server_shutdown)
1386 /* Remove this protocol from all server entries that have it */
1387 for (i = 0; i < server->config->param.connections_max; i++) {
1388 sock = server->sockets[i];
1389 if (!sock || !sock->user_data ||
1390 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1391 sock->type != SILC_SOCKET_TYPE_SERVER))
1394 server_entry = sock->user_data;
1396 /* The SilcProtocol context was shared between all connections, clear
1397 it from all connections. */
1398 if (sock->protocol == protocol) {
1399 silc_server_packet_queue_purge(server, sock);
1400 sock->protocol = NULL;
1404 if (server->server_type == SILC_SERVER &&
1405 server_entry->server_type == SILC_ROUTER)
1409 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
1410 if (ctx->sock == sock) {
1411 silc_socket_free(sock); /* unref */
1415 if (!ctx->received_failure) {
1416 /* Protocol error, probably timeout. Just restart the protocol. */
1417 SilcServerBackupProtocolContext proto_ctx;
1419 /* Restart the protocol. */
1420 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1421 proto_ctx->server = server;
1422 proto_ctx->sock = silc_socket_dup(sock);
1423 proto_ctx->responder = FALSE;
1424 proto_ctx->type = SILC_SERVER_BACKUP_START;
1425 proto_ctx->start = time(0);
1427 /* Start through scheduler */
1428 silc_schedule_task_add(server->schedule, 0,
1429 silc_server_backup_connected_later,
1432 SILC_TASK_PRI_NORMAL);
1434 /* If failure was received, switch back to normal backup router.
1435 For some reason primary wouldn't accept that we were supposed
1436 to perform resuming protocol. */
1437 server->server_type = SILC_BACKUP_ROUTER;
1438 silc_server_local_servers_toggle_enabled(server, FALSE);
1439 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1440 silc_server_update_servers_by_server(server, server->id_entry,
1442 silc_server_update_clients_by_server(server, NULL,
1443 sock->user_data, TRUE);
1445 /* Announce our clients and channels to the router */
1446 silc_server_announce_clients(server, 0, sock);
1447 silc_server_announce_channels(server, 0, sock);
1454 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1459 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
1461 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1462 /* Announce all of our information to the router. */
1463 if (server->server_type == SILC_ROUTER)
1464 silc_server_announce_servers(server, FALSE, 0,
1465 server->router->connection);
1467 /* Announce our clients and channels to the router */
1468 silc_server_announce_clients(server, 0, server->router->connection);
1469 silc_server_announce_channels(server, 0, server->router->connection);
1474 if (server->server_type == SILC_SERVER) {
1475 /* If we are still using backup router, send confirmation to backup
1476 that using it is still ok and continue sending traffic there.
1477 The backup will reply with error if it's not ok. */
1478 if (server->router && server->backup_primary) {
1479 /* Send START_USE just in case using backup wouldn't be ok. */
1480 silc_server_backup_send_start_use(server, server->router->connection,
1483 /* Check couple of times same START_USE just in case. */
1484 silc_schedule_task_add(server->schedule, 0,
1485 silc_server_backup_check_status,
1486 silc_socket_dup(server->router->connection),
1487 5, 1, SILC_TASK_TIMEOUT,
1488 SILC_TASK_PRI_NORMAL);
1489 silc_schedule_task_add(server->schedule, 0,
1490 silc_server_backup_check_status,
1491 silc_socket_dup(server->router->connection),
1492 20, 1, SILC_TASK_TIMEOUT,
1493 SILC_TASK_PRI_NORMAL);
1494 silc_schedule_task_add(server->schedule, 0,
1495 silc_server_backup_check_status,
1496 silc_socket_dup(server->router->connection),
1497 60, 1, SILC_TASK_TIMEOUT,
1498 SILC_TASK_PRI_NORMAL);
1503 if (ctx->sock && ctx->sock->protocol)
1504 ctx->sock->protocol = NULL;
1506 silc_socket_free(ctx->sock); /* unref */
1507 silc_protocol_free(protocol);
1508 silc_free(ctx->sessions);