5 Author: Pekka Riikonen <priikone@silcnet.org>
7 Copyright (C) 2001 - 2003 Pekka Riikonen
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; version 2 of the License.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
21 #include "serverincludes.h"
22 #include "server_internal.h"
/* Forward declarations. silc_server_protocol_backup_done is handed to
   silc_protocol_alloc() as the completion callback further down, and
   silc_server_backup_connect_primary is the connect callback passed to
   silc_server_backup_reconnect() by the resuming protocol. */
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
25 static void silc_server_backup_connect_primary(SilcServer server,
26 SilcServerEntry server_entry,
30 /************************** Types and Definitions ***************************/
/* One configured backup router. `server' is the backup router's server
   entry (NULL marks a free slot). The routines in this file also read and
   write the members `ip', `port' and `local' of this structure. */
34 SilcServerEntry server;
38 } SilcServerBackupEntry;
40 /* Holds IP address and port of the primary router that was replaced by a backup router. */
/* Record of a replaced primary router. Besides `server' the code below
   uses an `ip' member, which silc_server_backup_replaced_add() fills from
   the replaced router's Server ID. */
45 SilcServerEntry server; /* Backup router that replaced the primary */
46 } SilcServerBackupReplaced;
/* Backup router bookkeeping attached to the server as server->backup. */
49 struct SilcServerBackupStruct {
/* Array of configured backup routers and its length; slots whose
   `server' member is NULL are free and get reused. */
50 SilcServerBackupEntry *servers;
51 SilcUInt32 servers_count;
/* Array of pointers to replaced-router records and its length; a NULL
   pointer marks a free slot. */
52 SilcServerBackupReplaced **replaced;
53 SilcUInt32 replaced_count;
/* One session of the backup resuming protocol. `server_entry' is the
   remote server the session was opened with; the protocol code also sets
   the members `session' (session number) and `connected'. */
59 SilcServerEntry server_entry;
60 } SilcServerBackupProtocolSession;
62 /* Backup resuming protocol context */
/* Connection the protocol currently talks to. The context holds its own
   reference: it is set with silc_socket_dup() and swapped with
   silc_socket_free()/silc_socket_dup() when the protocol moves to
   another connection. */
65 SilcSocketConnection sock;
/* Sessions opened by (initiator) or announced to (responder) this
   protocol instance, grown with silc_realloc(). */
68 SilcServerBackupProtocolSession *sessions;
69 SilcUInt32 sessions_count;
/* responder: TRUE when we received START, FALSE when we sent it. */
71 unsigned int responder : 1;
72 unsigned int received_failure : 1;
73 unsigned int timeout : 1;
74 } *SilcServerBackupProtocolContext;
77 /********************* Backup Configuration Routines ************************/
79 /* Adds the `backup_server' as one of our backup routers. This can be
80 called multiple times to set multiple backup routers. The `ip' and `port'
81 are the IP and port that the `backup_server' will replace if the `ip'
82 becomes unresponsive. If `local' is TRUE then the `backup_server' is
83 in the local cell, if FALSE it is in some other cell. */
/* Registers `backup_server' as the backup for the router at `ip':`port'.
   server->backup is allocated on first use. The entry is written into the
   first slot whose `server' is NULL, or appended after growing the array
   with silc_realloc(). In both cases the port is stored through
   SILC_SWAB_16() and the textual `ip' is converted with
   silc_net_addr2bin() into a zeroed buffer.
   NOTE(review): the return values of silc_realloc() and
   silc_net_addr2bin() are not checked in the visible code. */
85 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
86 const char *ip, int port, bool local)
93 if (!server->backup) {
94 server->backup = silc_calloc(1, sizeof(*server->backup));
99 /* See if already added */
100 for (i = 0; i < server->backup->servers_count; i++) {
101 if (server->backup->servers[i].server == backup_server)
105 SILC_LOG_DEBUG(("Backup router %s will replace %s",
106 ((SilcSocketConnection)backup_server->connection)->ip,
/* Reuse a slot that silc_server_backup_del() cleared, if there is one. */
109 for (i = 0; i < server->backup->servers_count; i++) {
110 if (!server->backup->servers[i].server) {
111 server->backup->servers[i].server = backup_server;
112 server->backup->servers[i].local = local;
113 server->backup->servers[i].port = SILC_SWAB_16(port);
114 memset(server->backup->servers[i].ip.data, 0,
115 sizeof(server->backup->servers[i].ip.data));
116 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
117 sizeof(server->backup->servers[i].ip.data));
/* No free slot: grow the array and append at index servers_count. */
122 i = server->backup->servers_count;
123 server->backup->servers = silc_realloc(server->backup->servers,
124 sizeof(*server->backup->servers) *
126 server->backup->servers[i].server = backup_server;
127 server->backup->servers[i].local = local;
128 server->backup->servers[i].port = SILC_SWAB_16(port);
129 memset(server->backup->servers[i].ip.data, 0,
130 sizeof(server->backup->servers[i].ip.data));
131 silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
132 sizeof(server->backup->servers[i].ip.data));
133 server->backup->servers_count++;
136 /* Returns backup router for IP and port in `server_id' or NULL if there
137 does not exist backup router. */
/* Linear search over the configured backup routers. An entry matches when
   its slot is in use, its stored port equals server_id->port and its
   stored IP bytes equal server_id->ip.data (compared over the full size of
   the ID's IP buffer). Returns the server entry of the first match. */
139 SilcServerEntry silc_server_backup_get(SilcServer server,
140 SilcServerID *server_id)
147 for (i = 0; i < server->backup->servers_count; i++) {
148 if (server->backup->servers[i].server &&
149 server->backup->servers[i].port == server_id->port &&
150 !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
151 sizeof(server_id->ip.data))) {
152 SILC_LOG_DEBUG(("Found backup router %s for %s",
153 server->backup->servers[i].server->server_name,
154 silc_id_render(server_id, SILC_ID_SERVER)));
155 return server->backup->servers[i].server;
162 /* Deletes the backup server `server_entry'. */
/* Clears the slot that holds `server_entry': the server pointer is set to
   NULL and the stored IP is zeroed. The slot stays in the array (the array
   is not shrunk) so that silc_server_backup_add() can reuse it. */
164 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
171 for (i = 0; i < server->backup->servers_count; i++) {
172 if (server->backup->servers[i].server == server_entry) {
173 SILC_LOG_DEBUG(("Removing %s as backup router",
174 silc_id_render(server->backup->servers[i].server->id,
176 server->backup->servers[i].server = NULL;
177 memset(server->backup->servers[i].ip.data, 0,
178 sizeof(server->backup->servers[i].ip.data));
183 /* Frees all data allocated for backup routers. Call this after deleting
184 all backup routers and when new routers are added no more, for example
185 when shutting down the server. */
/* Tears down server->backup: any slot still in use is cleared through
   silc_server_backup_del(), then the servers array and the backup
   structure itself are freed and server->backup is reset to NULL.
   NOTE(review): the `replaced' array and its records are not freed in the
   visible code. */
187 void silc_server_backup_free(SilcServer server)
194 /* Delete existing servers if caller didn't do it */
195 for (i = 0; i < server->backup->servers_count; i++) {
196 if (server->backup->servers[i].server)
197 silc_server_backup_del(server, server->backup->servers[i].server);
200 silc_free(server->backup->servers);
201 silc_free(server->backup);
202 server->backup = NULL;
205 /* Marks the IP address and port from the `server_id' as being replaced
206 by the backup router indicated by `server_entry'. If the router connects at
207 a later time we can check whether it has been replaced by a backup router. */
/* Records that the router identified by `server_id' has been replaced by
   `server_entry'. A new record is allocated, the IP from the Server ID is
   copied into it, and it is stored in the first NULL slot of the
   `replaced' array or appended after growing the array with
   silc_realloc(). Only the IP is copied in the visible code; no port is
   stored.
   NOTE(review): the record is allocated in the declaration, before any
   other work, and the declaration ends in a stray second semicolon. */
210 void silc_server_backup_replaced_add(SilcServer server,
211 SilcServerID *server_id,
212 SilcServerEntry server_entry)
215 SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
218 server->backup = silc_calloc(1, sizeof(*server->backup));
/* First use: start with a one-element array holding a NULL (free) slot. */
219 if (!server->backup->replaced) {
220 server->backup->replaced =
221 silc_calloc(1, sizeof(*server->backup->replaced));
222 server->backup->replaced_count = 1;
225 SILC_LOG_DEBUG(("Replacing router %s with %s",
226 silc_id_render(server_id, SILC_ID_SERVER),
227 server_entry->server_name));
229 memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
230 r->server = server_entry;
/* Prefer a slot freed by silc_server_backup_replaced_del(). */
232 for (i = 0; i < server->backup->replaced_count; i++) {
233 if (!server->backup->replaced[i]) {
234 server->backup->replaced[i] = r;
/* No free slot: grow the pointer array and append. */
239 i = server->backup->replaced_count;
240 server->backup->replaced = silc_realloc(server->backup->replaced,
241 sizeof(*server->backup->replaced) *
243 server->backup->replaced[i] = r;
244 server->backup->replaced_count++;
247 /* Checks whether the IP address and port from the `server_id' have been
248 replaced by a backup router. If so, this returns TRUE and stores
249 the backup router entry in the `server_entry' pointer if non-NULL. Returns
250 FALSE if the router is not replaced by a backup router. */
/* Looks up `server_id' among the replaced-router records. Free (NULL)
   slots are skipped; a record matches when its IP bytes equal
   server_id->ip.data. Only the IP is compared in the visible code, not the
   port. On a match the replacing backup router is written to
   `*server_entry'. */
252 bool silc_server_backup_replaced_get(SilcServer server,
253 SilcServerID *server_id,
254 SilcServerEntry *server_entry)
258 if (!server->backup || !server->backup->replaced)
261 for (i = 0; i < server->backup->replaced_count; i++) {
262 if (!server->backup->replaced[i])
264 if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
265 sizeof(server_id->ip.data))) {
267 *server_entry = server->backup->replaced[i]->server;
268 SILC_LOG_DEBUG(("Router %s is replaced by %s",
269 silc_id_render(server_id, SILC_ID_SERVER),
270 server->backup->replaced[i]->server->server_name));
275 SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
276 silc_id_render(server_id, SILC_ID_SERVER)));
280 /* Deletes the replaced-host record whose backup router is `server_entry'. */
/* Frees the replaced-router record whose replacing backup router is
   `server_entry' and sets its slot to NULL so that
   silc_server_backup_replaced_add() can reuse it. Does nothing when no
   backup data or no `replaced' array exists. */
282 void silc_server_backup_replaced_del(SilcServer server,
283 SilcServerEntry server_entry)
287 if (!server->backup || !server->backup->replaced)
290 for (i = 0; i < server->backup->replaced_count; i++) {
291 if (!server->backup->replaced[i])
293 if (server->backup->replaced[i]->server == server_entry) {
294 silc_free(server->backup->replaced[i]);
295 server->backup->replaced[i] = NULL;
301 /* Broadcast the received packet indicated by `packet' to all of our backup
302 routers. All router wide information is passed using broadcast packets.
303 That is why all backup routers need to get this data too. It is expected
304 that the caller already knows that the `packet' is broadcast packet. */
306 void silc_server_backup_broadcast(SilcServer server,
307 SilcSocketConnection sender,
308 SilcPacketContext *packet)
310 SilcServerEntry backup;
311 SilcSocketConnection sock;
313 const SilcBufferStruct p;
314 SilcIDListData idata;
317 if (!server->backup || server->server_type != SILC_ROUTER)
320 SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
322 buffer = packet->buffer;
323 silc_buffer_push(buffer, buffer->data - buffer->head);
325 for (i = 0; i < server->backup->servers_count; i++) {
326 backup = server->backup->servers[i].server;
328 if (!backup || backup->connection == sender ||
329 server->backup->servers[i].local == FALSE)
331 if (server->backup->servers[i].server == server->id_entry)
334 idata = (SilcIDListData)backup;
335 sock = backup->connection;
337 if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send,
338 (const SilcBuffer)&p)) {
339 SILC_LOG_ERROR(("Cannot send packet"));
342 silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len);
343 silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++,
344 (SilcBuffer)&p, p.len);
346 SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len);
348 /* Now actually send the packet */
349 silc_server_packet_send_real(server, sock, FALSE);
351 /* Check for mandatory rekey */
352 if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
353 silc_schedule_task_add(server->schedule, sender->sock,
354 silc_server_rekey_callback, sender, 0, 1,
355 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
359 /* A generic routine to send data to all backup routers. If the `sender'
360 is provided it will indicate the original sender of the packet and the
361 packet won't be resent to that entity. The `data' is the data that will
362 be assembled to packet context before sending. The packet will be
363 encrypted in this function. If the `force_send' is TRUE the data is sent
364 immediately and not put to queue. If `local' is TRUE then the packet
365 will be sent only to local backup routers inside the cell. If FALSE the
366 packet can go from one cell to the other. This function has no effect
367 if there are no backup routers. */
/* Sends a packet of the given type to every configured backup router via
   silc_server_packet_send(). Skipped are: free slots, the `sender' entry,
   non-local backups when `local' is set, and our own entry
   (server->id_entry). Does nothing unless we are a router with backup
   data. `sock' is used only for the debug message. */
369 void silc_server_backup_send(SilcServer server,
370 SilcServerEntry sender,
372 SilcPacketFlags flags,
378 SilcServerEntry backup;
379 SilcSocketConnection sock;
382 if (!server->backup || server->server_type != SILC_ROUTER)
385 for (i = 0; i < server->backup->servers_count; i++) {
386 backup = server->backup->servers[i].server;
387 if (!backup || sender == backup)
389 if (local && server->backup->servers[i].local == FALSE)
391 if (server->backup->servers[i].server == server->id_entry)
394 sock = backup->connection;
396 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
397 silc_get_packet_name(type), sock->hostname, sock->ip));
399 silc_server_packet_send(server, backup->connection, type, flags,
400 data, data_len, force_send);
404 /* Same as silc_server_backup_send but sets a specific Destination ID to
405 the packet. The Destination ID is indicated by the `dst_id' and the
406 ID type `dst_id_type'. For example, packets destined to channels must
407 be sent using this function. */
/* Variant of silc_server_backup_send() that passes an explicit
   destination (`dst_id', `dst_id_type') to
   silc_server_packet_send_dest(). The same filtering applies: free slots,
   the `sender' entry, non-local backups when `local' is set, and our own
   entry are skipped. */
409 void silc_server_backup_send_dest(SilcServer server,
410 SilcServerEntry sender,
412 SilcPacketFlags flags,
414 SilcIdType dst_id_type,
420 SilcServerEntry backup;
421 SilcSocketConnection sock;
424 if (!server->backup || server->server_type != SILC_ROUTER)
427 for (i = 0; i < server->backup->servers_count; i++) {
428 backup = server->backup->servers[i].server;
429 if (!backup || sender == backup)
431 if (local && server->backup->servers[i].local == FALSE)
433 if (server->backup->servers[i].server == server->id_entry)
436 sock = backup->connection;
438 SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
439 silc_get_packet_name(type), sock->hostname, sock->ip));
441 silc_server_packet_send_dest(server, backup->connection, type, flags,
442 dst_id, dst_id_type, data, data_len,
447 /* Send the START_USE indication to remote connection. If `failure' is
448 TRUE then this sends SILC_PACKET_FAILURE. Otherwise it sends
449 SILC_PACKET_RESUME_ROUTER. */
/* Sends the START_USE indication on `sock'. Two encodings are visible:
   one writes SILC_SERVER_BACKUP_START_USE as a 32-bit MSB-first value and
   sends it in a SILC_PACKET_FAILURE packet; the other puts it in the first
   payload byte of a SILC_PACKET_RESUME_ROUTER packet. Which one is used
   depends on `failure' (the branch lines themselves are not visible
   here). */
451 void silc_server_backup_send_start_use(SilcServer server,
452 SilcSocketConnection sock,
455 unsigned char data[4];
457 SILC_LOG_DEBUG(("Sending START_USE (%s) to %s",
458 failure ? "failure" : "success", sock->ip));
461 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
462 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
465 data[0] = SILC_SERVER_BACKUP_START_USE;
467 silc_server_packet_send(server, sock,
468 SILC_PACKET_RESUME_ROUTER, 0,
473 /* Send the REPLACED indication to remote router. This is sent by the
474 primary router (remote router) of the primary router that came back
475 online. This is not sent by a backup router or any other server. */
477 void silc_server_backup_send_replaced(SilcServer server,
478 SilcSocketConnection sock)
480 unsigned char data[4];
482 SILC_LOG_DEBUG(("Sending REPLACED (%s) to %s", sock->ip));
484 data[0] = SILC_SERVER_BACKUP_REPLACED;
486 silc_server_packet_send(server, sock,
487 SILC_PACKET_RESUME_ROUTER, 0,
492 /************************ Backup Resuming Protocol **************************/
494 /* Timeout callback for protocol */
/* Task callback armed when the resuming protocol is started. `context' is
   the protocol and `app_context' the server. It logs the timeout, cancels
   the protocol, forces it into the ERROR state and runs the protocol's
   final callback. `ctx' is fetched but not otherwise used in the visible
   lines. */
496 SILC_TASK_CALLBACK(silc_server_backup_timeout)
498 SilcProtocol protocol = context;
499 SilcServerBackupProtocolContext ctx = protocol->context;
500 SilcServer server = app_context;
502 SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
504 silc_protocol_cancel(protocol, server->schedule);
505 protocol->state = SILC_PROTOCOL_STATE_ERROR;
506 silc_protocol_execute_final(protocol, server->schedule);
509 /* Callback to start the protocol as responder */
/* Task callback that starts the resuming protocol on the responder side.
   `context' is the protocol context prepared by
   silc_server_backup_resume_router(). While another protocol is attached
   to the socket the callback re-schedules itself; otherwise it allocates a
   SILC_PROTOCOL_SERVER_BACKUP protocol on the socket, executes it and arms
   silc_server_backup_timeout with timeout arguments 30, 0 (presumably
   30 seconds). */
511 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
513 SilcServerBackupProtocolContext proto_ctx = context;
514 SilcSocketConnection sock = proto_ctx->sock;
515 SilcServer server = app_context;
517 /* If other protocol is executing at the same time, start with timeout. */
518 if (sock->protocol) {
519 SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
520 silc_schedule_task_add(server->schedule, sock->sock,
521 silc_server_backup_responder_start,
523 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
527 /* Run the backup resuming protocol */
528 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
529 &sock->protocol, proto_ctx,
530 silc_server_protocol_backup_done);
531 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
532 silc_schedule_task_add(server->schedule, sock->sock,
533 silc_server_backup_timeout,
534 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
535 SILC_TASK_PRI_NORMAL);
538 /* Callback that sends START_USE to the backup router, provided we are still using the backup as our primary. */
541 SILC_TASK_CALLBACK(silc_server_backup_check_status)
543 SilcSocketConnection sock = context;
544 SilcServer server = app_context;
546 /* Check whether we are still using backup */
547 if (!server->backup_primary)
550 silc_server_backup_send_start_use(server, sock, FALSE);
551 silc_socket_free(sock); /* unref */
/* Context kept while waiting for the PING reply from the primary router:
   `sock' and `packet' are the duplicated connection and RESUME_ROUTER
   packet to reprocess or answer once the reply (or timeout) arrives. The
   reply callback also reads a `server' member of this structure. */
556 SilcSocketConnection sock;
557 SilcPacketContext *packet;
558 } *SilcServerBackupPing;
560 /* PING command reply callback */
/* Pending-command callback for the PING sent to the primary router.
   `context' is the SilcServerBackupPing context and `reply' the command
   reply context.

   When a reply context exists and silc_command_get_status() reports
   failure, the primary is treated as down: its user data is freed, the
   connection is marked disconnecting and closed, and the saved
   RESUME_ROUTER packet is processed again. In the other visible branch the
   primary answered, so a failure START_USE is sent to the requesting
   server. Finally the references taken on the socket and the packet are
   released. */
562 void silc_server_backup_ping_reply(void *context, void *reply)
564 SilcServerBackupPing pc = context;
565 SilcServerCommandReplyContext cmdr = reply;
567 if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
568 /* Timeout error occurred, the primary is really down. */
569 SilcSocketConnection primary = SILC_PRIMARY_ROUTE(pc->server);
571 SILC_LOG_DEBUG(("PING timeout, primary is down"));
574 if (primary->user_data)
575 silc_server_free_sock_user_data(pc->server, primary, NULL);
576 SILC_SET_DISCONNECTING(primary);
577 silc_server_close_connection(pc->server, primary);
580 /* Reprocess the RESUME_ROUTER packet */
581 silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
583 /* The primary is not down, refuse to serve the server as primary */
584 SILC_LOG_DEBUG(("PING received, primary is up"));
585 silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
588 silc_socket_free(pc->sock);
589 silc_packet_context_free(pc->packet);
593 /* Processes incoming RESUME_ROUTER packet. This can give the packet
594 for processing to the protocol handler or allocate new protocol if
595 start command is received. */
/* Entry point for every received RESUME_ROUTER packet.

   Packets from client or unknown connections are rejected. The payload is
   parsed as two single-byte fields, `type' and `session', and then handled
   by type:

   - START_USE: a normal server only logs it. A router, or a standalone
     server, answers with a success START_USE. Otherwise we PING the
     primary router, save a duplicate of the socket and packet, and
     register silc_server_backup_ping_reply() as a timed pending-command
     callback (argument 15).
   - START: refused with a FAILURE packet when we are a router that is not
     a backup router and server->backup_closed is set. Otherwise a
     responder protocol context is created and
     silc_server_backup_responder_start is scheduled.
   - REPLACED from our primary route while we are a router: the
     connection's ID data is marked SILC_IDLIST_STATUS_DISABLED.
   - RESUMED on a disabled router connection that has no protocol: the
     protocol running on the connection of the backup router that replaced
     it is attached to this socket and the context's socket is swapped.

   Finally, if SILC_SERVER_IS_BACKUP(sock) holds, the protocol is executed
   for a known session, or unconditionally for RESUMED; an unknown session
   is logged as an error. */
597 void silc_server_backup_resume_router(SilcServer server,
598 SilcSocketConnection sock,
599 SilcPacketContext *packet)
601 SilcUInt8 type, session;
602 SilcServerBackupProtocolContext ctx;
603 SilcIDListData idata;
606 SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
/* Only servers and routers may take part in backup resuming. */
608 if (sock->type == SILC_SOCKET_TYPE_CLIENT ||
609 sock->type == SILC_SOCKET_TYPE_UNKNOWN) {
610 SILC_LOG_DEBUG(("Bad packet received"));
614 idata = (SilcIDListData)sock->user_data;
616 ret = silc_buffer_unformat(packet->buffer,
617 SILC_STR_UI_CHAR(&type),
618 SILC_STR_UI_CHAR(&session),
621 SILC_LOG_ERROR(("Malformed resume router packet received"));
625 /* Check whether this packet is used to tell us that server will start
626 using us as primary router. */
627 if (type == SILC_SERVER_BACKUP_START_USE) {
629 SilcServerBackupPing pc;
631 /* If we are normal server then backup router has sent us back
632 this reply and we use the backup as primary router now. */
633 if (server->server_type == SILC_SERVER) {
634 /* Nothing to do here actually, since we have switched already. */
635 SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
639 /* Backup router following. */
641 /* If we are marked as router then the primary is down and we send
642 success START_USE back to the server. */
643 if (server->server_type == SILC_ROUTER) {
644 SILC_LOG_DEBUG(("Sending success START_USE back to %s", sock->ip));
645 silc_server_backup_send_start_use(server, sock, FALSE);
649 /* We have just lost primary, send success START_USE back */
650 if (server->standalone) {
651 SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back to %s",
653 silc_server_backup_send_start_use(server, sock, FALSE);
657 /* We are backup router. This server claims that our primary is down.
658 We will check this ourselves by sending PING command to the primary. */
659 SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
660 idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
661 silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
662 SILC_COMMAND_PING, ++server->cmd_ident, 1,
663 1, idp->data, idp->len);
664 silc_buffer_free(idp);
666 /* Reprocess this packet after received reply from router */
667 pc = silc_calloc(1, sizeof(*pc));
/* Duplicates keep the socket and packet alive until
   silc_server_backup_ping_reply() releases them. */
669 pc->sock = silc_socket_dup(sock);
670 pc->packet = silc_packet_context_dup(packet);
671 silc_server_command_pending_timed(server, SILC_COMMAND_PING,
673 silc_server_backup_ping_reply, pc, 15);
678 /* Start the resuming protocol if requested. */
679 if (type == SILC_SERVER_BACKUP_START) {
680 /* We have received a start for resuming protocol. We are either
681 primary router that came back online or normal server. */
682 SilcServerBackupProtocolContext proto_ctx;
684 /* If backup had closed the connection earlier we won't allow resuming
685 since we (primary router) have never gone away. */
686 if (server->server_type == SILC_ROUTER && !server->backup_router &&
687 server->backup_closed) {
688 unsigned char data[4];
689 SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
691 SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
692 silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
694 server->backup_closed = FALSE;
/* Responder context; it holds its own reference to the socket. */
698 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
699 proto_ctx->server = server;
700 proto_ctx->sock = silc_socket_dup(sock);
701 proto_ctx->responder = TRUE;
702 proto_ctx->type = type;
703 proto_ctx->session = session;
704 proto_ctx->start = time(0);
706 SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
707 SILC_LOG_INFO(("Starting backup resuming protocol"));
709 /* Start protocol immediately */
710 silc_schedule_task_add(server->schedule, sock->sock,
711 silc_server_backup_responder_start,
713 SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
718 /* If we are router and the packet is coming from our primary router
719 then it means we have been replaced by an backup router in our cell. */
720 if (type == SILC_SERVER_BACKUP_REPLACED &&
721 server->server_type == SILC_ROUTER &&
722 sock->type == SILC_SOCKET_TYPE_ROUTER &&
723 SILC_PRIMARY_ROUTE(server) == sock) {
724 /* We have been replaced by an backup router in our cell. We must
725 mark our primary router connection disabled since we are not allowed
726 to use it at this moment. */
727 SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
728 "wait until backup resuming protocol is executed"));
729 idata->status |= SILC_IDLIST_STATUS_DISABLED;
734 /* Activate the shared protocol context for this socket connection
736 if (type == SILC_SERVER_BACKUP_RESUMED &&
737 sock->type == SILC_SOCKET_TYPE_ROUTER && !sock->protocol &&
738 idata->status & SILC_IDLIST_STATUS_DISABLED) {
739 SilcServerEntry backup_router;
741 if (silc_server_backup_replaced_get(server, ((SilcServerEntry)idata)->id,
743 SilcSocketConnection bsock =
744 (SilcSocketConnection)backup_router->connection;
745 if (bsock->protocol && bsock->protocol->protocol &&
746 bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) {
747 sock->protocol = bsock->protocol;
748 ctx = sock->protocol->context;
750 silc_socket_free(ctx->sock); /* unref */
751 ctx->sock = silc_socket_dup(sock);
757 /* Call the resuming protocol if the protocol is active. */
758 if (SILC_SERVER_IS_BACKUP(sock)) {
759 ctx = sock->protocol->context;
/* Dispatch to the protocol only for a session number it knows about. */
762 for (i = 0; i < ctx->sessions_count; i++) {
763 if (session == ctx->sessions[i].session) {
764 ctx->session = session;
765 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
770 /* If RESUMED received the session ID is zero, execute the protocol. */
771 if (type == SILC_SERVER_BACKUP_RESUMED) {
772 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
776 SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
781 /* Timeout task callback to connect to remote router */
/* Timeout task that opens the connection described by the
   SilcServerConnection in `context'. The local bind address is the
   configured primary server IP, or NULL when none is configured. The
   visible retry handling applies to normal servers: the retry counter is
   incremented and, once it exceeds 3, the connection data is released;
   otherwise this task is re-scheduled with timeout arguments 10, 0. After
   a successful connect the key exchange is started on the new socket. */
783 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
785 SilcServer server = app_context;
786 SilcServerConnection sconn = (SilcServerConnection)context;
788 const char *server_ip;
790 SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host,
791 sconn->remote_port));
793 /* Connect to remote host */
794 server_ip = server->config->server_info->primary == NULL ? NULL :
795 server->config->server_info->primary->server_ip;
796 sock = silc_net_create_connection(server_ip, sconn->remote_port,
799 if (server->server_type == SILC_SERVER) {
800 sconn->retry_count++;
801 if (sconn->retry_count > 3) {
802 silc_free(sconn->remote_host);
807 silc_schedule_task_add(server->schedule, 0,
808 silc_server_backup_connect_to_router,
809 context, 10, 0, SILC_TASK_TIMEOUT,
810 SILC_TASK_PRI_NORMAL);
814 /* Continue with key exchange protocol */
815 silc_server_start_key_exchange(server, sconn, sock);
818 /* Constantly tries to reconnect to a primary router indicated by the
819 `ip' and `port'. The `connected' callback will be called when the
820 connection is created. */
/* Builds a SilcServerConnection for `ip':`port' and schedules
   silc_server_backup_connect_to_router with timeout arguments 1, 0. The
   host string is copied with strdup(); `callback' and `context' are stored
   in the connection data for use once the connection is up. The connection
   is flagged no_reconnect and starts with a retry count of zero.
   NOTE(review): the strdup() result is not checked in the visible code. */
822 void silc_server_backup_reconnect(SilcServer server,
823 const char *ip, SilcUInt16 port,
824 SilcServerConnectRouterCallback callback,
827 SilcServerConnection sconn;
829 SILC_LOG_INFO(("Attempting to reconnect to primary router"));
831 sconn = silc_calloc(1, sizeof(*sconn));
832 sconn->remote_host = strdup(ip);
833 sconn->remote_port = port;
834 sconn->callback = callback;
835 sconn->callback_context = context;
836 sconn->no_reconnect = TRUE;
837 sconn->retry_count = 0;
838 silc_schedule_task_add(server->schedule, 0,
839 silc_server_backup_connect_to_router,
840 sconn, 1, 0, SILC_TASK_TIMEOUT,
841 SILC_TASK_PRI_NORMAL);
844 /* Task that is called after backup router has connected back to
845 primary router and we are starting the resuming protocol */
/* Initiator-side counterpart of silc_server_backup_responder_start.
   `context' is the protocol context created by
   silc_server_backup_connected(). While another protocol is attached to
   the socket the task re-schedules itself; otherwise it allocates the
   SILC_PROTOCOL_SERVER_BACKUP protocol on the socket, executes it and arms
   silc_server_backup_timeout with timeout arguments 30, 0. */
847 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
849 SilcServerBackupProtocolContext proto_ctx =
850 (SilcServerBackupProtocolContext)context;
851 SilcServer server = proto_ctx->server;
852 SilcSocketConnection sock = proto_ctx->sock;
854 /* If running other protocol already run this one a bit later. */
855 if (sock->protocol) {
856 SILC_LOG_DEBUG(("Other protocol is running, wait for it to finish"));
857 silc_schedule_task_add(server->schedule, 0,
858 silc_server_backup_connected_later,
861 SILC_TASK_PRI_NORMAL);
865 SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
866 SILC_LOG_INFO(("Starting backup resuming protocol"));
868 /* Run the backup resuming protocol */
869 silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
870 &sock->protocol, proto_ctx,
871 silc_server_protocol_backup_done);
872 silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
874 silc_schedule_task_add(server->schedule, sock->sock,
875 silc_server_backup_timeout,
876 sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
877 SILC_TASK_PRI_NORMAL);
880 /* Called when we've established connection back to our primary router
881 when we've acting as backup router and have replaced the primary router
882 in the cell. This function will start the backup resuming protocol. */
/* Connect callback used by the backup router when reconnecting to the
   primary router. Two paths are visible.

   Retry path (its guarding condition is not visible here, presumably a
   missing `server_entry'): the configured primary router is looked up and,
   if no router socket to its host and port exists, another reconnect is
   started with this function as the callback again.

   Success path: an initiator protocol context (responder FALSE, type
   SILC_SERVER_BACKUP_START, start time now) is created with its own
   reference to the connection, and silc_server_backup_connected_later is
   scheduled to start the protocol. */
884 void silc_server_backup_connected(SilcServer server,
885 SilcServerEntry server_entry,
888 SilcServerBackupProtocolContext proto_ctx;
889 SilcSocketConnection sock;
893 SilcServerConfigRouter *primary;
894 primary = silc_server_config_get_primary_router(server);
896 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
897 primary->host, primary->port))
898 silc_server_backup_reconnect(server,
899 primary->host, primary->port,
900 silc_server_backup_connected,
906 sock = (SilcSocketConnection)server_entry->connection;
907 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
908 proto_ctx->server = server;
909 proto_ctx->sock = silc_socket_dup(sock);
910 proto_ctx->responder = FALSE;
911 proto_ctx->type = SILC_SERVER_BACKUP_START;
912 proto_ctx->start = time(0);
914 /* Start through scheduler */
915 silc_schedule_task_add(server->schedule, 0,
916 silc_server_backup_connected_later,
919 SILC_TASK_PRI_NORMAL);
922 /* Called when normal server has connected to its primary router after
923 backup router has sent the START packet in resuming protocol. We will
924 move the protocol context from the backup router connection to the primary router connection. */
/* Connect callback used by a responder after it has reconnected to its
   primary router. `context' is the backup router's socket connection, for
   which the caller took a reference with silc_socket_dup().

   If the backup router connection is disconnecting or disconnected, the
   reference is dropped. In the retry path (guarding condition not visible
   here) another reconnect to the configured primary is started, with this
   function as the callback, when no router socket to it exists. Otherwise
   the reference is released, a CONNECTED packet carrying the session
   number is sent to the backup router, the primary's ID data is marked
   SILC_IDLIST_STATUS_DISABLED, and the protocol is moved from the backup
   router connection to the primary router connection.

   NOTE(review): backup_router is still dereferenced after
   silc_socket_free(backup_router); confirm that another reference keeps
   the connection alive at that point. */
927 static void silc_server_backup_connect_primary(SilcServer server,
928 SilcServerEntry server_entry,
931 SilcSocketConnection backup_router = (SilcSocketConnection)context;
932 SilcServerBackupProtocolContext ctx;
933 SilcSocketConnection sock;
934 SilcIDListData idata;
935 unsigned char data[2];
937 if (SILC_IS_DISCONNECTING(backup_router) ||
938 SILC_IS_DISCONNECTED(backup_router)) {
939 silc_socket_free(backup_router);
945 SilcServerConfigRouter *primary;
946 primary = silc_server_config_get_primary_router(server);
948 if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
949 primary->host, primary->port))
950 silc_server_backup_reconnect(server,
951 primary->host, primary->port,
952 silc_server_backup_connect_primary,
958 silc_socket_free(backup_router);
960 if (!backup_router->protocol)
962 if (!server_entry->connection)
965 ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
966 sock = (SilcSocketConnection)server_entry->connection;
967 idata = (SilcIDListData)server_entry;
969 SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
970 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
973 /* Send the CONNECTED packet back to the backup router. */
974 data[0] = SILC_SERVER_BACKUP_CONNECTED;
975 data[1] = ctx->session;
976 silc_server_packet_send(server, backup_router,
977 SILC_PACKET_RESUME_ROUTER, 0, data, 2, FALSE);
979 /* The primary connection is disabled until it sends the RESUMED packet
981 idata->status |= SILC_IDLIST_STATUS_DISABLED;
983 /* Move this protocol context from this backup router connection to
984 the primary router connection since it will send the subsequent
985 packets in this protocol. We don't talk with backup router
987 sock->protocol = backup_router->protocol;
989 silc_socket_free(ctx->sock); /* unref */
990 ctx->sock = silc_socket_dup(server_entry->connection);
991 backup_router->protocol = NULL;
994 /* Timeout callback used by the backup router to send the ENDING packet
995 to primary router to indicate that it can now resume as being primary
996 router. All CONNECTED packets have been received when we reach this. */
/* Task callback whose `context' is the protocol. It selects the session
   whose server entry is the user data of the protocol's current socket,
   sends a two-byte RESUME_ROUTER packet (SILC_SERVER_BACKUP_ENDING plus
   the session number) on that socket, and moves the protocol to the END
   state. If several sessions match, the last one wins; if none matches,
   ctx->session keeps its previous value. */
998 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
1000 SilcProtocol protocol = (SilcProtocol)context;
1001 SilcServerBackupProtocolContext ctx = protocol->context;
1002 SilcServer server = ctx->server;
1003 unsigned char data[2];
1006 SILC_LOG_DEBUG(("Start"));
1008 for (i = 0; i < ctx->sessions_count; i++)
1009 if (ctx->sessions[i].server_entry == ctx->sock->user_data)
1010 ctx->session = ctx->sessions[i].session;
1012 /* We've received all the CONNECTED packets and now we'll send the
1013 ENDING packet to the new primary router. */
1014 data[0] = SILC_SERVER_BACKUP_ENDING;
1015 data[1] = ctx->session;
1016 silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
1017 data, sizeof(data), FALSE);
1019 /* The protocol will go to END state. */
1020 protocol->state = SILC_PROTOCOL_STATE_END;
1023 /* Backup resuming protocol. This protocol is executed when the primary
1024 router wants to resume its position as being primary router. */
1026 SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
1028 SilcProtocol protocol = (SilcProtocol)context;
1029 SilcServerBackupProtocolContext ctx = protocol->context;
1030 SilcServer server = ctx->server;
1031 SilcServerEntry server_entry;
1032 SilcSocketConnection sock = NULL;
1033 unsigned char data[2];
1036 if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN)
1037 protocol->state = SILC_PROTOCOL_STATE_START;
1039 switch(protocol->state) {
1040 case SILC_PROTOCOL_STATE_START:
1041 if (ctx->responder == FALSE) {
1043 * Initiator (backup router)
1046 /* Send the START packet to primary router and normal servers. The
1047 packet will indicate to the primary router that it has been replaced
1048 by us. For normal servers it means that we will be resigning as
1049 being primary router shortly. */
1050 for (i = 0; i < server->config->param.connections_max; i++) {
1051 sock = server->sockets[i];
1052 if (!sock || !sock->user_data ||
1053 sock->user_data == server->id_entry ||
1054 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1055 sock->type != SILC_SOCKET_TYPE_SERVER))
1058 server_entry = sock->user_data;
1059 if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
1062 ctx->sessions = silc_realloc(ctx->sessions,
1063 sizeof(*ctx->sessions) *
1064 (ctx->sessions_count + 1));
1065 ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
1066 ctx->sessions[ctx->sessions_count].connected = FALSE;
1067 ctx->sessions[ctx->sessions_count].server_entry = server_entry;
1069 SILC_LOG_DEBUG(("Sending START to %s (session %d)",
1070 server_entry->server_name, ctx->sessions_count));
1071 SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1072 server_entry->server_name, ctx->sessions_count));
1074 /* This connection is performing this protocol too now */
1075 sock->protocol = protocol;
1077 data[0] = SILC_SERVER_BACKUP_START;
1078 data[1] = ctx->sessions_count;
1079 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1080 data, sizeof(data), FALSE);
1081 ctx->sessions_count++;
1084 /* If we are not standalone and our primary is not the one we're
1085 talking to now, then announce our information to it since we
1086 haven't done that yet. Standalone backup router announces
1087 these during connecting to the primary. */
1088 if (!server->standalone && SILC_PRIMARY_ROUTE(server) != ctx->sock) {
1089 silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1090 silc_server_announce_clients(server, 0, ctx->sock);
1091 silc_server_announce_channels(server, 0, ctx->sock);
1098 * Responder (all servers and routers)
1100 SilcServerConfigRouter *primary;
1102 /* We should have received START packet */
1103 if (ctx->type != SILC_SERVER_BACKUP_START) {
1104 SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1108 /* Connect to the primary router that was down that is now supposed
1109 to be back online. We send the CONNECTED packet after we've
1110 established the connection to the primary router. */
1111 primary = silc_server_config_get_primary_router(server);
1112 if (primary && server->backup_primary &&
1113 !silc_server_num_sockets_by_remote(server,
1114 silc_net_is_ip(primary->host) ?
1115 primary->host : NULL,
1116 silc_net_is_ip(primary->host) ?
1117 NULL : primary->host,
1119 SILC_SOCKET_TYPE_ROUTER)) {
1120 SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1122 silc_server_backup_reconnect(server,
1123 primary->host, primary->port,
1124 silc_server_backup_connect_primary,
1125 silc_socket_dup(ctx->sock));
1127 /* Nowhere to connect just return the CONNECTED packet */
1128 SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1130 SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1133 /* Send the CONNECTED packet back to the backup router. */
1134 data[0] = SILC_SERVER_BACKUP_CONNECTED;
1135 data[1] = ctx->session;
1136 silc_server_packet_send(server, ctx->sock,
1137 SILC_PACKET_RESUME_ROUTER, 0,
1138 data, sizeof(data), FALSE);
1141 /* Add this resuming session */
1142 ctx->sessions = silc_realloc(ctx->sessions,
1143 sizeof(*ctx->sessions) *
1144 (ctx->sessions_count + 1));
1145 ctx->sessions[ctx->sessions_count].session = ctx->session;
1146 ctx->sessions_count++;
1148 /* Normal server goes directly to the END state. */
1149 if (server->server_type == SILC_ROUTER &&
1151 server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1154 protocol->state = SILC_PROTOCOL_STATE_END;
1159 if (ctx->responder == FALSE) {
1161 * Initiator (backup router)
1164 /* We should have received CONNECTED packet */
1165 if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1166 SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1170 for (i = 0; i < ctx->sessions_count; i++) {
1171 if (ctx->sessions[i].session == ctx->session) {
1172 ctx->sessions[i].connected = TRUE;
1173 SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1174 ctx->sessions[i].server_entry->server_name,
1176 SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1181 /* See if all returned CONNECTED, if not, then continue waiting. */
1182 for (i = 0; i < ctx->sessions_count; i++) {
1183 if (!ctx->sessions[i].connected)
1187 SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1189 SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1191 /* The ENDING is sent with timeout, and then we continue to the
1192 END state in the protocol. */
1193 silc_schedule_task_add(server->schedule, 0,
1194 silc_server_backup_send_resumed,
1195 protocol, 1, 0, SILC_TASK_TIMEOUT,
1196 SILC_TASK_PRI_NORMAL);
1201 * Responder (primary router)
1204 /* We should have received the ENDING packet */
1205 if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1206 SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1210 SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1212 /* Switch announced informations to our primary router of using the
1214 silc_server_local_servers_toggle_enabled(server, TRUE);
1215 silc_server_update_servers_by_server(server, ctx->sock->user_data,
1217 silc_server_update_clients_by_server(server, ctx->sock->user_data,
1218 server->router, TRUE);
1220 /* We as primary router now must send RESUMED packets to all servers
1221 and routers so that they know we are back. For backup router we
1222 send the packet last so that we give the backup as much time as
1223 possible to deal with message routing at this critical moment. */
1224 for (i = 0; i < server->config->param.connections_max; i++) {
1225 sock = server->sockets[i];
1226 if (!sock || !sock->user_data ||
1227 sock->user_data == server->id_entry ||
1228 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1229 sock->type != SILC_SOCKET_TYPE_SERVER))
1232 /* Send to backup last */
1233 if (sock == ctx->sock)
1237 server_entry = sock->user_data;
1238 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1240 SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1241 SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1243 /* This connection is performing this protocol too now */
1244 sock->protocol = protocol;
1246 data[0] = SILC_SERVER_BACKUP_RESUMED;
1248 silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1249 data, sizeof(data), FALSE);
1250 silc_server_packet_queue_purge(server,sock);
1253 /* Now send the same packet to backup */
1254 if (sock != ctx->sock) {
1257 goto send_to_backup;
1260 /* We are now resumed and are back as primary router in the cell. */
1261 SILC_LOG_INFO(("We are now the primary router of our cell again"));
1262 server->wait_backup = FALSE;
1264 /* For us this is the end of this protocol. */
1265 if (protocol->final_callback)
1266 silc_protocol_execute_final(protocol, server->schedule);
1268 silc_protocol_free(protocol);
1272 case SILC_PROTOCOL_STATE_END:
1275 * Responder (backup router, servers, and remote router)
1277 SilcServerEntry router, backup_router;
1279 /* We should have received RESUMED from our primary router. */
1280 if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1281 SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1285 SILC_LOG_INFO(("Received RESUMED from new primary router"));
1287 /* If we are the backup router, mark that we are no longer primary
1288 but are back to backup router status. */
1289 if (server->backup_router)
1290 server->server_type = SILC_BACKUP_ROUTER;
1292 /* We have now new primary router. All traffic goes there from now on. */
1293 router = ctx->sock->user_data;
1294 if (silc_server_backup_replaced_get(server, router->id,
1297 if (backup_router == server->router) {
1298 /* We have new primary router now */
1299 server->id_entry->router = router;
1300 server->router = router;
1301 SILC_LOG_INFO(("Switching back to primary router %s",
1302 server->router->server_name));
1304 /* We are connected to new primary and now continue using it */
1305 SILC_LOG_INFO(("Resuming the use of primary router %s",
1306 router->server_name));
1308 server->backup_primary = FALSE;
1309 sock = router->connection;
1311 /* Update the client entries of the backup router to the new
1313 silc_server_local_servers_toggle_enabled(server, FALSE);
1314 router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1315 silc_server_update_servers_by_server(server, backup_router, router);
1316 silc_server_update_clients_by_server(server, NULL, router, FALSE);
1317 if (server->server_type == SILC_SERVER)
1318 silc_server_update_channels_by_server(server, backup_router, router);
1319 silc_server_backup_replaced_del(server, backup_router);
1322 /* Send notify about resuming the use of primary router to local operators */
1323 SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1324 SILC_NOTIFY_TYPE_NONE,
1325 ("%s resumed the use of primary router %s",
1326 server->server_name,
1327 server->router->server_name));
1329 /* Protocol has ended, call the final callback */
1330 if (protocol->final_callback)
1331 silc_protocol_execute_final(protocol, server->schedule);
1333 silc_protocol_free(protocol);
1337 case SILC_PROTOCOL_STATE_ERROR:
1338 /* Protocol has ended, call the final callback */
1339 if (protocol->final_callback)
1340 silc_protocol_execute_final(protocol, server->schedule);
1342 silc_protocol_free(protocol);
1345 case SILC_PROTOCOL_STATE_FAILURE:
1346 /* Protocol has ended, call the final callback */
1347 SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1348 ctx->received_failure = TRUE;
1349 if (protocol->final_callback)
1350 silc_protocol_execute_final(protocol, server->schedule);
1352 silc_protocol_free(protocol);
1355 case SILC_PROTOCOL_STATE_UNKNOWN:
1360 /* Final resuming protocol completion callback */
1362 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1364 SilcProtocol protocol = (SilcProtocol)context;
1365 SilcServerBackupProtocolContext ctx = protocol->context;
1366 SilcServer server = ctx->server;
1367 SilcServerEntry server_entry;
1368 SilcSocketConnection sock;
1372 silc_schedule_task_del_by_context(server->schedule, protocol);
1374 error = (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
1375 protocol->state == SILC_PROTOCOL_STATE_FAILURE);
1378 SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
1379 if (server->server_type == SILC_SERVER)
1380 silc_schedule_task_del_by_callback(server->schedule,
1381 silc_server_backup_connect_to_router);
1384 if (server->server_shutdown)
1387 /* Remove this protocol from all server entries that have it */
1388 for (i = 0; i < server->config->param.connections_max; i++) {
1389 sock = server->sockets[i];
1390 if (!sock || !sock->user_data ||
1391 (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1392 sock->type != SILC_SOCKET_TYPE_SERVER))
1395 server_entry = sock->user_data;
1397 /* The SilcProtocol context was shared between all connections, clear
1398 it from all connections. */
1399 if (sock->protocol == protocol) {
1400 sock->protocol = NULL;
1404 if (server->server_type == SILC_SERVER &&
1405 server_entry->server_type == SILC_ROUTER)
1409 if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
1410 if (ctx->sock == sock) {
1411 silc_socket_free(sock); /* unref */
1415 if (!ctx->received_failure) {
1416 /* Protocol error, probably timeout. Just restart the protocol. */
1417 SilcServerBackupProtocolContext proto_ctx;
1419 /* Restart the protocol. */
1420 proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1421 proto_ctx->server = server;
1422 proto_ctx->sock = silc_socket_dup(sock);
1423 proto_ctx->responder = FALSE;
1424 proto_ctx->type = SILC_SERVER_BACKUP_START;
1425 proto_ctx->start = time(0);
1427 /* Start through scheduler */
1428 silc_schedule_task_add(server->schedule, 0,
1429 silc_server_backup_connected_later,
1432 SILC_TASK_PRI_NORMAL);
1434 /* If failure was received, switch back to normal backup router.
1435 For some reason primary wouldn't accept that we were supposed
1436 to perform resuming protocol. */
1437 server->server_type = SILC_BACKUP_ROUTER;
1438 silc_server_local_servers_toggle_enabled(server, FALSE);
1439 silc_server_update_servers_by_server(server, server->id_entry,
1441 silc_server_update_clients_by_server(server, NULL,
1442 sock->user_data, FALSE);
1444 /* Announce our clients and channels to the router */
1445 silc_server_announce_clients(server, ctx->start, sock);
1446 silc_server_announce_channels(server, ctx->start, sock);
1453 server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1458 SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
1460 if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1461 /* Announce all of our information to the router. */
1462 if (server->server_type == SILC_ROUTER)
1463 silc_server_announce_servers(server, FALSE, ctx->start,
1464 server->router->connection);
1466 /* Announce our clients and channels to the router */
1467 silc_server_announce_clients(server, ctx->start,
1468 server->router->connection);
1469 silc_server_announce_channels(server, ctx->start,
1470 server->router->connection);
1475 if (server->server_type == SILC_SERVER) {
1476 /* If we are still using backup router, send confirmation to backup
1477 that using it is still ok and continue sending traffic there.
1478 The backup will reply with error if it's not ok. */
1479 if (server->router && server->backup_primary) {
1480 /* Send START_USE just in case using backup wouldn't be ok. */
1481 silc_server_backup_send_start_use(server, server->router->connection,
1484 /* Check couple of times same START_USE just in case. */
1485 silc_schedule_task_add(server->schedule, 0,
1486 silc_server_backup_check_status,
1487 silc_socket_dup(server->router->connection),
1488 5, 1, SILC_TASK_TIMEOUT,
1489 SILC_TASK_PRI_NORMAL);
1490 silc_schedule_task_add(server->schedule, 0,
1491 silc_server_backup_check_status,
1492 silc_socket_dup(server->router->connection),
1493 20, 1, SILC_TASK_TIMEOUT,
1494 SILC_TASK_PRI_NORMAL);
1495 silc_schedule_task_add(server->schedule, 0,
1496 silc_server_backup_check_status,
1497 silc_socket_dup(server->router->connection),
1498 60, 1, SILC_TASK_TIMEOUT,
1499 SILC_TASK_PRI_NORMAL);
1504 if (ctx->sock && ctx->sock->protocol)
1505 ctx->sock->protocol = NULL;
1507 silc_socket_free(ctx->sock); /* unref */
1508 silc_protocol_free(protocol);
1509 silc_free(ctx->sessions);