X-Git-Url: http://git.silcnet.org/gitweb/?p=silc.git;a=blobdiff_plain;f=apps%2Fsilcd%2Fserver_backup.c;h=78634fe2e02fcc35a8b58da88df56808590256ce;hp=b70bd095966cac93bbc527ece3e9cd8c31794fc9;hb=382d15d447b7a95390decfa783836ae4fe255b3d;hpb=33b943ac6034b92fcc5c09802f9fe9add12a51c9 diff --git a/apps/silcd/server_backup.c b/apps/silcd/server_backup.c index b70bd095..78634fe2 100644 --- a/apps/silcd/server_backup.c +++ b/apps/silcd/server_backup.c @@ -4,7 +4,7 @@ Author: Pekka Riikonen - Copyright (C) 2001 Pekka Riikonen + Copyright (C) 2001 - 2002 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,10 +22,15 @@ #include "server_internal.h" SILC_TASK_CALLBACK(silc_server_protocol_backup_done); +static void silc_server_backup_connect_primary(SilcServer server, + SilcServerEntry server_entry, + void *context); /* Backup router */ typedef struct { SilcServerEntry server; + SilcIDIP ip; + SilcUInt16 port; bool local; } SilcServerBackupEntry; @@ -33,20 +38,20 @@ typedef struct { by backup router. */ typedef struct { SilcIDIP ip; - uint16 port; + SilcUInt16 port; SilcServerEntry server; /* Backup router that replaced the primary */ } SilcServerBackupReplaced; /* Backup context */ struct SilcServerBackupStruct { SilcServerBackupEntry *servers; - uint32 servers_count; + SilcUInt32 servers_count; SilcServerBackupReplaced **replaced; - uint32 replaced_count; + SilcUInt32 replaced_count; }; typedef struct { - uint8 session; + SilcUInt8 session; bool connected; SilcServerEntry server_entry; } SilcServerBackupProtocolSession; @@ -56,30 +61,46 @@ typedef struct { SilcServer server; SilcSocketConnection sock; bool responder; - uint8 type; - uint8 session; + SilcUInt8 type; + SilcUInt8 session; SilcServerBackupProtocolSession *sessions; - uint32 sessions_count; + SilcUInt32 sessions_count; long start; } *SilcServerBackupProtocolContext; -/* Sets the `backup_server' to be one of our backup router. This can be - called multiple times to set multiple backup routers. If `local' is - TRUE then the `backup_server' is in the local cell, if FALSE it is - in some other cell. */ +/* Adds the `backup_server' to be one of our backup router. This can be + called multiple times to set multiple backup routers. The `ip' and `port' + is the IP and port that the `backup_router' will replace if the `ip' + will become unresponsive. If `local' is TRUE then the `backup_server' is + in the local cell, if FALSE it is in some other cell. */ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server, - bool local) + const char *ip, int port, bool local) { int i; - if (!server->backup) + if (!ip) + return; + + if (!server->backup) { server->backup = silc_calloc(1, sizeof(*server->backup)); + if (!server->backup) + return; + } + + SILC_LOG_DEBUG(("Backup router %s will replace %s", + ((SilcSocketConnection)backup_server->connection)->ip, + ip, port)); for (i = 0; i < server->backup->servers_count; i++) { if (!server->backup->servers[i].server) { server->backup->servers[i].server = backup_server; server->backup->servers[i].local = local; + server->backup->servers[i].port = htons(port); + memset(server->backup->servers[i].ip.data, 0, + sizeof(server->backup->servers[i].ip.data)); + silc_net_addr2bin(ip, server->backup->servers[i].ip.data, + sizeof(server->backup->servers[i].ip.data)); return; } } @@ -90,49 +111,83 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server, (i + 1)); server->backup->servers[i].server = backup_server; server->backup->servers[i].local = local; + server->backup->servers[i].port = htons(port); + memset(server->backup->servers[i].ip.data, 0, + sizeof(server->backup->servers[i].ip.data)); + silc_net_addr2bin(ip, server->backup->servers[i].ip.data, + sizeof(server->backup->servers[i].ip.data)); server->backup->servers_count++; } -/* Returns the first backup router context. Returns NULL if we do not have - any backup servers. */ +/* Returns backup router for IP and port in `replacing' or NULL if there + does not exist backup router. */ -SilcServerEntry silc_server_backup_get(SilcServer server) +SilcServerEntry silc_server_backup_get(SilcServer server, + SilcServerID *server_id) { - SilcServerEntry backup_router; int i; if (!server->backup) return NULL; for (i = 0; i < server->backup->servers_count; i++) { - if (server->backup->servers[i].server) { - backup_router = server->backup->servers[i].server; - server->backup->servers[i].server = NULL; - return backup_router; + if (server->backup->servers[i].server && + server->backup->servers[i].port == server_id->port && + !memcmp(server->backup->servers[i].ip.data, server_id->ip.data, + sizeof(server_id->ip.data))) { + SILC_LOG_DEBUG(("Found backup router %s for %s", + server->backup->servers[i].server->server_name, + silc_id_render(server_id, SILC_ID_SERVER))); + return server->backup->servers[i].server; } } return NULL; } -/* Deletes the backup server `server_entry. */ +/* Deletes the backup server `server_entry'. */ -void silc_server_backup_del(SilcServer server, - SilcServerEntry server_entry) +void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry) { int i; if (!server->backup) - return ; + return; for (i = 0; i < server->backup->servers_count; i++) { if (server->backup->servers[i].server == server_entry) { + SILC_LOG_DEBUG(("Removing %s as backup router", + silc_id_render(server->backup->servers[i].server->id, + SILC_ID_SERVER))); server->backup->servers[i].server = NULL; - return; + memset(server->backup->servers[i].ip.data, 0, + sizeof(server->backup->servers[i].ip.data)); } } } +/* Frees all data allocated for backup routers. Call this after deleting + all backup routers and when new routers are added no more, for example + when shutting down the server. */ + +void silc_server_backup_free(SilcServer server) +{ + int i; + + if (!server->backup) + return; + + /* Delete existing servers if caller didn't do it */ + for (i = 0; i < server->backup->servers_count; i++) { + if (server->backup->servers[i].server) + silc_server_backup_del(server, server->backup->servers[i].server); + } + + silc_free(server->backup->servers); + silc_free(server->backup); + server->backup = NULL; +} + /* Marks the IP address and port from the `server_id' as being replaced by backup router indicated by the `server'. If the router connects at a later time we can check whether it has been replaced by an backup @@ -153,11 +208,11 @@ void silc_server_backup_replaced_add(SilcServer server, server->backup->replaced_count = 1; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Replaced added")); + SILC_LOG_DEBUG(("Replacing router %s with %s", + silc_id_render(server_id, SILC_ID_SERVER), + server_entry->server_name)); memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip)); - //r->port = server_id->port; r->server = server_entry; for (i = 0; i < server->backup->replaced_count; i++) { @@ -186,27 +241,25 @@ bool silc_server_backup_replaced_get(SilcServer server, { int i; - SILC_LOG_DEBUG(("***********************************************")); - if (!server->backup || !server->backup->replaced) return FALSE; for (i = 0; i < server->backup->replaced_count; i++) { if (!server->backup->replaced[i]) continue; - SILC_LOG_HEXDUMP(("IP"), server_id->ip.data, server_id->ip.data_len); - SILC_LOG_HEXDUMP(("IP"), server->backup->replaced[i]->ip.data, - server->backup->replaced[i]->ip.data_len); - if (!memcmp(&server->backup->replaced[i]->ip, &server_id->ip, - sizeof(server_id->ip))) { + if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data, + sizeof(server_id->ip.data))) { if (server_entry) *server_entry = server->backup->replaced[i]->server; - SILC_LOG_DEBUG(("REPLACED")); + SILC_LOG_DEBUG(("Router %s is replaced by %s", + silc_id_render(server_id, SILC_ID_SERVER), + server->backup->replaced[i]->server->server_name)); return TRUE; } } - SILC_LOG_DEBUG(("NOT REPLACED")); + SILC_LOG_DEBUG(("Router %s is not replaced by backup router", + silc_id_render(server_id, SILC_ID_SERVER))); return FALSE; } @@ -243,6 +296,7 @@ void silc_server_backup_broadcast(SilcServer server, SilcServerEntry backup; SilcSocketConnection sock; SilcBuffer buffer; + const SilcBufferStruct p; SilcIDListData idata; int i; @@ -260,20 +314,31 @@ void silc_server_backup_broadcast(SilcServer server, if (!backup || backup->connection == sender || server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; idata = (SilcIDListData)backup; sock = backup->connection; - silc_packet_send_prepare(sock, 0, 0, buffer->len); - silc_buffer_put(sock->outbuf, buffer->data, buffer->len); - silc_packet_encrypt(idata->send_key, idata->hmac_send, - sock->outbuf, sock->outbuf->len); + if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send, + (const SilcBuffer)&p)) { + SILC_LOG_ERROR(("Cannot send packet")); + return; + } + silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len); + silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++, + (SilcBuffer)&p, p.len); - SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", sock->outbuf->len), - sock->outbuf->data, sock->outbuf->len); + SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len); /* Now actually send the packet */ silc_server_packet_send_real(server, sock, FALSE); + + /* Check for mandatory rekey */ + if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD) + silc_schedule_task_add(server->schedule, sender->sock, + silc_server_rekey_callback, sender, 0, 1, + SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL); } } @@ -292,7 +357,7 @@ void silc_server_backup_send(SilcServer server, SilcPacketType type, SilcPacketFlags flags, unsigned char *data, - uint32 data_len, + SilcUInt32 data_len, bool force_send, bool local) { @@ -303,20 +368,20 @@ void silc_server_backup_send(SilcServer server, if (!server->backup || server->server_type != SILC_ROUTER) return; - SILC_LOG_DEBUG(("Start")); - for (i = 0; i < server->backup->servers_count; i++) { backup = server->backup->servers[i].server; - if (!backup) + if (!backup || sender == backup) continue; - - if (sender == backup) - continue; - if (local && server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; sock = backup->connection; + + SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)", + silc_get_packet_name(type), sock->hostname, sock->ip)); + silc_server_packet_send(server, backup->connection, type, flags, data, data_len, force_send); } @@ -334,7 +399,7 @@ void silc_server_backup_send_dest(SilcServer server, void *dst_id, SilcIdType dst_id_type, unsigned char *data, - uint32 data_len, + SilcUInt32 data_len, bool force_send, bool local) { @@ -345,26 +410,37 @@ void silc_server_backup_send_dest(SilcServer server, if (!server->backup || server->server_type != SILC_ROUTER) return; - SILC_LOG_DEBUG(("Start")); - for (i = 0; i < server->backup->servers_count; i++) { backup = server->backup->servers[i].server; - if (!backup) + if (!backup || sender == backup) continue; - - if (sender == backup) - continue; - if (local && server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; sock = backup->connection; + + SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)", + silc_get_packet_name(type), sock->hostname, sock->ip)); + silc_server_packet_send_dest(server, backup->connection, type, flags, dst_id, dst_id_type, data, data_len, force_send); } } +SILC_TASK_CALLBACK(silc_server_backup_timeout) +{ + SilcProtocol protocol = context; + SilcServer server = app_context; + + SILC_LOG_INFO(("Timeout occurred during backup resuming protocol")); + silc_protocol_cancel(protocol, server->schedule); + protocol->state = SILC_PROTOCOL_STATE_ERROR; + silc_protocol_execute_final(protocol, server->schedule); +} + /* Processes incoming RESUME_ROUTER packet. This can give the packet for processing to the protocol handler or allocate new protocol if start command is received. */ @@ -373,34 +449,55 @@ void silc_server_backup_resume_router(SilcServer server, SilcSocketConnection sock, SilcPacketContext *packet) { - uint8 type, session; - int ret; + SilcUInt8 type, session; + SilcServerBackupProtocolContext ctx; + int i, ret; if (sock->type == SILC_SOCKET_TYPE_CLIENT || - sock->type == SILC_SOCKET_TYPE_UNKNOWN) + sock->type == SILC_SOCKET_TYPE_UNKNOWN) { + SILC_LOG_DEBUG(("Bad packet received")); return; - - SILC_LOG_DEBUG(("Start")); + } ret = silc_buffer_unformat(packet->buffer, SILC_STR_UI_CHAR(&type), SILC_STR_UI_CHAR(&session), SILC_STR_END); - if (ret < 0) + if (ret < 0) { + SILC_LOG_ERROR(("Malformed resume router packet received")); return; + } + /* Activate the protocol for this socket if necessary */ + if ((type == SILC_SERVER_BACKUP_RESUMED || + type == SILC_SERVER_BACKUP_RESUMED_GLOBAL) && + sock->type == SILC_SOCKET_TYPE_ROUTER && !sock->protocol && + ((SilcIDListData)sock->user_data)->status & + SILC_IDLIST_STATUS_DISABLED) { + SilcServerEntry backup_router; + + if (silc_server_backup_replaced_get(server, + ((SilcServerEntry)sock-> + user_data)->id, + &backup_router)) { + SilcSocketConnection bsock = + (SilcSocketConnection)backup_router->connection; + if (bsock->protocol && bsock->protocol->protocol && + bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) { + sock->protocol = bsock->protocol; + ctx = sock->protocol->context; + ctx->sock = sock; + } + } + } + /* If the backup resuming protocol is active then process the packet in the protocol. */ if (sock->protocol && sock->protocol->protocol && sock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) { - SilcServerBackupProtocolContext ctx = sock->protocol->context; - int i; - + ctx = sock->protocol->context; ctx->type = type; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Continuing protocol, type %d", type)); - if (type != SILC_SERVER_BACKUP_RESUMED && type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) { for (i = 0; i < ctx->sessions_count; i++) { @@ -415,7 +512,7 @@ void silc_server_backup_resume_router(SilcServer server, return; } - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", type)); return; } @@ -425,14 +522,14 @@ void silc_server_backup_resume_router(SilcServer server, immediately after we've connected to our primary router. */ if (sock->type == SILC_SOCKET_TYPE_ROUTER && - server->router == sock->user_data && + sock && SILC_PRIMARY_ROUTE(server) == sock && type == SILC_SERVER_BACKUP_REPLACED) { /* We have been replaced by an backup router in our cell. We must mark our primary router connection disabled since we are not allowed to use it at this moment. */ SilcIDListData idata = (SilcIDListData)sock->user_data; - - SILC_LOG_DEBUG(("We are replaced by an backup router in this cell")); + SILC_LOG_INFO(("We are replaced by an backup router in this cell, will " + "wait until backup resuming protocol is executed")); idata->status |= SILC_IDLIST_STATUS_DISABLED; return; } @@ -451,12 +548,17 @@ void silc_server_backup_resume_router(SilcServer server, proto_ctx->start = time(0); SILC_LOG_DEBUG(("Starting backup resuming protocol as responder")); + SILC_LOG_INFO(("Starting backup resuming protocol")); /* Run the backup resuming protocol */ silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP, &sock->protocol, proto_ctx, silc_server_protocol_backup_done); silc_protocol_execute(sock->protocol, server->schedule, 0, 0); + silc_schedule_task_add(server->schedule, sock->sock, + silc_server_backup_timeout, + sock->protocol, 30, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); } } @@ -464,21 +566,23 @@ void silc_server_backup_resume_router(SilcServer server, SILC_TASK_CALLBACK(silc_server_backup_connect_to_router) { + SilcServer server = app_context; SilcServerConnection sconn = (SilcServerConnection)context; - SilcServer server = sconn->server; int sock; + const char *server_ip; SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host, sconn->remote_port)); /* Connect to remote host */ - sock = silc_net_create_connection(server->config->listen_port->local_ip, - sconn->remote_port, + server_ip = server->config->server_info->primary == NULL ? NULL : + server->config->server_info->primary->server_ip; + sock = silc_net_create_connection(server_ip, sconn->remote_port, sconn->remote_host); if (sock < 0) { silc_schedule_task_add(server->schedule, 0, silc_server_backup_connect_to_router, - context, 2, 0, SILC_TASK_TIMEOUT, + context, 5, 0, SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL); return; } @@ -492,24 +596,29 @@ SILC_TASK_CALLBACK(silc_server_backup_connect_to_router) connection is created. */ void silc_server_backup_reconnect(SilcServer server, - const char *ip, uint16 port, + const char *ip, SilcUInt16 port, SilcServerConnectRouterCallback callback, void *context) { SilcServerConnection sconn; + SILC_LOG_INFO(("Attempting to reconnect to primary router")); + sconn = silc_calloc(1, sizeof(*sconn)); - sconn->server = server; sconn->remote_host = strdup(ip); sconn->remote_port = port; sconn->callback = callback; sconn->callback_context = context; + sconn->no_reconnect = TRUE; silc_schedule_task_add(server->schedule, 0, silc_server_backup_connect_to_router, - sconn, 2, 0, SILC_TASK_TIMEOUT, + sconn, 1, 0, SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL); } +/* Task that is called after backup router has connected back to + primary router and we are starting the resuming protocol */ + SILC_TASK_CALLBACK(silc_server_backup_connected_later) { SilcServerBackupProtocolContext proto_ctx = @@ -518,12 +627,18 @@ SILC_TASK_CALLBACK(silc_server_backup_connected_later) SilcSocketConnection sock = proto_ctx->sock; SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator")); + SILC_LOG_INFO(("Starting backup resuming protocol")); /* Run the backup resuming protocol */ silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP, &sock->protocol, proto_ctx, silc_server_protocol_backup_done); silc_protocol_execute(sock->protocol, server->schedule, 0, 0); + + silc_schedule_task_add(server->schedule, sock->sock, + silc_server_backup_timeout, + sock->protocol, 30, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); } /* Called when we've established connection back to our primary router @@ -535,8 +650,21 @@ void silc_server_backup_connected(SilcServer server, void *context) { SilcServerBackupProtocolContext proto_ctx; - SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection; + SilcSocketConnection sock; + if (!server_entry) { + /* Try again */ + SilcServerConfigRouter *primary; + primary = silc_server_config_get_primary_router(server); + if (primary) + silc_server_backup_reconnect(server, + primary->host, primary->port, + silc_server_backup_connected, + context); + return; + } + + sock = (SilcSocketConnection)server_entry->connection; proto_ctx = silc_calloc(1, sizeof(*proto_ctx)); proto_ctx->server = server; proto_ctx->sock = sock; @@ -562,14 +690,30 @@ static void silc_server_backup_connect_primary(SilcServer server, void *context) { SilcSocketConnection backup_router = (SilcSocketConnection)context; - SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection; - SilcIDListData idata = (SilcIDListData)server_entry; - SilcServerBackupProtocolContext ctx = - (SilcServerBackupProtocolContext)backup_router->protocol->context; + SilcServerBackupProtocolContext ctx; + SilcSocketConnection sock; + SilcIDListData idata; SilcBuffer buffer; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending CONNECTED packet, session %d", ctx->session)); + if (!server_entry) { + /* Try again */ + SilcServerConfigRouter *primary; + primary = silc_server_config_get_primary_router(server); + if (primary) + silc_server_backup_reconnect(server, + primary->host, primary->port, + silc_server_backup_connect_primary, + context); + return; + } + + ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context; + sock = (SilcSocketConnection)server_entry->connection; + idata = (SilcIDListData)server_entry; + + SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session)); + SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router", + ctx->session)); /* Send the CONNECTED packet back to the backup router. */ buffer = silc_buffer_alloc(2); @@ -596,59 +740,33 @@ static void silc_server_backup_connect_primary(SilcServer server, backup_router->protocol = NULL; } -/* Resume protocol with RESUME_ROUTER packet: - - SILC_PACKET_RESUME_ROUTER: - - - - = the protocol opcode - = Identifier for this packet and any subsequent reply - packets must include this identifier. - - Types: - - 1 = To router: Comensing backup resuming protocol. This will - indicate that the sender is backup router acting as primary - and the receiver is primary router that has been replaced by - the backup router. - - To server. Comensing backup resuming protocol. This will - indicate that the sender is backup router and the receiver - must reconnect to the real primary router of the cell. - - 2 = To Router: Comesning backup resuming protocol in another - cell. The receiver will connect to its primary router - (the router that is now online again) but will not use - the link. If the receiver is not configured to connect - to any router it does as locally configured. The sender - is always backup router. - - To server: this is never sent to server. - - 3 = To backup router: Sender is normal server or router and it - tells to backup router that they have connected to the - primary router. Backup router never sends this type. - - 4 = To router: Ending backup resuming protocol. This is sent - to the real primary router to tell that it can take over - the task as being primary router. - - To server: same as sending for router. - - Backup router sends this also to the primary route but only - after it has sent them to normal servers and has purged all - traffic coming from normal servers. - - 5 = To router: Sender is the real primary router after it has - received type 4 from backup router. To tell that it is again - primary router of the cell. - - 20 = To router: This is sent only when router is connecting to - another router and has been replaced by an backup router. - The sender knows that the connectee has been replaced. +SILC_TASK_CALLBACK(silc_server_backup_send_resumed) +{ + SilcProtocol protocol = (SilcProtocol)context; + SilcServerBackupProtocolContext ctx = protocol->context; + SilcServer server = ctx->server; + SilcBuffer packet; + int i; - */ + for (i = 0; i < ctx->sessions_count; i++) + if (ctx->sessions[i].server_entry == ctx->sock->user_data) + ctx->session = ctx->sessions[i].session; + + /* We've received all the CONNECTED packets and now we'll send the + ENDING packet to the new primary router. */ + packet = silc_buffer_alloc(2); + silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); + silc_buffer_format(packet, + SILC_STR_UI_CHAR(SILC_SERVER_BACKUP_ENDING), + SILC_STR_UI_CHAR(ctx->session), + SILC_STR_END); + silc_server_packet_send(server, ctx->sock, + SILC_PACKET_RESUME_ROUTER, 0, + packet->data, packet->len, FALSE); + silc_buffer_free(packet); + + protocol->state = SILC_PROTOCOL_STATE_END; +} /* Backup resuming protocol. This protocol is executed when the primary router wants to resume its position as being primary router. */ @@ -664,13 +782,9 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) SilcServerEntry server_entry; int i; - SILC_LOG_DEBUG(("Start")); - if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN) protocol->state = SILC_PROTOCOL_STATE_START; - SILC_LOG_DEBUG(("State=%d", protocol->state)); - switch(protocol->state) { case SILC_PROTOCOL_STATE_START: if (ctx->responder == FALSE) { @@ -679,16 +793,14 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending START packets")); - /* Send the START packet to primary router and normal servers. */ if (silc_idcache_get_all(server->local_list->servers, &list)) { if (silc_idcache_list_first(list, &id_cache)) { while (id_cache) { server_entry = (SilcServerEntry)id_cache->context; - if ((server_entry == server->id_entry) || - !server_entry->connection) { + if (!server_entry || (server_entry == server->id_entry) || + !server_entry->connection || !server_entry->data.send_key || + (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)) { if (!silc_idcache_list_next(list, &id_cache)) break; else @@ -702,8 +814,57 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) ctx->sessions[ctx->sessions_count].connected = FALSE; ctx->sessions[ctx->sessions_count].server_entry = server_entry; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("START for session %d", ctx->sessions_count)); + SILC_LOG_DEBUG(("Sending START to %s (session %d)", + server_entry->server_name, ctx->sessions_count)); + SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)", + server_entry->server_name, ctx->sessions_count)); + + /* This connection is performing this protocol too now */ + ((SilcSocketConnection)server_entry->connection)->protocol = + protocol; + + if (server_entry->server_type == SILC_ROUTER) + packet->data[0] = SILC_SERVER_BACKUP_START; + else + packet->data[0] = SILC_SERVER_BACKUP_START_GLOBAL; + packet->data[1] = ctx->sessions_count; + silc_server_packet_send(server, server_entry->connection, + SILC_PACKET_RESUME_ROUTER, 0, + packet->data, packet->len, FALSE); + ctx->sessions_count++; + + if (!silc_idcache_list_next(list, &id_cache)) + break; + } + } + + silc_idcache_list_free(list); + } + + if (silc_idcache_get_all(server->global_list->servers, &list)) { + if (silc_idcache_list_first(list, &id_cache)) { + while (id_cache) { + server_entry = (SilcServerEntry)id_cache->context; + if (!server_entry || (server_entry == server->id_entry) || + !server_entry->connection || !server_entry->data.send_key || + (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)) { + if (!silc_idcache_list_next(list, &id_cache)) + break; + else + continue; + } + + ctx->sessions = silc_realloc(ctx->sessions, + sizeof(*ctx->sessions) * + (ctx->sessions_count + 1)); + ctx->sessions[ctx->sessions_count].session = ctx->sessions_count; + ctx->sessions[ctx->sessions_count].connected = FALSE; + ctx->sessions[ctx->sessions_count].server_entry = server_entry; + + SILC_LOG_DEBUG(("Sending START to %s (session %d)", + server_entry->server_name, ctx->sessions_count)); + SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)", + server_entry->server_name, ctx->sessions_count)); /* This connection is performing this protocol too now */ ((SilcSocketConnection)server_entry->connection)->protocol = @@ -729,49 +890,53 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) silc_buffer_free(packet); - /* If we are router then announce our possible servers. */ - if (server->server_type == SILC_ROUTER) - silc_server_announce_servers(server, FALSE, 0, ctx->sock); - silc_server_announce_clients(server, 0, ctx->sock); - silc_server_announce_channels(server, 0, ctx->sock); + /* If we are not standalone and our primary is not the one we've + talking to now, then announce our information to it since we + haven't done that yet. Standalone backup router announces + these during connecting to the primary. */ + if (!server->standalone && SILC_PRIMARY_ROUTE(server) != ctx->sock) { + silc_server_announce_servers(server, TRUE, 0, ctx->sock); + silc_server_announce_clients(server, 0, ctx->sock); + silc_server_announce_channels(server, 0, ctx->sock); + } protocol->state++; } else { /* Responder of the protocol. */ - SilcServerConfigSectionServerConnection *primary; + SilcServerConfigRouter *primary; /* We should have received START or START_GLOBAL packet */ if (ctx->type != SILC_SERVER_BACKUP_START && ctx->type != SILC_SERVER_BACKUP_START_GLOBAL) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received START packet, reconnecting to router")); - /* Connect to the primary router that was down that is now supposed to be back online. We send the CONNECTED packet after we've established the connection to the primary router. */ - primary = silc_server_config_get_primary_router(server->config); - if (primary) { + primary = silc_server_config_get_primary_router(server); + if (primary && server->backup_primary && + !silc_server_num_sockets_by_remote(server, + silc_net_is_ip(primary->host) ? + primary->host : NULL, + silc_net_is_ip(primary->host) ? + NULL : primary->host, + primary->port, + SILC_SOCKET_TYPE_ROUTER)) { + SILC_LOG_DEBUG(("Received START (session %d), reconnect to router", + ctx->session)); silc_server_backup_reconnect(server, primary->host, primary->port, silc_server_backup_connect_primary, ctx->sock); - if (server->server_type == SILC_ROUTER && - (!server->router || - server->router->data.status & SILC_IDLIST_STATUS_DISABLED)) - protocol->state++; - else - protocol->state = SILC_PROTOCOL_STATE_END; - } else { /* Nowhere to connect just return the CONNECTED packet */ + SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back", + ctx->session)); + SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router", + ctx->session)); - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending CONNECTED packet, session %d", ctx->session)); - /* Send the CONNECTED packet back to the backup router. */ packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); @@ -783,9 +948,15 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) SILC_PACKET_RESUME_ROUTER, 0, packet->data, packet->len, FALSE); silc_buffer_free(packet); - protocol->state++; } + if (server->server_type == SILC_ROUTER && + (!server->router || + server->router->data.status & SILC_IDLIST_STATUS_DISABLED)) + protocol->state++; + else + protocol->state = SILC_PROTOCOL_STATE_END; + ctx->sessions = silc_realloc(ctx->sessions, sizeof(*ctx->sessions) * (ctx->sessions_count + 1)); @@ -800,16 +971,17 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) /* We should have received CONNECTED packet */ if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received CONNECTED packet, session %d", ctx->session)); - for (i = 0; i < ctx->sessions_count; i++) { if (ctx->sessions[i].session == ctx->session) { ctx->sessions[i].connected = TRUE; + SILC_LOG_INFO(("Received CONNECTED from %s (session %d)", + ctx->sessions[i].server_entry->server_name, + ctx->session)); + SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session)); break; } } @@ -819,38 +991,26 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) return; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending ENDING packet to primary")); - - for (i = 0; i < ctx->sessions_count; i++) - if (ctx->sessions[i].server_entry == ctx->sock->user_data) - ctx->session = ctx->sessions[i].session; - - /* We've received all the CONNECTED packets and now we'll send the - ENDING packet to the new primary router. */ - packet = silc_buffer_alloc(2); - silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); - silc_buffer_format(packet, - SILC_STR_UI_CHAR(SILC_SERVER_BACKUP_ENDING), - SILC_STR_UI_CHAR(ctx->session), - SILC_STR_END); - silc_server_packet_send(server, ctx->sock, - SILC_PACKET_RESUME_ROUTER, 0, - packet->data, packet->len, FALSE); - silc_buffer_free(packet); + SILC_LOG_INFO(("All sessions have returned CONNECTED packets, " + "continuing")); + SILC_LOG_DEBUG(("Sending ENDING packet to primary router")); - protocol->state = SILC_PROTOCOL_STATE_END; + /* Send with a timeout */ + silc_schedule_task_add(server->schedule, 0, + silc_server_backup_send_resumed, + protocol, 1, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + return; } else { /* Responder */ /* We should have been received ENDING packet */ if (ctx->type != SILC_SERVER_BACKUP_ENDING) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received ENDING packet, sending RESUMED packets")); + SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now")); /* This state is received by the primary router but also servers and perhaps other routers so check that if we are the primary @@ -859,16 +1019,22 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) to next state. */ if (server->router && !(server->router->data.status & SILC_IDLIST_STATUS_DISABLED) && - silc_server_config_is_primary_route(server->config)) { + silc_server_config_is_primary_route(server)) { /* We'll wait for RESUMED packet */ protocol->state = SILC_PROTOCOL_STATE_END; break; } - /* Switch announced informations to our entry instead of using the + /* Switch announced informations to our primary router of using the backup router. */ + silc_server_local_servers_toggle_enabled(server, TRUE); + silc_server_update_servers_by_server(server, ctx->sock->user_data, + server->router); silc_server_update_clients_by_server(server, ctx->sock->user_data, - server->id_entry, TRUE, FALSE); + server->router, TRUE); + if (server->server_type == SILC_SERVER) + silc_server_update_channels_by_server(server, ctx->sock->user_data, + server->router); packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); @@ -878,16 +1044,55 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) if (silc_idcache_list_first(list, &id_cache)) { while (id_cache) { server_entry = (SilcServerEntry)id_cache->context; - if ((server_entry == server->id_entry) || - !server_entry->connection) { + if (!server_entry || (server_entry == server->id_entry) || + !server_entry->connection || !server_entry->data.send_key) { if (!silc_idcache_list_next(list, &id_cache)) break; else continue; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("RESUMED packet")); + SILC_LOG_DEBUG(("Sending RESUMED to %s", + server_entry->server_name)); + + server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; + + /* This connection is performing this protocol too now */ + ((SilcSocketConnection)server_entry->connection)->protocol = + protocol; + + if (server_entry->server_type == SILC_ROUTER) + packet->data[0] = SILC_SERVER_BACKUP_RESUMED; + else + packet->data[0] = SILC_SERVER_BACKUP_RESUMED_GLOBAL; + silc_server_packet_send(server, server_entry->connection, + SILC_PACKET_RESUME_ROUTER, 0, + packet->data, packet->len, FALSE); + + if (!silc_idcache_list_next(list, &id_cache)) + break; + } + } + + silc_idcache_list_free(list); + } + + if (silc_idcache_get_all(server->global_list->servers, &list)) { + if (silc_idcache_list_first(list, &id_cache)) { + while (id_cache) { + server_entry = (SilcServerEntry)id_cache->context; + if (!server_entry || (server_entry == server->id_entry) || + !server_entry->connection || !server_entry->data.send_key) { + if (!silc_idcache_list_next(list, &id_cache)) + break; + else + continue; + } + + SILC_LOG_DEBUG(("Sending RESUMED to %s", + server_entry->server_name)); + + server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; /* This connection is performing this protocol too now */ ((SilcSocketConnection)server_entry->connection)->protocol = @@ -911,6 +1116,9 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) silc_buffer_free(packet); + SILC_LOG_INFO(("We are now the primary router of our cell again")); + server->wait_backup = FALSE; + /* For us this is the end of this protocol. */ if (protocol->final_callback) silc_protocol_execute_final(protocol, server->schedule); @@ -921,75 +1129,68 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) case SILC_PROTOCOL_STATE_END: { - SilcIDListData idata; - SilcServerEntry primary; - SilcServerEntry backup_router; + SilcServerEntry router, backup_router; /* We should have been received RESUMED packet from our primary router. */ if (ctx->type != SILC_SERVER_BACKUP_RESUMED && ctx->type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received RESUMED packet")); + SILC_LOG_INFO(("Received RESUMED from new primary router")); - /* We have now new primary router. All traffic goes there from now on. */ if (server->backup_router) server->server_type = SILC_BACKUP_ROUTER; - primary = (SilcServerEntry)ctx->sock->user_data; - if (silc_server_backup_replaced_get(server, primary->id, + /* We have now new primary router. All traffic goes there from now on. */ + router = (SilcServerEntry)ctx->sock->user_data; + if (silc_server_backup_replaced_get(server, router->id, &backup_router)) { if (backup_router == server->router) { - server->id_entry->router = ctx->sock->user_data; - server->router = ctx->sock->user_data; + /* We have new primary router now */ + server->id_entry->router = router; + server->router = router; SILC_LOG_INFO(("Switching back to primary router %s", server->router->server_name)); - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Switching back to primary router %s", - server->router->server_name)); - idata = (SilcIDListData)server->router; - idata->status &= ~SILC_IDLIST_STATUS_DISABLED; - - /* Update the client entries of the backup router to the new - primary router. */ - silc_server_update_clients_by_server(server, backup_router, - primary, TRUE, FALSE); - silc_server_backup_replaced_del(server, backup_router); - silc_server_backup_add(server, backup_router, - backup_router->server_type != SILC_ROUTER ? - TRUE : FALSE); + } else { + /* We are connected to new primary and now continue using it */ + SILC_LOG_INFO(("Resuming the use of primary router %s", + router->server_name)); } - - /* Announce all of our information to the new primary router. We - announce all that was updated after the protocol was started since - the router knows all the older stuff. */ + server->backup_primary = FALSE; + + /* Update the client entries of the backup router to the new + router */ + silc_server_local_servers_toggle_enabled(server, FALSE); + router->data.status &= ~SILC_IDLIST_STATUS_DISABLED; + silc_server_update_servers_by_server(server, backup_router, router); + silc_server_update_clients_by_server(server, NULL, router, FALSE); + if (server->server_type == SILC_SERVER) + silc_server_update_channels_by_server(server, backup_router, router); + silc_server_backup_replaced_del(server, backup_router); + + /* Announce all of our information to the router. */ if (server->server_type == SILC_ROUTER) - silc_server_announce_servers(server, FALSE, 0, - server->router->connection); - - /* Announce our clients and channels to the router */ - silc_server_announce_clients(server, 0, - server->router->connection); - silc_server_announce_channels(server, 0, - server->router->connection); -#if 0 - if (server->server_type == SILC_ROUTER) - silc_server_announce_servers(server, FALSE, ctx->start - 60, - server->router->connection); - + silc_server_announce_servers(server, FALSE, ctx->start, + router->connection); + /* Announce our clients and channels to the router */ - silc_server_announce_clients(server, ctx->start - 60, - server->router->connection); - silc_server_announce_channels(server, ctx->start - 60, - server->router->connection); -#endif + silc_server_announce_clients(server, ctx->start, + router->connection); + silc_server_announce_channels(server, ctx->start, + router->connection); } + /* Send notify about primary router going down to local operators */ + SILC_SERVER_SEND_OPERS(server, FALSE, TRUE, + SILC_NOTIFY_TYPE_NONE, + ("%s resumed the use of primary router %s", + server->server_name, + server->router->server_name)); + /* Protocol has ended, call the final callback */ if (protocol->final_callback) silc_protocol_execute_final(protocol, server->schedule); @@ -1008,6 +1209,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) case SILC_PROTOCOL_STATE_FAILURE: /* Protocol has ended, call the final callback */ + SILC_LOG_ERROR(("Error during backup resume: received Failure")); if (protocol->final_callback) silc_protocol_execute_final(protocol, server->schedule); else @@ -1029,22 +1231,95 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) SilcIDCacheList list; SilcIDCacheEntry id_cache; - SILC_LOG_DEBUG(("Start")); + silc_schedule_task_del_by_context(server->schedule, protocol); if (protocol->state == SILC_PROTOCOL_STATE_ERROR || protocol->state == SILC_PROTOCOL_STATE_FAILURE) { SILC_LOG_ERROR(("Error occurred during backup router resuming protcool")); } + if (server->server_shutdown) + return; + + /* Remove this protocol from all server entries that has it */ if (silc_idcache_get_all(server->local_list->servers, &list)) { if (silc_idcache_list_first(list, &id_cache)) { while (id_cache) { server_entry = (SilcServerEntry)id_cache->context; sock = (SilcSocketConnection)server_entry->connection; - if (sock->protocol && sock->protocol->protocol && - sock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) + if (sock->protocol == protocol) { + sock->protocol = NULL; + + /* Backup closes connection and reconnects if error occurred */ + if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) { + if (protocol->state == SILC_PROTOCOL_STATE_ERROR || + protocol->state == SILC_PROTOCOL_STATE_FAILURE) { + server->backup_noswitch = TRUE; + server->server_type = SILC_BACKUP_ROUTER; + + if (sock->user_data) + silc_server_free_sock_user_data(server, sock, NULL); + silc_server_close_connection(server, sock); + + silc_schedule_task_add(server->schedule, 0, + silc_server_connect_to_router, + server, 1, 0, + SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + + if (!silc_idcache_list_next(list, &id_cache)) + break; + continue; + } + } + + if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED) + server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; + } + + if (!silc_idcache_list_next(list, &id_cache)) + break; + } + } + silc_idcache_list_free(list); + } + + if (silc_idcache_get_all(server->global_list->servers, &list)) { + if (silc_idcache_list_first(list, &id_cache)) { + while (id_cache) { + server_entry = (SilcServerEntry)id_cache->context; + sock = (SilcSocketConnection)server_entry->connection; + + if (sock->protocol == protocol) { sock->protocol = NULL; + + /* Backup closes connection and reconnects if error occurred */ + if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) { + if (protocol->state == SILC_PROTOCOL_STATE_ERROR || + protocol->state == SILC_PROTOCOL_STATE_FAILURE) { + server->backup_noswitch = TRUE; + server->server_type = SILC_BACKUP_ROUTER; + + if (sock->user_data) + silc_server_free_sock_user_data(server, sock, NULL); + silc_server_close_connection(server, sock); + + silc_schedule_task_add(server->schedule, 0, + silc_server_connect_to_router, + server, 1, 0, + SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + + if (!silc_idcache_list_next(list, &id_cache)) + break; + continue; + } + } + + if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED) + server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; + } if (!silc_idcache_list_next(list, &id_cache)) break; @@ -1053,6 +1328,10 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) silc_idcache_list_free(list); } + if (protocol->state != SILC_PROTOCOL_STATE_ERROR && + protocol->state != SILC_PROTOCOL_STATE_FAILURE) + SILC_LOG_INFO(("Backup resuming protocol ended successfully")); + if (ctx->sock->protocol) ctx->sock->protocol = NULL; silc_protocol_free(protocol);