X-Git-Url: http://git.silcnet.org/gitweb/?p=silc.git;a=blobdiff_plain;f=apps%2Fsilcd%2Fserver_backup.c;h=78634fe2e02fcc35a8b58da88df56808590256ce;hp=2088dd9a0c8a93f6d8dde710fc208effb03c1cbb;hb=382d15d447b7a95390decfa783836ae4fe255b3d;hpb=1e4e7d57f414a337e084df4072a2690f0c9b71c6 diff --git a/apps/silcd/server_backup.c b/apps/silcd/server_backup.c index 2088dd9a..78634fe2 100644 --- a/apps/silcd/server_backup.c +++ b/apps/silcd/server_backup.c @@ -4,7 +4,7 @@ Author: Pekka Riikonen - Copyright (C) 2001 Pekka Riikonen + Copyright (C) 2001 - 2002 Pekka Riikonen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,12 +22,15 @@ #include "server_internal.h" SILC_TASK_CALLBACK(silc_server_protocol_backup_done); +static void silc_server_backup_connect_primary(SilcServer server, + SilcServerEntry server_entry, + void *context); /* Backup router */ typedef struct { SilcServerEntry server; SilcIDIP ip; - uint16 port; + SilcUInt16 port; bool local; } SilcServerBackupEntry; @@ -35,20 +38,20 @@ typedef struct { by backup router. */ typedef struct { SilcIDIP ip; - uint16 port; + SilcUInt16 port; SilcServerEntry server; /* Backup router that replaced the primary */ } SilcServerBackupReplaced; /* Backup context */ struct SilcServerBackupStruct { SilcServerBackupEntry *servers; - uint32 servers_count; + SilcUInt32 servers_count; SilcServerBackupReplaced **replaced; - uint32 replaced_count; + SilcUInt32 replaced_count; }; typedef struct { - uint8 session; + SilcUInt8 session; bool connected; SilcServerEntry server_entry; } SilcServerBackupProtocolSession; @@ -58,10 +61,10 @@ typedef struct { SilcServer server; SilcSocketConnection sock; bool responder; - uint8 type; - uint8 session; + SilcUInt8 type; + SilcUInt8 session; SilcServerBackupProtocolSession *sessions; - uint32 sessions_count; + SilcUInt32 sessions_count; long start; } *SilcServerBackupProtocolContext; @@ -76,23 +79,28 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server, { int i; - SILC_LOG_DEBUG(("Start")); - if (!ip) return; - if (!server->backup) + if (!server->backup) { server->backup = silc_calloc(1, sizeof(*server->backup)); + if (!server->backup) + return; + } + + SILC_LOG_DEBUG(("Backup router %s will replace %s", + ((SilcSocketConnection)backup_server->connection)->ip, + ip, port)); for (i = 0; i < server->backup->servers_count; i++) { if (!server->backup->servers[i].server) { server->backup->servers[i].server = backup_server; server->backup->servers[i].local = local; + server->backup->servers[i].port = htons(port); memset(server->backup->servers[i].ip.data, 0, sizeof(server->backup->servers[i].ip.data)); - silc_net_addr2bin_ne(ip, server->backup->servers[i].ip.data, - sizeof(server->backup->servers[i].ip.data)); - //server->backup->servers[i].port = port; + silc_net_addr2bin(ip, server->backup->servers[i].ip.data, + sizeof(server->backup->servers[i].ip.data)); return; } } @@ -103,11 +111,11 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server, (i + 1)); server->backup->servers[i].server = backup_server; server->backup->servers[i].local = local; + server->backup->servers[i].port = htons(port); memset(server->backup->servers[i].ip.data, 0, sizeof(server->backup->servers[i].ip.data)); - silc_net_addr2bin_ne(ip, server->backup->servers[i].ip.data, - sizeof(server->backup->servers[i].ip.data)); - //server->backup->servers[i].port = server_id->port; + silc_net_addr2bin(ip, server->backup->servers[i].ip.data, + sizeof(server->backup->servers[i].ip.data)); server->backup->servers_count++; } @@ -119,41 +127,67 @@ SilcServerEntry silc_server_backup_get(SilcServer server, { int i; - SILC_LOG_DEBUG(("Start")); - if (!server->backup) return NULL; for (i = 0; i < server->backup->servers_count; i++) { - SILC_LOG_HEXDUMP(("IP"), server_id->ip.data, 16); - SILC_LOG_HEXDUMP(("IP"), server->backup->servers[i].ip.data, 16); if (server->backup->servers[i].server && - !memcmp(&server->backup->servers[i].ip, &server_id->ip.data, - sizeof(server_id->ip.data))) + server->backup->servers[i].port == server_id->port && + !memcmp(server->backup->servers[i].ip.data, server_id->ip.data, + sizeof(server_id->ip.data))) { + SILC_LOG_DEBUG(("Found backup router %s for %s", + server->backup->servers[i].server->server_name, + silc_id_render(server_id, SILC_ID_SERVER))); return server->backup->servers[i].server; + } } return NULL; } /* Deletes the backup server `server_entry'. */ + void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry) { int i; - SILC_LOG_DEBUG(("Start")); - if (!server->backup) - return ; + return; for (i = 0; i < server->backup->servers_count; i++) { if (server->backup->servers[i].server == server_entry) { + SILC_LOG_DEBUG(("Removing %s as backup router", + silc_id_render(server->backup->servers[i].server->id, + SILC_ID_SERVER))); server->backup->servers[i].server = NULL; - return; + memset(server->backup->servers[i].ip.data, 0, + sizeof(server->backup->servers[i].ip.data)); } } } +/* Frees all data allocated for backup routers. Call this after deleting + all backup routers and when new routers are added no more, for example + when shutting down the server. */ + +void silc_server_backup_free(SilcServer server) +{ + int i; + + if (!server->backup) + return; + + /* Delete existing servers if caller didn't do it */ + for (i = 0; i < server->backup->servers_count; i++) { + if (server->backup->servers[i].server) + silc_server_backup_del(server, server->backup->servers[i].server); + } + + silc_free(server->backup->servers); + silc_free(server->backup); + server->backup = NULL; +} + /* Marks the IP address and port from the `server_id' as being replaced by backup router indicated by the `server'. If the router connects at a later time we can check whether it has been replaced by an backup @@ -166,8 +200,6 @@ void silc_server_backup_replaced_add(SilcServer server, int i; SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));; - SILC_LOG_DEBUG(("Start")); - if (!server->backup) server->backup = silc_calloc(1, sizeof(*server->backup)); if (!server->backup->replaced) { @@ -176,11 +208,11 @@ void silc_server_backup_replaced_add(SilcServer server, server->backup->replaced_count = 1; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Replaced added")); + SILC_LOG_DEBUG(("Replacing router %s with %s", + silc_id_render(server_id, SILC_ID_SERVER), + server_entry->server_name)); memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip)); - //r->port = server_id->port; r->server = server_entry; for (i = 0; i < server->backup->replaced_count; i++) { @@ -209,29 +241,25 @@ bool silc_server_backup_replaced_get(SilcServer server, { int i; - SILC_LOG_DEBUG(("Start")); - - SILC_LOG_DEBUG(("*************************************")); - if (!server->backup || !server->backup->replaced) return FALSE; for (i = 0; i < server->backup->replaced_count; i++) { if (!server->backup->replaced[i]) continue; - SILC_LOG_HEXDUMP(("IP"), server_id->ip.data, server_id->ip.data_len); - SILC_LOG_HEXDUMP(("IP"), server->backup->replaced[i]->ip.data, - server->backup->replaced[i]->ip.data_len); - if (!memcmp(&server->backup->replaced[i]->ip, &server_id->ip.data, + if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data, sizeof(server_id->ip.data))) { if (server_entry) *server_entry = server->backup->replaced[i]->server; - SILC_LOG_DEBUG(("REPLACED")); + SILC_LOG_DEBUG(("Router %s is replaced by %s", + silc_id_render(server_id, SILC_ID_SERVER), + server->backup->replaced[i]->server->server_name)); return TRUE; } } - SILC_LOG_DEBUG(("NOT REPLACED")); + SILC_LOG_DEBUG(("Router %s is not replaced by backup router", + silc_id_render(server_id, SILC_ID_SERVER))); return FALSE; } @@ -242,8 +270,6 @@ void silc_server_backup_replaced_del(SilcServer server, { int i; - SILC_LOG_DEBUG(("Start")); - if (!server->backup || !server->backup->replaced) return; @@ -270,6 +296,7 @@ void silc_server_backup_broadcast(SilcServer server, SilcServerEntry backup; SilcSocketConnection sock; SilcBuffer buffer; + const SilcBufferStruct p; SilcIDListData idata; int i; @@ -287,20 +314,31 @@ void silc_server_backup_broadcast(SilcServer server, if (!backup || backup->connection == sender || server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; idata = (SilcIDListData)backup; sock = backup->connection; - silc_packet_send_prepare(sock, 0, 0, buffer->len); - silc_buffer_put(sock->outbuf, buffer->data, buffer->len); + if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send, + (const SilcBuffer)&p)) { + SILC_LOG_ERROR(("Cannot send packet")); + return; + } + silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len); silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++, - sock->outbuf, sock->outbuf->len); + (SilcBuffer)&p, p.len); - SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", sock->outbuf->len), - sock->outbuf->data, sock->outbuf->len); + SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len); /* Now actually send the packet */ silc_server_packet_send_real(server, sock, FALSE); + + /* Check for mandatory rekey */ + if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD) + silc_schedule_task_add(server->schedule, sender->sock, + silc_server_rekey_callback, sender, 0, 1, + SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL); } } @@ -319,7 +357,7 @@ void silc_server_backup_send(SilcServer server, SilcPacketType type, SilcPacketFlags flags, unsigned char *data, - uint32 data_len, + SilcUInt32 data_len, bool force_send, bool local) { @@ -330,20 +368,20 @@ void silc_server_backup_send(SilcServer server, if (!server->backup || server->server_type != SILC_ROUTER) return; - SILC_LOG_DEBUG(("Start")); - for (i = 0; i < server->backup->servers_count; i++) { backup = server->backup->servers[i].server; - if (!backup) - continue; - - if (sender == backup) + if (!backup || sender == backup) continue; - if (local && server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; sock = backup->connection; + + SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)", + silc_get_packet_name(type), sock->hostname, sock->ip)); + silc_server_packet_send(server, backup->connection, type, flags, data, data_len, force_send); } @@ -361,7 +399,7 @@ void silc_server_backup_send_dest(SilcServer server, void *dst_id, SilcIdType dst_id_type, unsigned char *data, - uint32 data_len, + SilcUInt32 data_len, bool force_send, bool local) { @@ -372,26 +410,37 @@ void silc_server_backup_send_dest(SilcServer server, if (!server->backup || server->server_type != SILC_ROUTER) return; - SILC_LOG_DEBUG(("Start")); - for (i = 0; i < server->backup->servers_count; i++) { backup = server->backup->servers[i].server; - if (!backup) - continue; - - if (sender == backup) + if (!backup || sender == backup) continue; - if (local && server->backup->servers[i].local == FALSE) continue; + if (server->backup->servers[i].server == server->id_entry) + continue; sock = backup->connection; + + SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)", + silc_get_packet_name(type), sock->hostname, sock->ip)); + silc_server_packet_send_dest(server, backup->connection, type, flags, dst_id, dst_id_type, data, data_len, force_send); } } +SILC_TASK_CALLBACK(silc_server_backup_timeout) +{ + SilcProtocol protocol = context; + SilcServer server = app_context; + + SILC_LOG_INFO(("Timeout occurred during backup resuming protocol")); + silc_protocol_cancel(protocol, server->schedule); + protocol->state = SILC_PROTOCOL_STATE_ERROR; + silc_protocol_execute_final(protocol, server->schedule); +} + /* Processes incoming RESUME_ROUTER packet. This can give the packet for processing to the protocol handler or allocate new protocol if start command is received. */ @@ -400,22 +449,24 @@ void silc_server_backup_resume_router(SilcServer server, SilcSocketConnection sock, SilcPacketContext *packet) { - uint8 type, session; + SilcUInt8 type, session; SilcServerBackupProtocolContext ctx; int i, ret; if (sock->type == SILC_SOCKET_TYPE_CLIENT || - sock->type == SILC_SOCKET_TYPE_UNKNOWN) + sock->type == SILC_SOCKET_TYPE_UNKNOWN) { + SILC_LOG_DEBUG(("Bad packet received")); return; - - SILC_LOG_DEBUG(("Start")); + } ret = silc_buffer_unformat(packet->buffer, SILC_STR_UI_CHAR(&type), SILC_STR_UI_CHAR(&session), SILC_STR_END); - if (ret < 0) + if (ret < 0) { + SILC_LOG_ERROR(("Malformed resume router packet received")); return; + } /* Activate the protocol for this socket if necessary */ if ((type == SILC_SERVER_BACKUP_RESUMED || @@ -447,9 +498,6 @@ void silc_server_backup_resume_router(SilcServer server, ctx = sock->protocol->context; ctx->type = type; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Continuing protocol, type %d", type)); - if (type != SILC_SERVER_BACKUP_RESUMED && type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) { for (i = 0; i < ctx->sessions_count; i++) { @@ -464,7 +512,7 @@ void silc_server_backup_resume_router(SilcServer server, return; } - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", type)); return; } @@ -474,17 +522,14 @@ void silc_server_backup_resume_router(SilcServer server, immediately after we've connected to our primary router. */ if (sock->type == SILC_SOCKET_TYPE_ROUTER && - server->router == sock->user_data && + sock && SILC_PRIMARY_ROUTE(server) == sock && type == SILC_SERVER_BACKUP_REPLACED) { /* We have been replaced by an backup router in our cell. We must mark our primary router connection disabled since we are not allowed to use it at this moment. */ SilcIDListData idata = (SilcIDListData)sock->user_data; - SILC_LOG_INFO(("We are replaced by an backup router in this cell, will " "wait until backup resuming protocol is executed")); - - SILC_LOG_DEBUG(("We are replaced by an backup router in this cell")); idata->status |= SILC_IDLIST_STATUS_DISABLED; return; } @@ -503,12 +548,17 @@ void silc_server_backup_resume_router(SilcServer server, proto_ctx->start = time(0); SILC_LOG_DEBUG(("Starting backup resuming protocol as responder")); + SILC_LOG_INFO(("Starting backup resuming protocol")); /* Run the backup resuming protocol */ silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP, &sock->protocol, proto_ctx, silc_server_protocol_backup_done); silc_protocol_execute(sock->protocol, server->schedule, 0, 0); + silc_schedule_task_add(server->schedule, sock->sock, + silc_server_backup_timeout, + sock->protocol, 30, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); } } @@ -516,16 +566,18 @@ void silc_server_backup_resume_router(SilcServer server, SILC_TASK_CALLBACK(silc_server_backup_connect_to_router) { + SilcServer server = app_context; SilcServerConnection sconn = (SilcServerConnection)context; - SilcServer server = sconn->server; int sock; + const char *server_ip; SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host, sconn->remote_port)); /* Connect to remote host */ - sock = silc_net_create_connection(server->config->listen_port->local_ip, - sconn->remote_port, + server_ip = server->config->server_info->primary == NULL ? NULL : + server->config->server_info->primary->server_ip; + sock = silc_net_create_connection(server_ip, sconn->remote_port, sconn->remote_host); if (sock < 0) { silc_schedule_task_add(server->schedule, 0, @@ -544,24 +596,29 @@ SILC_TASK_CALLBACK(silc_server_backup_connect_to_router) connection is created. */ void silc_server_backup_reconnect(SilcServer server, - const char *ip, uint16 port, + const char *ip, SilcUInt16 port, SilcServerConnectRouterCallback callback, void *context) { SilcServerConnection sconn; + SILC_LOG_INFO(("Attempting to reconnect to primary router")); + sconn = silc_calloc(1, sizeof(*sconn)); - sconn->server = server; sconn->remote_host = strdup(ip); sconn->remote_port = port; sconn->callback = callback; sconn->callback_context = context; + sconn->no_reconnect = TRUE; silc_schedule_task_add(server->schedule, 0, silc_server_backup_connect_to_router, sconn, 1, 0, SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL); } +/* Task that is called after backup router has connected back to + primary router and we are starting the resuming protocol */ + SILC_TASK_CALLBACK(silc_server_backup_connected_later) { SilcServerBackupProtocolContext proto_ctx = @@ -570,12 +627,18 @@ SILC_TASK_CALLBACK(silc_server_backup_connected_later) SilcSocketConnection sock = proto_ctx->sock; SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator")); + SILC_LOG_INFO(("Starting backup resuming protocol")); /* Run the backup resuming protocol */ silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP, &sock->protocol, proto_ctx, silc_server_protocol_backup_done); silc_protocol_execute(sock->protocol, server->schedule, 0, 0); + + silc_schedule_task_add(server->schedule, sock->sock, + silc_server_backup_timeout, + sock->protocol, 30, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); } /* Called when we've established connection back to our primary router @@ -587,8 +650,21 @@ void silc_server_backup_connected(SilcServer server, void *context) { SilcServerBackupProtocolContext proto_ctx; - SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection; + SilcSocketConnection sock; + + if (!server_entry) { + /* Try again */ + SilcServerConfigRouter *primary; + primary = silc_server_config_get_primary_router(server); + if (primary) + silc_server_backup_reconnect(server, + primary->host, primary->port, + silc_server_backup_connected, + context); + return; + } + sock = (SilcSocketConnection)server_entry->connection; proto_ctx = silc_calloc(1, sizeof(*proto_ctx)); proto_ctx->server = server; proto_ctx->sock = sock; @@ -614,16 +690,30 @@ static void silc_server_backup_connect_primary(SilcServer server, void *context) { SilcSocketConnection backup_router = (SilcSocketConnection)context; - SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection; - SilcIDListData idata = (SilcIDListData)server_entry; - SilcServerBackupProtocolContext ctx = - (SilcServerBackupProtocolContext)backup_router->protocol->context; + SilcServerBackupProtocolContext ctx; + SilcSocketConnection sock; + SilcIDListData idata; SilcBuffer buffer; - SILC_LOG_DEBUG(("Start")); + if (!server_entry) { + /* Try again */ + SilcServerConfigRouter *primary; + primary = silc_server_config_get_primary_router(server); + if (primary) + silc_server_backup_reconnect(server, + primary->host, primary->port, + silc_server_backup_connect_primary, + context); + return; + } + + ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context; + sock = (SilcSocketConnection)server_entry->connection; + idata = (SilcIDListData)server_entry; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending CONNECTED packet, session %d", ctx->session)); + SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session)); + SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router", + ctx->session)); /* Send the CONNECTED packet back to the backup router. */ buffer = silc_buffer_alloc(2); @@ -650,59 +740,33 @@ static void silc_server_backup_connect_primary(SilcServer server, backup_router->protocol = NULL; } -/* Resume protocol with RESUME_ROUTER packet: - - SILC_PACKET_RESUME_ROUTER: - - - - = the protocol opcode - = Identifier for this packet and any subsequent reply - packets must include this identifier. - - Types: - - 1 = To router: Comensing backup resuming protocol. This will - indicate that the sender is backup router acting as primary - and the receiver is primary router that has been replaced by - the backup router. - - To server. Comensing backup resuming protocol. This will - indicate that the sender is backup router and the receiver - must reconnect to the real primary router of the cell. - - 2 = To Router: Comesning backup resuming protocol in another - cell. The receiver will connect to its primary router - (the router that is now online again) but will not use - the link. If the receiver is not configured to connect - to any router it does as locally configured. The sender - is always backup router. - - To server: this is never sent to server. - - 3 = To backup router: Sender is normal server or router and it - tells to backup router that they have connected to the - primary router. Backup router never sends this type. - - 4 = To router: Ending backup resuming protocol. This is sent - to the real primary router to tell that it can take over - the task as being primary router. - - To server: same as sending for router. - - Backup router sends this also to the primary route but only - after it has sent them to normal servers and has purged all - traffic coming from normal servers. - - 5 = To router: Sender is the real primary router after it has - received type 4 from backup router. To tell that it is again - primary router of the cell. - - 20 = To router: This is sent only when router is connecting to - another router and has been replaced by an backup router. - The sender knows that the connectee has been replaced. +SILC_TASK_CALLBACK(silc_server_backup_send_resumed) +{ + SilcProtocol protocol = (SilcProtocol)context; + SilcServerBackupProtocolContext ctx = protocol->context; + SilcServer server = ctx->server; + SilcBuffer packet; + int i; - */ + for (i = 0; i < ctx->sessions_count; i++) + if (ctx->sessions[i].server_entry == ctx->sock->user_data) + ctx->session = ctx->sessions[i].session; + + /* We've received all the CONNECTED packets and now we'll send the + ENDING packet to the new primary router. */ + packet = silc_buffer_alloc(2); + silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); + silc_buffer_format(packet, + SILC_STR_UI_CHAR(SILC_SERVER_BACKUP_ENDING), + SILC_STR_UI_CHAR(ctx->session), + SILC_STR_END); + silc_server_packet_send(server, ctx->sock, + SILC_PACKET_RESUME_ROUTER, 0, + packet->data, packet->len, FALSE); + silc_buffer_free(packet); + + protocol->state = SILC_PROTOCOL_STATE_END; +} /* Backup resuming protocol. This protocol is executed when the primary router wants to resume its position as being primary router. */ @@ -718,13 +782,9 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) SilcServerEntry server_entry; int i; - SILC_LOG_DEBUG(("Start")); - if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN) protocol->state = SILC_PROTOCOL_STATE_START; - SILC_LOG_DEBUG(("State=%d", protocol->state)); - switch(protocol->state) { case SILC_PROTOCOL_STATE_START: if (ctx->responder == FALSE) { @@ -733,9 +793,6 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending START packets")); - /* Send the START packet to primary router and normal servers. */ if (silc_idcache_get_all(server->local_list->servers, &list)) { if (silc_idcache_list_first(list, &id_cache)) { @@ -757,9 +814,10 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) ctx->sessions[ctx->sessions_count].connected = FALSE; ctx->sessions[ctx->sessions_count].server_entry = server_entry; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("START (local) for session %d", - ctx->sessions_count)); + SILC_LOG_DEBUG(("Sending START to %s (session %d)", + server_entry->server_name, ctx->sessions_count)); + SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)", + server_entry->server_name, ctx->sessions_count)); /* This connection is performing this protocol too now */ ((SilcSocketConnection)server_entry->connection)->protocol = @@ -803,9 +861,10 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) ctx->sessions[ctx->sessions_count].connected = FALSE; ctx->sessions[ctx->sessions_count].server_entry = server_entry; - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("START (global) for session %d", - ctx->sessions_count)); + SILC_LOG_DEBUG(("Sending START to %s (session %d)", + server_entry->server_name, ctx->sessions_count)); + SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)", + server_entry->server_name, ctx->sessions_count)); /* This connection is performing this protocol too now */ ((SilcSocketConnection)server_entry->connection)->protocol = @@ -831,41 +890,53 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) silc_buffer_free(packet); - /* Announce all of our information */ - silc_server_announce_servers(server, TRUE, 0, ctx->sock); - silc_server_announce_clients(server, 0, ctx->sock); - silc_server_announce_channels(server, 0, ctx->sock); + /* If we are not standalone and our primary is not the one we've + talking to now, then announce our information to it since we + haven't done that yet. Standalone backup router announces + these during connecting to the primary. */ + if (!server->standalone && SILC_PRIMARY_ROUTE(server) != ctx->sock) { + silc_server_announce_servers(server, TRUE, 0, ctx->sock); + silc_server_announce_clients(server, 0, ctx->sock); + silc_server_announce_channels(server, 0, ctx->sock); + } protocol->state++; } else { /* Responder of the protocol. */ - SilcServerConfigSectionServerConnection *primary; + SilcServerConfigRouter *primary; /* We should have received START or START_GLOBAL packet */ if (ctx->type != SILC_SERVER_BACKUP_START && ctx->type != SILC_SERVER_BACKUP_START_GLOBAL) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received START packet, reconnecting to router")); - /* Connect to the primary router that was down that is now supposed to be back online. We send the CONNECTED packet after we've established the connection to the primary router. */ - primary = silc_server_config_get_primary_router(server->config); - if (primary && server->backup_primary) { + primary = silc_server_config_get_primary_router(server); + if (primary && server->backup_primary && + !silc_server_num_sockets_by_remote(server, + silc_net_is_ip(primary->host) ? + primary->host : NULL, + silc_net_is_ip(primary->host) ? + NULL : primary->host, + primary->port, + SILC_SOCKET_TYPE_ROUTER)) { + SILC_LOG_DEBUG(("Received START (session %d), reconnect to router", + ctx->session)); silc_server_backup_reconnect(server, primary->host, primary->port, silc_server_backup_connect_primary, ctx->sock); } else { /* Nowhere to connect just return the CONNECTED packet */ + SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back", + ctx->session)); + SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router", + ctx->session)); - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending CONNECTED packet, session %d", ctx->session)); - /* Send the CONNECTED packet back to the backup router. */ packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); @@ -900,16 +971,17 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) /* We should have received CONNECTED packet */ if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received CONNECTED packet, session %d", ctx->session)); - for (i = 0; i < ctx->sessions_count; i++) { if (ctx->sessions[i].session == ctx->session) { ctx->sessions[i].connected = TRUE; + SILC_LOG_INFO(("Received CONNECTED from %s (session %d)", + ctx->sessions[i].server_entry->server_name, + ctx->session)); + SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session)); break; } } @@ -919,38 +991,26 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) return; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Sending ENDING packet to primary")); - - for (i = 0; i < ctx->sessions_count; i++) - if (ctx->sessions[i].server_entry == ctx->sock->user_data) - ctx->session = ctx->sessions[i].session; + SILC_LOG_INFO(("All sessions have returned CONNECTED packets, " + "continuing")); + SILC_LOG_DEBUG(("Sending ENDING packet to primary router")); - /* We've received all the CONNECTED packets and now we'll send the - ENDING packet to the new primary router. */ - packet = silc_buffer_alloc(2); - silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); - silc_buffer_format(packet, - SILC_STR_UI_CHAR(SILC_SERVER_BACKUP_ENDING), - SILC_STR_UI_CHAR(ctx->session), - SILC_STR_END); - silc_server_packet_send(server, ctx->sock, - SILC_PACKET_RESUME_ROUTER, 0, - packet->data, packet->len, FALSE); - silc_buffer_free(packet); - - protocol->state = SILC_PROTOCOL_STATE_END; + /* Send with a timeout */ + silc_schedule_task_add(server->schedule, 0, + silc_server_backup_send_resumed, + protocol, 1, 0, SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + return; } else { /* Responder */ /* We should have been received ENDING packet */ if (ctx->type != SILC_SERVER_BACKUP_ENDING) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received ENDING packet, sending RESUMED packets")); + SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now")); /* This state is received by the primary router but also servers and perhaps other routers so check that if we are the primary @@ -959,7 +1019,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) to next state. */ if (server->router && !(server->router->data.status & SILC_IDLIST_STATUS_DISABLED) && - silc_server_config_is_primary_route(server->config)) { + silc_server_config_is_primary_route(server)) { /* We'll wait for RESUMED packet */ protocol->state = SILC_PROTOCOL_STATE_END; break; @@ -967,8 +1027,14 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) /* Switch announced informations to our primary router of using the backup router. */ + silc_server_local_servers_toggle_enabled(server, TRUE); + silc_server_update_servers_by_server(server, ctx->sock->user_data, + server->router); silc_server_update_clients_by_server(server, ctx->sock->user_data, - server->router, TRUE, FALSE); + server->router, TRUE); + if (server->server_type == SILC_SERVER) + silc_server_update_channels_by_server(server, ctx->sock->user_data, + server->router); packet = silc_buffer_alloc(2); silc_buffer_pull_tail(packet, SILC_BUFFER_END(packet)); @@ -986,8 +1052,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) continue; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("RESUMED packet (local)")); + SILC_LOG_DEBUG(("Sending RESUMED to %s", + server_entry->server_name)); server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; @@ -1023,8 +1089,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) continue; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("RESUMED packet (global)")); + SILC_LOG_DEBUG(("Sending RESUMED to %s", + server_entry->server_name)); server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; @@ -1051,6 +1117,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) silc_buffer_free(packet); SILC_LOG_INFO(("We are now the primary router of our cell again")); + server->wait_backup = FALSE; /* For us this is the end of this protocol. */ if (protocol->final_callback) @@ -1062,66 +1129,68 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) case SILC_PROTOCOL_STATE_END: { - SilcIDListData idata; SilcServerEntry router, backup_router; /* We should have been received RESUMED packet from our primary router. */ if (ctx->type != SILC_SERVER_BACKUP_RESUMED && ctx->type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) { - SILC_LOG_DEBUG(("Bad resume router packet")); + SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type)); break; } - SILC_LOG_DEBUG(("********************************")); - SILC_LOG_DEBUG(("Received RESUMED packet")); + SILC_LOG_INFO(("Received RESUMED from new primary router")); - /* We have now new primary router. All traffic goes there from now on. */ if (server->backup_router) server->server_type = SILC_BACKUP_ROUTER; + /* We have now new primary router. All traffic goes there from now on. */ router = (SilcServerEntry)ctx->sock->user_data; if (silc_server_backup_replaced_get(server, router->id, &backup_router)) { if (backup_router == server->router) { + /* We have new primary router now */ server->id_entry->router = router; server->router = router; SILC_LOG_INFO(("Switching back to primary router %s", server->router->server_name)); - SILC_LOG_DEBUG(("Switching back to primary router %s", - server->router->server_name)); - idata = (SilcIDListData)server->router; - idata->status &= ~SILC_IDLIST_STATUS_DISABLED; } else { - SILC_LOG_INFO(("Resuming the use of router %s", + /* We are connected to new primary and now continue using it */ + SILC_LOG_INFO(("Resuming the use of primary router %s", router->server_name)); - SILC_LOG_DEBUG(("Resuming the use of router %s", - router->server_name)); - idata = (SilcIDListData)router; - idata->status &= ~SILC_IDLIST_STATUS_DISABLED; } + server->backup_primary = FALSE; /* Update the client entries of the backup router to the new router */ - silc_server_update_clients_by_server(server, backup_router, - router, TRUE, FALSE); + silc_server_local_servers_toggle_enabled(server, FALSE); + router->data.status &= ~SILC_IDLIST_STATUS_DISABLED; silc_server_update_servers_by_server(server, backup_router, router); - silc_server_backup_replaced_del(server, backup_router); - silc_server_backup_add(server, backup_router, - ctx->sock->ip, ctx->sock->port, - backup_router->server_type != SILC_ROUTER ? - TRUE : FALSE); + silc_server_update_clients_by_server(server, NULL, router, FALSE); + if (server->server_type == SILC_SERVER) + silc_server_update_channels_by_server(server, backup_router, router); + silc_server_backup_replaced_del(server, backup_router); /* Announce all of our information to the router. */ if (server->server_type == SILC_ROUTER) - silc_server_announce_servers(server, FALSE, 0, router->connection); + silc_server_announce_servers(server, FALSE, ctx->start, + router->connection); /* Announce our clients and channels to the router */ - silc_server_announce_clients(server, 0, router->connection); - silc_server_announce_channels(server, 0, router->connection); + silc_server_announce_clients(server, ctx->start, + router->connection); + silc_server_announce_channels(server, ctx->start, + router->connection); } + /* Send notify about primary router going down to local operators */ + SILC_SERVER_SEND_OPERS(server, FALSE, TRUE, + SILC_NOTIFY_TYPE_NONE, + ("%s resumed the use of primary router %s", + server->server_name, + server->router->server_name)); + /* Protocol has ended, call the final callback */ if (protocol->final_callback) silc_protocol_execute_final(protocol, server->schedule); @@ -1140,6 +1209,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup) case SILC_PROTOCOL_STATE_FAILURE: /* Protocol has ended, call the final callback */ + SILC_LOG_ERROR(("Error during backup resume: received Failure")); if (protocol->final_callback) silc_protocol_execute_final(protocol, server->schedule); else @@ -1161,13 +1231,16 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) SilcIDCacheList list; SilcIDCacheEntry id_cache; - SILC_LOG_DEBUG(("Start")); + silc_schedule_task_del_by_context(server->schedule, protocol); if (protocol->state == SILC_PROTOCOL_STATE_ERROR || protocol->state == SILC_PROTOCOL_STATE_FAILURE) { SILC_LOG_ERROR(("Error occurred during backup router resuming protcool")); } + if (server->server_shutdown) + return; + /* Remove this protocol from all server entries that has it */ if (silc_idcache_get_all(server->local_list->servers, &list)) { if (silc_idcache_list_first(list, &id_cache)) { @@ -1178,6 +1251,29 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) if (sock->protocol == protocol) { sock->protocol = NULL; + /* Backup closes connection and reconnects if error occurred */ + if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) { + if (protocol->state == SILC_PROTOCOL_STATE_ERROR || + protocol->state == SILC_PROTOCOL_STATE_FAILURE) { + server->backup_noswitch = TRUE; + server->server_type = SILC_BACKUP_ROUTER; + + if (sock->user_data) + silc_server_free_sock_user_data(server, sock, NULL); + silc_server_close_connection(server, sock); + + silc_schedule_task_add(server->schedule, 0, + silc_server_connect_to_router, + server, 1, 0, + SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + + if (!silc_idcache_list_next(list, &id_cache)) + break; + continue; + } + } + if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED) server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; } @@ -1198,6 +1294,29 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) if (sock->protocol == protocol) { sock->protocol = NULL; + /* Backup closes connection and reconnects if error occurred */ + if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) { + if (protocol->state == SILC_PROTOCOL_STATE_ERROR || + protocol->state == SILC_PROTOCOL_STATE_FAILURE) { + server->backup_noswitch = TRUE; + server->server_type = SILC_BACKUP_ROUTER; + + if (sock->user_data) + silc_server_free_sock_user_data(server, sock, NULL); + silc_server_close_connection(server, sock); + + silc_schedule_task_add(server->schedule, 0, + silc_server_connect_to_router, + server, 1, 0, + SILC_TASK_TIMEOUT, + SILC_TASK_PRI_NORMAL); + + if (!silc_idcache_list_next(list, &id_cache)) + break; + continue; + } + } + if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED) server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED; } @@ -1209,6 +1328,10 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done) silc_idcache_list_free(list); } + if (protocol->state != SILC_PROTOCOL_STATE_ERROR && + protocol->state != SILC_PROTOCOL_STATE_FAILURE) + SILC_LOG_INFO(("Backup resuming protocol ended successfully")); + if (ctx->sock->protocol) ctx->sock->protocol = NULL; silc_protocol_free(protocol);