Fixed a bug in backup server IP comparison in silc_server_backup_get.
[silc.git] / apps / silcd / server_backup.c
index 1383e6ceb201dc2e78409ed015965a1cb0e0236a..12479fff6c49866e925aaa14e9fe9dfc46ec3438 100644 (file)
@@ -22,6 +22,9 @@
 #include "server_internal.h"
 
 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);
+static void silc_server_backup_connect_primary(SilcServer server,
+                                              SilcServerEntry server_entry,
+                                              void *context);
 
 /* Backup router */
 typedef struct {
@@ -93,6 +96,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
     if (!server->backup->servers[i].server) {
       server->backup->servers[i].server = backup_server;
       server->backup->servers[i].local = local;
+      server->backup->servers[i].port = htons(port);
       memset(server->backup->servers[i].ip.data, 0,
             sizeof(server->backup->servers[i].ip.data));
       silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -107,6 +111,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
                                         (i + 1));
   server->backup->servers[i].server = backup_server;
   server->backup->servers[i].local = local;
+  server->backup->servers[i].port = htons(port);
   memset(server->backup->servers[i].ip.data, 0,
         sizeof(server->backup->servers[i].ip.data));
   silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -127,7 +132,8 @@ SilcServerEntry silc_server_backup_get(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     if (server->backup->servers[i].server &&
-       !memcmp(&server->backup->servers[i].ip, &server_id->ip.data,
+       server->backup->servers[i].port == server_id->port &&
+       !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       SILC_LOG_DEBUG(("Found backup router %s for %s",
                      server->backup->servers[i].server->server_name,
@@ -146,7 +152,7 @@ void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
   int i;
 
   if (!server->backup)
-    return ;
+    return;
 
   for (i = 0; i < server->backup->servers_count; i++) {
     if (server->backup->servers[i].server == server_entry) {
@@ -160,6 +166,28 @@ void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
   }
 }
 
+/* Frees all data allocated for backup routers.  Call this after deleting
+   all backup routers and when new routers are added no more, for example
+   when shutting down the server. */
+
+void silc_server_backup_free(SilcServer server)
+{
+  int i;
+
+  if (!server->backup)
+    return;
+
+  /* Delete existing servers if caller didn't do it */
+  for (i = 0; i < server->backup->servers_count; i++) {
+    if (server->backup->servers[i].server)
+      silc_server_backup_del(server, server->backup->servers[i].server);
+  }
+
+  silc_free(server->backup->servers);
+  silc_free(server->backup);
+  server->backup = NULL;
+}
+
 /* Marks the IP address and port from the `server_id' as  being replaced
    by backup router indicated by the `server'. If the router connects at
    a later time we can check whether it has been replaced by an backup
@@ -180,7 +208,7 @@ void silc_server_backup_replaced_add(SilcServer server,
     server->backup->replaced_count = 1;
   }
 
-  SILC_LOG_DEBUG(("Replacing router %s with %s backup",
+  SILC_LOG_DEBUG(("Replacing router %s with %s",
                  silc_id_render(server_id, SILC_ID_SERVER),
                  server_entry->server_name));
 
@@ -219,7 +247,7 @@ bool silc_server_backup_replaced_get(SilcServer server,
   for (i = 0; i < server->backup->replaced_count; i++) {
     if (!server->backup->replaced[i])
       continue;
-    if (!memcmp(&server->backup->replaced[i]->ip, &server_id->ip.data,
+    if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       if (server_entry)
        *server_entry = server->backup->replaced[i]->server;
@@ -336,12 +364,8 @@ void silc_server_backup_send(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     backup = server->backup->servers[i].server;
-    if (!backup)
+    if (!backup || sender == backup)
       continue;
-
-    if (sender == backup)
-      continue;
-
     if (local && server->backup->servers[i].local == FALSE)
       continue;
     if (server->backup->servers[i].server == server->id_entry)
@@ -382,12 +406,8 @@ void silc_server_backup_send_dest(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     backup = server->backup->servers[i].server;
-    if (!backup)
-      continue;
-
-    if (sender == backup)
+    if (!backup || sender == backup)
       continue;
-
     if (local && server->backup->servers[i].local == FALSE)
       continue;
     if (server->backup->servers[i].server == server->id_entry)
@@ -427,7 +447,7 @@ void silc_server_backup_resume_router(SilcServer server,
                             SILC_STR_UI_CHAR(&session),
                             SILC_STR_END);
   if (ret < 0) {
-    SILC_LOG_DEBUG(("Malformed packet received"));
+    SILC_LOG_ERROR(("Malformed resume router packet received"));
     return;
   }
   
@@ -475,7 +495,7 @@ void silc_server_backup_resume_router(SilcServer server,
       return;
     }
 
-    SILC_LOG_DEBUG(("Bad resume router packet"));
+    SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", type));
     return;
   }
 
@@ -511,6 +531,7 @@ void silc_server_backup_resume_router(SilcServer server,
     proto_ctx->start = time(0);
 
     SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
+    SILC_LOG_INFO(("Starting backup resuming protocol"));
 
     /* Run the backup resuming protocol */
     silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
@@ -524,8 +545,8 @@ void silc_server_backup_resume_router(SilcServer server,
 
 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
 {
+  SilcServer server = app_context;
   SilcServerConnection sconn = (SilcServerConnection)context;
-  SilcServer server = sconn->server;
   int sock;
   const char *server_ip;
 
@@ -534,7 +555,7 @@ SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
 
   /* Connect to remote host */
   server_ip = server->config->server_info->primary == NULL ? NULL :
-               server->config->server_info->primary->server_ip;
+    server->config->server_info->primary->server_ip;
   sock = silc_net_create_connection(server_ip, sconn->remote_port,
                                    sconn->remote_host);
   if (sock < 0) {
@@ -560,12 +581,14 @@ void silc_server_backup_reconnect(SilcServer server,
 {
   SilcServerConnection sconn;
 
+  SILC_LOG_INFO(("Attempting to reconnect to primary router"));
+
   sconn = silc_calloc(1, sizeof(*sconn));
-  sconn->server = server;
   sconn->remote_host = strdup(ip);
   sconn->remote_port = port;
   sconn->callback = callback;
   sconn->callback_context = context;
+  sconn->no_reconnect = TRUE;
   silc_schedule_task_add(server->schedule, 0, 
                         silc_server_backup_connect_to_router,
                         sconn, 1, 0, SILC_TASK_TIMEOUT,
@@ -583,6 +606,7 @@ SILC_TASK_CALLBACK(silc_server_backup_connected_later)
   SilcSocketConnection sock = proto_ctx->sock;
 
   SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
+  SILC_LOG_INFO(("Starting backup resuming protocol"));
 
   /* Run the backup resuming protocol */
   silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
@@ -600,8 +624,21 @@ void silc_server_backup_connected(SilcServer server,
                                  void *context)
 {
   SilcServerBackupProtocolContext proto_ctx;
-  SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection;
+  SilcSocketConnection sock;
+
+  if (!server_entry) {
+    /* Try again */
+    SilcServerConfigRouter *primary;
+    primary = silc_server_config_get_primary_router(server);
+    if (primary)
+      silc_server_backup_reconnect(server,
+                                  primary->host, primary->port,
+                                  silc_server_backup_connected,
+                                  context);
+    return;
+  }
 
+  sock = (SilcSocketConnection)server_entry->connection;
   proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
   proto_ctx->server = server;
   proto_ctx->sock = sock;
@@ -627,12 +664,27 @@ static void silc_server_backup_connect_primary(SilcServer server,
                                               void *context)
 {
   SilcSocketConnection backup_router = (SilcSocketConnection)context;
-  SilcSocketConnection sock = (SilcSocketConnection)server_entry->connection;
-  SilcIDListData idata = (SilcIDListData)server_entry;
-  SilcServerBackupProtocolContext ctx = 
-    (SilcServerBackupProtocolContext)backup_router->protocol->context;
+  SilcServerBackupProtocolContext ctx;
+  SilcSocketConnection sock;
+  SilcIDListData idata;
   SilcBuffer buffer;
 
+  if (!server_entry) {
+    /* Try again */
+    SilcServerConfigRouter *primary;
+    primary = silc_server_config_get_primary_router(server);
+    if (primary)
+      silc_server_backup_reconnect(server,
+                                  primary->host, primary->port,
+                                  silc_server_backup_connect_primary,
+                                  context);
+    return;
+  }
+
+  ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
+  sock = (SilcSocketConnection)server_entry->connection;
+  idata = (SilcIDListData)server_entry;
+
   SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
 
   /* Send the CONNECTED packet back to the backup router. */
@@ -806,10 +858,15 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       silc_buffer_free(packet);
 
-      /* Announce all of our information */
-      silc_server_announce_servers(server, TRUE, 0, ctx->sock);
-      silc_server_announce_clients(server, 0, ctx->sock);
-      silc_server_announce_channels(server, 0, ctx->sock);
+      /* If we are not standalone and our primary is not the one we've
+        talking to now, then announce our information to it since we
+        haven't done that yet.  Standalone backup router announces
+        these during connecting to the primary. */
+      if (!server->standalone && SILC_PRIMARY_ROUTE(server) != ctx->sock) {
+       silc_server_announce_servers(server, TRUE, 0, ctx->sock);
+       silc_server_announce_clients(server, 0, ctx->sock);
+       silc_server_announce_channels(server, 0, ctx->sock);
+      }
 
       protocol->state++;
     } else {
@@ -819,7 +876,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       /* We should have received START or START_GLOBAL packet */
       if (ctx->type != SILC_SERVER_BACKUP_START &&
          ctx->type != SILC_SERVER_BACKUP_START_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
        break;
       }
 
@@ -873,7 +930,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have received CONNECTED packet */
       if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
        break;
       }
 
@@ -905,7 +962,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have been received ENDING packet */
       if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
        break;
       }
 
@@ -926,10 +983,11 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* Switch announced informations to our primary router of using the
         backup router. */
+      silc_server_local_servers_toggle_enabled(server, TRUE);
       silc_server_update_servers_by_server(server, ctx->sock->user_data, 
-                                          server->router, TRUE);
+                                          server->router);
       silc_server_update_clients_by_server(server, ctx->sock->user_data,
-                                          server->router, TRUE, FALSE);
+                                          server->router, TRUE);
       if (server->server_type == SILC_SERVER)
        silc_server_update_channels_by_server(server, ctx->sock->user_data, 
                                              server->router);
@@ -1015,6 +1073,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       silc_buffer_free(packet);
 
       SILC_LOG_INFO(("We are now the primary router of our cell again"));
+      server->wait_backup = FALSE;
 
       /* For us this is the end of this protocol. */
       if (protocol->final_callback)
@@ -1032,7 +1091,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
         router. */
       if (ctx->type != SILC_SERVER_BACKUP_RESUMED &&
          ctx->type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
        break;
       }
 
@@ -1050,38 +1109,34 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
          /* We have new primary router now */
          server->id_entry->router = router;
          server->router = router;
-         server->router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
-
          SILC_LOG_INFO(("Switching back to primary router %s",
                         server->router->server_name));
-
-         /* We cannot talk to backup router connection anymore, it's
-            enabled again if primary goes down. */
-         backup_router->data.status |= SILC_IDLIST_STATUS_DISABLED;
        } else {
          /* We are connected to new primary and now continue using it */
-         router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
          SILC_LOG_INFO(("Resuming the use of primary router %s",
                         router->server_name));
        }
 
        /* Update the client entries of the backup router to the new 
           router */
-       silc_server_update_servers_by_server(server, backup_router, router,
-                                            FALSE);
-       silc_server_update_clients_by_server(server, NULL, router, 
-                                            FALSE, FALSE);
+       silc_server_local_servers_toggle_enabled(server, FALSE);
+       router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
+       silc_server_update_servers_by_server(server, backup_router, router);
+       silc_server_update_clients_by_server(server, NULL, router, FALSE);
        if (server->server_type == SILC_SERVER)
          silc_server_update_channels_by_server(server, backup_router, router);
        silc_server_backup_replaced_del(server, backup_router);
 
        /* Announce all of our information to the router. */
        if (server->server_type == SILC_ROUTER)
-         silc_server_announce_servers(server, FALSE, 0, router->connection);
+         silc_server_announce_servers(server, FALSE, ctx->start,
+                                      router->connection);
 
        /* Announce our clients and channels to the router */
-       silc_server_announce_clients(server, 0, router->connection);
-       silc_server_announce_channels(server, 0, router->connection);
+       silc_server_announce_clients(server, ctx->start,
+                                    router->connection);
+       silc_server_announce_channels(server, ctx->start,
+                                     router->connection);
       }
 
       /* Send notify about primary router going down to local operators */
@@ -1109,6 +1164,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
   case SILC_PROTOCOL_STATE_FAILURE:
     /* Protocol has ended, call the final callback */
+    SILC_LOG_ERROR(("Error during backup resume: received Failure"));
     if (protocol->final_callback)
       silc_protocol_execute_final(protocol, server->schedule);
     else