Merged from silc_1_0_branch.
[silc.git] / apps / silcd / server_backup.c
index 5ec8945f68cd439d64c0d21e9fc91eb9c1db021e..78634fe2e02fcc35a8b58da88df56808590256ce 100644 (file)
@@ -96,6 +96,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
     if (!server->backup->servers[i].server) {
       server->backup->servers[i].server = backup_server;
       server->backup->servers[i].local = local;
+      server->backup->servers[i].port = htons(port);
       memset(server->backup->servers[i].ip.data, 0,
             sizeof(server->backup->servers[i].ip.data));
       silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -110,6 +111,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
                                         (i + 1));
   server->backup->servers[i].server = backup_server;
   server->backup->servers[i].local = local;
+  server->backup->servers[i].port = htons(port);
   memset(server->backup->servers[i].ip.data, 0,
         sizeof(server->backup->servers[i].ip.data));
   silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -130,7 +132,8 @@ SilcServerEntry silc_server_backup_get(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     if (server->backup->servers[i].server &&
-       !memcmp(&server->backup->servers[i].ip, &server_id->ip.data,
+       server->backup->servers[i].port == server_id->port &&
+       !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       SILC_LOG_DEBUG(("Found backup router %s for %s",
                      server->backup->servers[i].server->server_name,
@@ -149,7 +152,7 @@ void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
   int i;
 
   if (!server->backup)
-    return ;
+    return;
 
   for (i = 0; i < server->backup->servers_count; i++) {
     if (server->backup->servers[i].server == server_entry) {
@@ -163,6 +166,28 @@ void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
   }
 }
 
+/* Frees the backup router table and the `server->backup' context itself.
+   Any entries still in the table are deleted first.  Call this once no
+   more backup routers will be added, e.g. when shutting down the server. */
+
+void silc_server_backup_free(SilcServer server)
+{
+  int i;
+
+  if (!server->backup)
+    return;				/* nothing to free */
+
+  /* Delete existing servers if caller didn't do it */
+  for (i = 0; i < server->backup->servers_count; i++) {
+    if (server->backup->servers[i].server)
+      silc_server_backup_del(server, server->backup->servers[i].server);
+  }
+
+  silc_free(server->backup->servers);
+  silc_free(server->backup);
+  server->backup = NULL;		/* later !server->backup checks now bail out */
+}
+
 /* Marks the IP address and port from the `server_id' as  being replaced
    by backup router indicated by the `server'. If the router connects at
    a later time we can check whether it has been replaced by an backup
@@ -222,7 +247,7 @@ bool silc_server_backup_replaced_get(SilcServer server,
   for (i = 0; i < server->backup->replaced_count; i++) {
     if (!server->backup->replaced[i])
       continue;
-    if (!memcmp(&server->backup->replaced[i]->ip, &server_id->ip.data,
+    if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       if (server_entry)
        *server_entry = server->backup->replaced[i]->server;
@@ -308,6 +333,12 @@ void silc_server_backup_broadcast(SilcServer server,
 
     /* Now actually send the packet */
     silc_server_packet_send_real(server, sock, FALSE);
+
+    /* Check for mandatory rekey */
+    if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
+      silc_schedule_task_add(server->schedule, sender->sock,
+                            silc_server_rekey_callback, sender, 0, 1,
+                            SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
   }
 }
 
@@ -339,12 +370,8 @@ void silc_server_backup_send(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     backup = server->backup->servers[i].server;
-    if (!backup)
+    if (!backup || sender == backup)
       continue;
-
-    if (sender == backup)
-      continue;
-
     if (local && server->backup->servers[i].local == FALSE)
       continue;
     if (server->backup->servers[i].server == server->id_entry)
@@ -385,12 +412,8 @@ void silc_server_backup_send_dest(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     backup = server->backup->servers[i].server;
-    if (!backup)
-      continue;
-
-    if (sender == backup)
+    if (!backup || sender == backup)
       continue;
-
     if (local && server->backup->servers[i].local == FALSE)
       continue;
     if (server->backup->servers[i].server == server->id_entry)
@@ -407,6 +430,17 @@ void silc_server_backup_send_dest(SilcServer server,
   }
 }
 
+SILC_TASK_CALLBACK(silc_server_backup_timeout) /* ends a timed-out resume */
+{
+  SilcProtocol protocol = context;	/* protocol passed as the task context */
+  SilcServer server = app_context;
+
+  SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
+  silc_protocol_cancel(protocol, server->schedule);
+  protocol->state = SILC_PROTOCOL_STATE_ERROR;	/* set before the final step */
+  silc_protocol_execute_final(protocol, server->schedule);
+}
+
 /* Processes incoming RESUME_ROUTER packet. This can give the packet
    for processing to the protocol handler or allocate new protocol if
    start command is received. */
@@ -430,7 +464,7 @@ void silc_server_backup_resume_router(SilcServer server,
                             SILC_STR_UI_CHAR(&session),
                             SILC_STR_END);
   if (ret < 0) {
-    SILC_LOG_DEBUG(("Malformed packet received"));
+    SILC_LOG_ERROR(("Malformed resume router packet received"));
     return;
   }
   
@@ -478,7 +512,7 @@ void silc_server_backup_resume_router(SilcServer server,
       return;
     }
 
-    SILC_LOG_DEBUG(("Bad resume router packet"));
+    SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", type));
     return;
   }
 
@@ -514,12 +548,17 @@ void silc_server_backup_resume_router(SilcServer server,
     proto_ctx->start = time(0);
 
     SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
+    SILC_LOG_INFO(("Starting backup resuming protocol"));
 
     /* Run the backup resuming protocol */
     silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
                        &sock->protocol, proto_ctx, 
                        silc_server_protocol_backup_done);
     silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
+    silc_schedule_task_add(server->schedule, sock->sock,
+                          silc_server_backup_timeout,
+                          sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
+                          SILC_TASK_PRI_NORMAL);
   }
 }
 
@@ -527,8 +566,8 @@ void silc_server_backup_resume_router(SilcServer server,
 
 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
 {
+  SilcServer server = app_context;
   SilcServerConnection sconn = (SilcServerConnection)context;
-  SilcServer server = sconn->server;
   int sock;
   const char *server_ip;
 
@@ -563,8 +602,9 @@ void silc_server_backup_reconnect(SilcServer server,
 {
   SilcServerConnection sconn;
 
+  SILC_LOG_INFO(("Attempting to reconnect to primary router"));
+
   sconn = silc_calloc(1, sizeof(*sconn));
-  sconn->server = server;
   sconn->remote_host = strdup(ip);
   sconn->remote_port = port;
   sconn->callback = callback;
@@ -587,12 +627,18 @@ SILC_TASK_CALLBACK(silc_server_backup_connected_later)
   SilcSocketConnection sock = proto_ctx->sock;
 
   SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
+  SILC_LOG_INFO(("Starting backup resuming protocol"));
 
   /* Run the backup resuming protocol */
   silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
                      &sock->protocol, proto_ctx, 
                      silc_server_protocol_backup_done);
   silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
+
+  silc_schedule_task_add(server->schedule, sock->sock,
+                        silc_server_backup_timeout,
+                        sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
+                        SILC_TASK_PRI_NORMAL);
 }
 
 /* Called when we've established connection back to our primary router
@@ -666,6 +712,8 @@ static void silc_server_backup_connect_primary(SilcServer server,
   idata = (SilcIDListData)server_entry;
 
   SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
+  SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
+               ctx->session));
 
   /* Send the CONNECTED packet back to the backup router. */
   buffer = silc_buffer_alloc(2);
@@ -766,7 +814,9 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
            ctx->sessions[ctx->sessions_count].connected = FALSE;
            ctx->sessions[ctx->sessions_count].server_entry = server_entry;
 
-           SILC_LOG_DEBUG(("Sending START to %s (session %d)", 
+           SILC_LOG_DEBUG(("Sending START to %s (session %d)",
+                           server_entry->server_name, ctx->sessions_count));
+           SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
                            server_entry->server_name, ctx->sessions_count));
 
            /* This connection is performing this protocol too now */
@@ -813,6 +863,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
            SILC_LOG_DEBUG(("Sending START to %s (session %d)", 
                            server_entry->server_name, ctx->sessions_count));
+           SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
+                           server_entry->server_name, ctx->sessions_count));
 
            /* This connection is performing this protocol too now */
            ((SilcSocketConnection)server_entry->connection)->protocol =
@@ -838,10 +890,15 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       silc_buffer_free(packet);
 
-      /* Announce all of our information */
-      silc_server_announce_servers(server, TRUE, 0, ctx->sock);
-      silc_server_announce_clients(server, 0, ctx->sock);
-      silc_server_announce_channels(server, 0, ctx->sock);
+      /* If we are not standalone and our primary is not the one we're
+        talking to now, then announce our information to it since we
+        haven't done that yet.  Standalone backup router announces
+        these during connecting to the primary. */
+      if (!server->standalone && SILC_PRIMARY_ROUTE(server) != ctx->sock) {
+       silc_server_announce_servers(server, TRUE, 0, ctx->sock);
+       silc_server_announce_clients(server, 0, ctx->sock);
+       silc_server_announce_channels(server, 0, ctx->sock);
+      }
 
       protocol->state++;
     } else {
@@ -851,7 +908,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       /* We should have received START or START_GLOBAL packet */
       if (ctx->type != SILC_SERVER_BACKUP_START &&
          ctx->type != SILC_SERVER_BACKUP_START_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
        break;
       }
 
@@ -859,7 +916,14 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
         to be back online. We send the CONNECTED packet after we've
         established the connection to the primary router. */
       primary = silc_server_config_get_primary_router(server);
-      if (primary && server->backup_primary) {
+      if (primary && server->backup_primary &&
+         !silc_server_num_sockets_by_remote(server,
+                                            silc_net_is_ip(primary->host) ?
+                                            primary->host : NULL,
+                                            silc_net_is_ip(primary->host) ?
+                                            NULL : primary->host,
+                                            primary->port,
+                                            SILC_SOCKET_TYPE_ROUTER)) {
        SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
                        ctx->session));
        silc_server_backup_reconnect(server,
@@ -870,6 +934,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
        /* Nowhere to connect just return the CONNECTED packet */
        SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
                        ctx->session));
+       SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
+                     ctx->session));
 
        /* Send the CONNECTED packet back to the backup router. */
        packet = silc_buffer_alloc(2);
@@ -905,15 +971,17 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have received CONNECTED packet */
       if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
        break;
       }
 
-      SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
-
       for (i = 0; i < ctx->sessions_count; i++) {
        if (ctx->sessions[i].session == ctx->session) {
          ctx->sessions[i].connected = TRUE;
+         SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
+                        ctx->sessions[i].server_entry->server_name,
+                        ctx->session));
+         SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
          break;
        }
       }
@@ -923,7 +991,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
          return;
       }
 
-      SILC_LOG_DEBUG(("All sessions has returned CONNECTED packets"));
+      SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
+                    "continuing"));
       SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
 
       /* Send with a timeout */
@@ -937,7 +1006,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have been received ENDING packet */
       if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
        break;
       }
 
@@ -962,7 +1031,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       silc_server_update_servers_by_server(server, ctx->sock->user_data, 
                                           server->router);
       silc_server_update_clients_by_server(server, ctx->sock->user_data,
-                                          server->router, TRUE, FALSE);
+                                          server->router, TRUE);
       if (server->server_type == SILC_SERVER)
        silc_server_update_channels_by_server(server, ctx->sock->user_data, 
                                              server->router);
@@ -1048,6 +1117,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       silc_buffer_free(packet);
 
       SILC_LOG_INFO(("We are now the primary router of our cell again"));
+      server->wait_backup = FALSE;
 
       /* For us this is the end of this protocol. */
       if (protocol->final_callback)
@@ -1065,11 +1135,11 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
         router. */
       if (ctx->type != SILC_SERVER_BACKUP_RESUMED &&
          ctx->type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
        break;
       }
 
-      SILC_LOG_DEBUG(("Received RESUMED from new primary router"));
+      SILC_LOG_INFO(("Received RESUMED from new primary router"));
 
       if (server->backup_router)
        server->server_type = SILC_BACKUP_ROUTER;
@@ -1083,34 +1153,35 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
          /* We have new primary router now */
          server->id_entry->router = router;
          server->router = router;
-         server->router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
-
          SILC_LOG_INFO(("Switching back to primary router %s",
                         server->router->server_name));
        } else {
          /* We are connected to new primary and now continue using it */
-         router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
          SILC_LOG_INFO(("Resuming the use of primary router %s",
                         router->server_name));
        }
+       server->backup_primary = FALSE;
 
        /* Update the client entries of the backup router to the new 
           router */
        silc_server_local_servers_toggle_enabled(server, FALSE);
+       router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
        silc_server_update_servers_by_server(server, backup_router, router);
-       silc_server_update_clients_by_server(server, NULL, router, 
-                                            FALSE, FALSE);
+       silc_server_update_clients_by_server(server, NULL, router, FALSE);
        if (server->server_type == SILC_SERVER)
          silc_server_update_channels_by_server(server, backup_router, router);
        silc_server_backup_replaced_del(server, backup_router);
 
        /* Announce all of our information to the router. */
        if (server->server_type == SILC_ROUTER)
-         silc_server_announce_servers(server, FALSE, 0, router->connection);
+         silc_server_announce_servers(server, FALSE, ctx->start,
+                                      router->connection);
 
        /* Announce our clients and channels to the router */
-       silc_server_announce_clients(server, 0, router->connection);
-       silc_server_announce_channels(server, 0, router->connection);
+       silc_server_announce_clients(server, ctx->start,
+                                    router->connection);
+       silc_server_announce_channels(server, ctx->start,
+                                     router->connection);
       }
 
       /* Send notify about primary router going down to local operators */
@@ -1138,6 +1209,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
   case SILC_PROTOCOL_STATE_FAILURE:
     /* Protocol has ended, call the final callback */
+    SILC_LOG_ERROR(("Error during backup resume: received Failure"));
     if (protocol->final_callback)
       silc_protocol_execute_final(protocol, server->schedule);
     else
@@ -1159,11 +1231,16 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
   SilcIDCacheList list;
   SilcIDCacheEntry id_cache;
 
+  silc_schedule_task_del_by_context(server->schedule, protocol);
+
   if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
       protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
     SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
   }
 
+  if (server->server_shutdown)
+    return;
+
   /* Remove this protocol from all server entries that has it */
   if (silc_idcache_get_all(server->local_list->servers, &list)) {
     if (silc_idcache_list_first(list, &id_cache)) {
@@ -1171,17 +1248,32 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
        server_entry = (SilcServerEntry)id_cache->context;
        sock = (SilcSocketConnection)server_entry->connection;
 
-       /* XXXX */
-       if (!sock) {
-         SILC_LOG_DEBUG(("******** REMOVE THIS TEST, IT ALLOWS A BUG"));
-         if (!silc_idcache_list_next(list, &id_cache))
-           break;
-         continue;
-       }
-
        if (sock->protocol == protocol) {
          sock->protocol = NULL;
 
+         /* Backup closes connection and reconnects if error occurred */
+         if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
+           if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
+               protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
+             server->backup_noswitch = TRUE;
+             server->server_type = SILC_BACKUP_ROUTER;
+
+             if (sock->user_data)
+               silc_server_free_sock_user_data(server, sock, NULL);
+             silc_server_close_connection(server, sock);
+
+             silc_schedule_task_add(server->schedule, 0,
+                                    silc_server_connect_to_router,
+                                    server, 1, 0,
+                                    SILC_TASK_TIMEOUT,
+                                    SILC_TASK_PRI_NORMAL);
+
+             if (!silc_idcache_list_next(list, &id_cache))
+               break;
+             continue;
+           }
+         }
+
          if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
            server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
        }
@@ -1199,17 +1291,32 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
        server_entry = (SilcServerEntry)id_cache->context;
        sock = (SilcSocketConnection)server_entry->connection;
 
-       /* XXXX */
-       if (!sock) {
-         SILC_LOG_DEBUG(("******** REMOVE THIS TEST, IT ALLOWS A BUG"));
-         if (!silc_idcache_list_next(list, &id_cache))
-           break;
-         continue;
-       }
-
        if (sock->protocol == protocol) {
          sock->protocol = NULL;
 
+         /* Backup closes connection and reconnects if error occurred */
+         if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
+           if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
+               protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
+             server->backup_noswitch = TRUE;
+             server->server_type = SILC_BACKUP_ROUTER;
+
+             if (sock->user_data)
+               silc_server_free_sock_user_data(server, sock, NULL);
+             silc_server_close_connection(server, sock);
+
+             silc_schedule_task_add(server->schedule, 0,
+                                    silc_server_connect_to_router,
+                                    server, 1, 0,
+                                    SILC_TASK_TIMEOUT,
+                                    SILC_TASK_PRI_NORMAL);
+
+             if (!silc_idcache_list_next(list, &id_cache))
+               break;
+             continue;
+           }
+         }
+
          if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
            server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
        }
@@ -1221,7 +1328,9 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
     silc_idcache_list_free(list);
   }
 
-  SILC_LOG_DEBUG(("Backup resuming protocol has ended"));
+  if (protocol->state != SILC_PROTOCOL_STATE_ERROR &&
+      protocol->state != SILC_PROTOCOL_STATE_FAILURE)
+    SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
 
   if (ctx->sock->protocol)
     ctx->sock->protocol = NULL;