Merged from silc_1_0_branch.
[silc.git] / apps / silcd / server_backup.c
index ae8d0a762a049c922d1efff04126d61a95888b38..78634fe2e02fcc35a8b58da88df56808590256ce 100644 (file)
@@ -96,6 +96,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
     if (!server->backup->servers[i].server) {
       server->backup->servers[i].server = backup_server;
       server->backup->servers[i].local = local;
+      server->backup->servers[i].port = htons(port);
       memset(server->backup->servers[i].ip.data, 0,
             sizeof(server->backup->servers[i].ip.data));
       silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -110,6 +111,7 @@ void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
                                         (i + 1));
   server->backup->servers[i].server = backup_server;
   server->backup->servers[i].local = local;
+  server->backup->servers[i].port = htons(port);
   memset(server->backup->servers[i].ip.data, 0,
         sizeof(server->backup->servers[i].ip.data));
   silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
@@ -130,7 +132,8 @@ SilcServerEntry silc_server_backup_get(SilcServer server,
 
   for (i = 0; i < server->backup->servers_count; i++) {
     if (server->backup->servers[i].server &&
-       !memcmp(&server->backup->servers[i].ip, &server_id->ip.data,
+       server->backup->servers[i].port == server_id->port &&
+       !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       SILC_LOG_DEBUG(("Found backup router %s for %s",
                      server->backup->servers[i].server->server_name,
@@ -244,7 +247,7 @@ bool silc_server_backup_replaced_get(SilcServer server,
   for (i = 0; i < server->backup->replaced_count; i++) {
     if (!server->backup->replaced[i])
       continue;
-    if (!memcmp(&server->backup->replaced[i]->ip, &server_id->ip.data,
+    if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
                sizeof(server_id->ip.data))) {
       if (server_entry)
        *server_entry = server->backup->replaced[i]->server;
@@ -330,6 +333,12 @@ void silc_server_backup_broadcast(SilcServer server,
 
     /* Now actually send the packet */
     silc_server_packet_send_real(server, sock, FALSE);
+
+    /* Check for mandatory rekey */
+    if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
+      silc_schedule_task_add(server->schedule, sender->sock,
+                            silc_server_rekey_callback, sender, 0, 1,
+                            SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
   }
 }
 
@@ -421,6 +430,17 @@ void silc_server_backup_send_dest(SilcServer server,
   }
 }
 
+SILC_TASK_CALLBACK(silc_server_backup_timeout) /* Timeout task: ends an unfinished backup resuming protocol as an error. */
+{
+  SilcProtocol protocol = context; /* the sock->protocol given when this task was added */
+  SilcServer server = app_context;
+
+  SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
+  silc_protocol_cancel(protocol, server->schedule);
+  protocol->state = SILC_PROTOCOL_STATE_ERROR; /* final callback checks this state to report the failure */
+  silc_protocol_execute_final(protocol, server->schedule);
+}
+
 /* Processes incoming RESUME_ROUTER packet. This can give the packet
    for processing to the protocol handler or allocate new protocol if
    start command is received. */
@@ -444,7 +464,7 @@ void silc_server_backup_resume_router(SilcServer server,
                             SILC_STR_UI_CHAR(&session),
                             SILC_STR_END);
   if (ret < 0) {
-    SILC_LOG_DEBUG(("Malformed packet received"));
+    SILC_LOG_ERROR(("Malformed resume router packet received"));
     return;
   }
   
@@ -492,7 +512,7 @@ void silc_server_backup_resume_router(SilcServer server,
       return;
     }
 
-    SILC_LOG_DEBUG(("Bad resume router packet"));
+    SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", type));
     return;
   }
 
@@ -528,12 +548,17 @@ void silc_server_backup_resume_router(SilcServer server,
     proto_ctx->start = time(0);
 
     SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
+    SILC_LOG_INFO(("Starting backup resuming protocol"));
 
     /* Run the backup resuming protocol */
     silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
                        &sock->protocol, proto_ctx, 
                        silc_server_protocol_backup_done);
     silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
+    silc_schedule_task_add(server->schedule, sock->sock,
+                          silc_server_backup_timeout,
+                          sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
+                          SILC_TASK_PRI_NORMAL);
   }
 }
 
@@ -541,8 +566,8 @@ void silc_server_backup_resume_router(SilcServer server,
 
 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
 {
+  SilcServer server = app_context;
   SilcServerConnection sconn = (SilcServerConnection)context;
-  SilcServer server = sconn->server;
   int sock;
   const char *server_ip;
 
@@ -577,8 +602,9 @@ void silc_server_backup_reconnect(SilcServer server,
 {
   SilcServerConnection sconn;
 
+  SILC_LOG_INFO(("Attempting to reconnect to primary router"));
+
   sconn = silc_calloc(1, sizeof(*sconn));
-  sconn->server = server;
   sconn->remote_host = strdup(ip);
   sconn->remote_port = port;
   sconn->callback = callback;
@@ -601,12 +627,18 @@ SILC_TASK_CALLBACK(silc_server_backup_connected_later)
   SilcSocketConnection sock = proto_ctx->sock;
 
   SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
+  SILC_LOG_INFO(("Starting backup resuming protocol"));
 
   /* Run the backup resuming protocol */
   silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
                      &sock->protocol, proto_ctx, 
                      silc_server_protocol_backup_done);
   silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
+
+  silc_schedule_task_add(server->schedule, sock->sock,
+                        silc_server_backup_timeout,
+                        sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
+                        SILC_TASK_PRI_NORMAL);
 }
 
 /* Called when we've established connection back to our primary router
@@ -680,6 +712,8 @@ static void silc_server_backup_connect_primary(SilcServer server,
   idata = (SilcIDListData)server_entry;
 
   SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
+  SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
+               ctx->session));
 
   /* Send the CONNECTED packet back to the backup router. */
   buffer = silc_buffer_alloc(2);
@@ -780,7 +814,9 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
            ctx->sessions[ctx->sessions_count].connected = FALSE;
            ctx->sessions[ctx->sessions_count].server_entry = server_entry;
 
-           SILC_LOG_DEBUG(("Sending START to %s (session %d)", 
+           SILC_LOG_DEBUG(("Sending START to %s (session %d)",
+                           server_entry->server_name, ctx->sessions_count));
+           SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
                            server_entry->server_name, ctx->sessions_count));
 
            /* This connection is performing this protocol too now */
@@ -827,6 +863,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
            SILC_LOG_DEBUG(("Sending START to %s (session %d)", 
                            server_entry->server_name, ctx->sessions_count));
+           SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
+                           server_entry->server_name, ctx->sessions_count));
 
            /* This connection is performing this protocol too now */
            ((SilcSocketConnection)server_entry->connection)->protocol =
@@ -870,7 +908,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
       /* We should have received START or START_GLOBAL packet */
       if (ctx->type != SILC_SERVER_BACKUP_START &&
          ctx->type != SILC_SERVER_BACKUP_START_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
        break;
       }
 
@@ -878,7 +916,14 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
         to be back online. We send the CONNECTED packet after we've
         established the connection to the primary router. */
       primary = silc_server_config_get_primary_router(server);
-      if (primary && server->backup_primary) {
+      if (primary && server->backup_primary &&
+         !silc_server_num_sockets_by_remote(server,
+                                            silc_net_is_ip(primary->host) ?
+                                            primary->host : NULL,
+                                            silc_net_is_ip(primary->host) ?
+                                            NULL : primary->host,
+                                            primary->port,
+                                            SILC_SOCKET_TYPE_ROUTER)) {
        SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
                        ctx->session));
        silc_server_backup_reconnect(server,
@@ -889,6 +934,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
        /* Nowhere to connect just return the CONNECTED packet */
        SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
                        ctx->session));
+       SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
+                     ctx->session));
 
        /* Send the CONNECTED packet back to the backup router. */
        packet = silc_buffer_alloc(2);
@@ -924,15 +971,17 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have received CONNECTED packet */
       if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
        break;
       }
 
-      SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
-
       for (i = 0; i < ctx->sessions_count; i++) {
        if (ctx->sessions[i].session == ctx->session) {
          ctx->sessions[i].connected = TRUE;
+         SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
+                        ctx->sessions[i].server_entry->server_name,
+                        ctx->session));
+         SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
          break;
        }
       }
@@ -942,7 +991,8 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
          return;
       }
 
-      SILC_LOG_DEBUG(("All sessions has returned CONNECTED packets"));
+      SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
+                    "continuing"));
       SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
 
       /* Send with a timeout */
@@ -956,7 +1006,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
       /* We should have been received ENDING packet */
       if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
        break;
       }
 
@@ -1085,11 +1135,11 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
         router. */
       if (ctx->type != SILC_SERVER_BACKUP_RESUMED &&
          ctx->type != SILC_SERVER_BACKUP_RESUMED_GLOBAL) {
-       SILC_LOG_DEBUG(("Bad resume router packet"));
+       SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
        break;
       }
 
-      SILC_LOG_DEBUG(("Received RESUMED from new primary router"));
+      SILC_LOG_INFO(("Received RESUMED from new primary router"));
 
       if (server->backup_router)
        server->server_type = SILC_BACKUP_ROUTER;
@@ -1110,6 +1160,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
          SILC_LOG_INFO(("Resuming the use of primary router %s",
                         router->server_name));
        }
+       server->backup_primary = FALSE;
 
        /* Update the client entries of the backup router to the new 
           router */
@@ -1158,6 +1209,7 @@ SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
 
   case SILC_PROTOCOL_STATE_FAILURE:
     /* Protocol has ended, call the final callback */
+    SILC_LOG_ERROR(("Error during backup resume: received Failure"));
     if (protocol->final_callback)
       silc_protocol_execute_final(protocol, server->schedule);
     else
@@ -1179,11 +1231,16 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
   SilcIDCacheList list;
   SilcIDCacheEntry id_cache;
 
+  silc_schedule_task_del_by_context(server->schedule, protocol);
+
   if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
       protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
     SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
   }
 
+  if (server->server_shutdown)
+    return;
+
   /* Remove this protocol from all server entries that has it */
   if (silc_idcache_get_all(server->local_list->servers, &list)) {
     if (silc_idcache_list_first(list, &id_cache)) {
@@ -1194,6 +1251,29 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
        if (sock->protocol == protocol) {
          sock->protocol = NULL;
 
+         /* Backup closes connection and reconnects if error occurred */
+         if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
+           if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
+               protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
+             server->backup_noswitch = TRUE;
+             server->server_type = SILC_BACKUP_ROUTER;
+
+             if (sock->user_data)
+               silc_server_free_sock_user_data(server, sock, NULL);
+             silc_server_close_connection(server, sock);
+
+             silc_schedule_task_add(server->schedule, 0,
+                                    silc_server_connect_to_router,
+                                    server, 1, 0,
+                                    SILC_TASK_TIMEOUT,
+                                    SILC_TASK_PRI_NORMAL);
+
+             if (!silc_idcache_list_next(list, &id_cache))
+               break;
+             continue;
+           }
+         }
+
          if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
            server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
        }
@@ -1214,6 +1294,29 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
        if (sock->protocol == protocol) {
          sock->protocol = NULL;
 
+         /* Backup closes connection and reconnects if error occurred */
+         if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
+           if (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
+               protocol->state == SILC_PROTOCOL_STATE_FAILURE) {
+             server->backup_noswitch = TRUE;
+             server->server_type = SILC_BACKUP_ROUTER;
+
+             if (sock->user_data)
+               silc_server_free_sock_user_data(server, sock, NULL);
+             silc_server_close_connection(server, sock);
+
+             silc_schedule_task_add(server->schedule, 0,
+                                    silc_server_connect_to_router,
+                                    server, 1, 0,
+                                    SILC_TASK_TIMEOUT,
+                                    SILC_TASK_PRI_NORMAL);
+
+             if (!silc_idcache_list_next(list, &id_cache))
+               break;
+             continue;
+           }
+         }
+
          if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
            server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
        }
@@ -1225,7 +1328,9 @@ SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
     silc_idcache_list_free(list);
   }
 
-  SILC_LOG_DEBUG(("Backup resuming protocol has ended"));
+  if (protocol->state != SILC_PROTOCOL_STATE_ERROR &&
+      protocol->state != SILC_PROTOCOL_STATE_FAILURE)
+    SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
 
   if (ctx->sock->protocol)
     ctx->sock->protocol = NULL;