Merged silc_1_0_branch to trunk.
[silc.git] / apps / silcd / server_backup.c
1 /*
2
3   server_backup.c
4
5   Author: Pekka Riikonen <priikone@silcnet.org>
6
7   Copyright (C) 2001 - 2005 Pekka Riikonen
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; version 2 of the License.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18 */
19 /* $Id$ */
20
21 #include "serverincludes.h"
22 #include "server_internal.h"
23
/* Forward declarations, so that the task callbacks and the helper below
   can be referenced before their definitions. */
24 SILC_TASK_CALLBACK(silc_server_protocol_backup_done);      /* Passed to silc_protocol_alloc when the resuming protocol is started */
25 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router);  /* Timeout task that (re)tries the connection to a remote router */
26 SILC_TASK_CALLBACK(silc_server_backup_announce_watches);   /* NOTE(review): defined outside the part of the file reviewed here */
27
/* NOTE(review): defined outside the part of the file reviewed here; the
   signature suggests a connect callback - confirm against its users. */
28 static void silc_server_backup_connect_primary(SilcServer server,
29                                                SilcServerEntry server_entry,
30                                                void *context);
31
32
33 /************************** Types and Definitions ***************************/
34
35 /* Backup router.  One slot of the backup router table; a slot whose
   `server' is NULL is unused and is reused by silc_server_backup_add. */
36 typedef struct {
37   SilcServerEntry server;       /* Backup router entry, NULL when the slot is unused */
38   SilcIDIP ip;                  /* Binary IP address of the router this backup replaces */
39   SilcUInt16 port;              /* Port of the replaced router, stored as SILC_SWAB_16(port) */
40   bool local;                   /* TRUE if the backup router is in the local cell */
41 } SilcServerBackupEntry;
42
43 /* Holds IP address and port of the primary router that was replaced
44    by backup router.  Records are allocated by
   silc_server_backup_replaced_add and freed by
   silc_server_backup_replaced_del. */
45 typedef struct {
46   SilcIDIP ip;                  /* IP copied from the replaced router's server ID */
47   SilcUInt16 port;              /* NOTE(review): not assigned by silc_server_backup_replaced_add; lookups compare only `ip' */
48   SilcServerEntry server;       /* Backup router that replaced the primary */
49 } SilcServerBackupReplaced;
50
51 /* Backup context.  Stored in server->backup; allocated on first use by
   silc_server_backup_add or silc_server_backup_replaced_add. */
52 struct SilcServerBackupStruct {
53   SilcServerBackupEntry *servers;       /* Backup router table, may contain unused slots */
54   SilcUInt32 servers_count;             /* Number of slots in `servers' */
55   SilcServerBackupReplaced **replaced;  /* Replaced primary routers, NULL entries are free slots */
56   SilcUInt32 replaced_count;            /* Number of slots in `replaced' */
57 };
58
/* One session of the backup resuming protocol. */
59 typedef struct {
60   SilcUInt8 session;            /* Session ID, matched against the session byte of received RESUME_ROUTER packets */
61   bool connected;               /* NOTE(review): not used in the part of the file reviewed here - confirm meaning */
62   SilcServerEntry server_entry; /* NOTE(review): presumably the server this session belongs to - confirm */
63 } SilcServerBackupProtocolSession;
64
65 /* Backup resuming protocol context.  Given to silc_protocol_alloc as the
   protocol context when the resuming protocol is started. */
66 typedef struct {
67   SilcServer server;                            /* Server running the protocol */
68   SilcSocketConnection sock;                    /* Referenced (silc_socket_dup) connection the protocol runs on */
69   SilcUInt8 type;                               /* Type byte of the RESUME_ROUTER packet being processed */
70   SilcUInt8 session;                            /* Session byte of the RESUME_ROUTER packet being processed */
71   SilcServerBackupProtocolSession *sessions;    /* Known sessions, searched when a packet arrives */
72   SilcUInt32 sessions_count;                    /* Number of entries in `sessions' */
73   SilcUInt32 initiator_restart;                 /* NOTE(review): not used in the part of the file reviewed here */
74   long start;                                   /* time(0) taken when the context was created */
75   unsigned int responder        : 1;            /* Set when the protocol was started by a received START */
76   unsigned int received_failure : 1;            /* NOTE(review): not used in the part of the file reviewed here */
77   unsigned int timeout          : 1;            /* Set by silc_server_backup_timeout */
78 } *SilcServerBackupProtocolContext;
79
80
81 /********************* Backup Configuration Routines ************************/
82
83 /* Adds the `backup_server' to be one of our backup router. This can be
84    called multiple times to set multiple backup routers. The `ip' and `port'
85    is the IP and port that the `backup_router' will replace if the `ip'
86    will become unresponsive. If `local' is TRUE then the `backup_server' is
87    in the local cell, if FALSE it is in some other cell. */
88
89 void silc_server_backup_add(SilcServer server, SilcServerEntry backup_server,
90                             const char *ip, int port, bool local)
91 {
92   int i;
93
94   if (!ip)
95     return;
96
97   if (!server->backup) {
98     server->backup = silc_calloc(1, sizeof(*server->backup));
99     if (!server->backup)
100       return;
101   }
102
103   /* See if already added */
104   for (i = 0; i < server->backup->servers_count; i++) {
105     if (server->backup->servers[i].server == backup_server)
106       return;
107   }
108
109   SILC_LOG_DEBUG(("Backup router %s will replace %s",
110                   ((SilcSocketConnection)backup_server->connection)->ip,
111                   ip, port));
112
113   for (i = 0; i < server->backup->servers_count; i++) {
114     if (!server->backup->servers[i].server) {
115       server->backup->servers[i].server = backup_server;
116       server->backup->servers[i].local = local;
117       server->backup->servers[i].port = SILC_SWAB_16(port);
118       memset(server->backup->servers[i].ip.data, 0,
119              sizeof(server->backup->servers[i].ip.data));
120       silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
121                         sizeof(server->backup->servers[i].ip.data));
122       return;
123     }
124   }
125
126   i = server->backup->servers_count;
127   server->backup->servers = silc_realloc(server->backup->servers,
128                                          sizeof(*server->backup->servers) *
129                                          (i + 1));
130   server->backup->servers[i].server = backup_server;
131   server->backup->servers[i].local = local;
132   server->backup->servers[i].port = SILC_SWAB_16(port);
133   memset(server->backup->servers[i].ip.data, 0,
134          sizeof(server->backup->servers[i].ip.data));
135   silc_net_addr2bin(ip, server->backup->servers[i].ip.data,
136                     sizeof(server->backup->servers[i].ip.data));
137   server->backup->servers_count++;
138 }
139
140 /* Returns backup router for IP and port in `server_id' or NULL if there
141    does not exist backup router. */
142
143 SilcServerEntry silc_server_backup_get(SilcServer server,
144                                        SilcServerID *server_id)
145 {
146   int i;
147
148   if (!server->backup)
149     return NULL;
150
151   for (i = 0; i < server->backup->servers_count; i++) {
152     if (server->backup->servers[i].server &&
153         server->backup->servers[i].port == server_id->port &&
154         !memcmp(server->backup->servers[i].ip.data, server_id->ip.data,
155                 sizeof(server_id->ip.data))) {
156       SILC_LOG_DEBUG(("Found backup router %s for %s",
157                       server->backup->servers[i].server->server_name,
158                       silc_id_render(server_id, SILC_ID_SERVER)));
159       return server->backup->servers[i].server;
160     }
161   }
162
163   return NULL;
164 }
165
166 /* Deletes the backup server `server_entry'. */
167
168 void silc_server_backup_del(SilcServer server, SilcServerEntry server_entry)
169 {
170   int i;
171
172   if (!server->backup)
173     return;
174
175   for (i = 0; i < server->backup->servers_count; i++) {
176     if (server->backup->servers[i].server == server_entry) {
177       SILC_LOG_DEBUG(("Removing %s as backup router",
178                       silc_id_render(server->backup->servers[i].server->id,
179                                      SILC_ID_SERVER)));
180       server->backup->servers[i].server = NULL;
181       memset(server->backup->servers[i].ip.data, 0,
182              sizeof(server->backup->servers[i].ip.data));
183     }
184   }
185 }
186
187 /* Frees all data allocated for backup routers.  Call this after deleting
188    all backup routers and when new routers are added no more, for example
189    when shutting down the server. */
190
191 void silc_server_backup_free(SilcServer server)
192 {
193   int i;
194
195   if (!server->backup)
196     return;
197
198   /* Delete existing servers if caller didn't do it */
199   for (i = 0; i < server->backup->servers_count; i++) {
200     if (server->backup->servers[i].server)
201       silc_server_backup_del(server, server->backup->servers[i].server);
202   }
203
204   silc_free(server->backup->servers);
205   silc_free(server->backup);
206   server->backup = NULL;
207 }
208
209 /* Marks the IP address and port from the `server_id' as  being replaced
210    by backup router indicated by the `server'. If the router connects at
211    a later time we can check whether it has been replaced by an backup
212    router. */
213
214 void silc_server_backup_replaced_add(SilcServer server,
215                                      SilcServerID *server_id,
216                                      SilcServerEntry server_entry)
217 {
218   int i;
219   SilcServerBackupReplaced *r = silc_calloc(1, sizeof(*r));;
220
221   if (!server->backup)
222     server->backup = silc_calloc(1, sizeof(*server->backup));
223   if (!server->backup->replaced) {
224     server->backup->replaced =
225       silc_calloc(1, sizeof(*server->backup->replaced));
226     server->backup->replaced_count = 1;
227   }
228
229   SILC_LOG_DEBUG(("Replacing router %s with %s",
230                   silc_id_render(server_id, SILC_ID_SERVER),
231                   server_entry->server_name));
232
233   memcpy(&r->ip, &server_id->ip, sizeof(server_id->ip));
234   r->server = server_entry;
235
236   for (i = 0; i < server->backup->replaced_count; i++) {
237     if (!server->backup->replaced[i]) {
238       server->backup->replaced[i] = r;
239       return;
240     }
241   }
242
243   i = server->backup->replaced_count;
244   server->backup->replaced = silc_realloc(server->backup->replaced,
245                                           sizeof(*server->backup->replaced) *
246                                           (i + 1));
247   server->backup->replaced[i] = r;
248   server->backup->replaced_count++;
249 }
250
251 /* Checks whether the IP address and port from the `server_id' has been
252    replaced by an backup router. If it has been then this returns TRUE
253    and the bacup router entry to the `server' pointer if non-NULL. Returns
254    FALSE if the router is not replaced by backup router. */
255
256 bool silc_server_backup_replaced_get(SilcServer server,
257                                      SilcServerID *server_id,
258                                      SilcServerEntry *server_entry)
259 {
260   int i;
261
262   if (!server->backup || !server->backup->replaced)
263     return FALSE;
264
265   for (i = 0; i < server->backup->replaced_count; i++) {
266     if (!server->backup->replaced[i])
267       continue;
268     if (!memcmp(server->backup->replaced[i]->ip.data, server_id->ip.data,
269                 sizeof(server_id->ip.data))) {
270       if (server_entry)
271         *server_entry = server->backup->replaced[i]->server;
272       SILC_LOG_DEBUG(("Router %s is replaced by %s",
273                       silc_id_render(server_id, SILC_ID_SERVER),
274                       server->backup->replaced[i]->server->server_name));
275       return TRUE;
276     }
277   }
278
279   SILC_LOG_DEBUG(("Router %s is not replaced by backup router",
280                   silc_id_render(server_id, SILC_ID_SERVER)));
281   return FALSE;
282 }
283
284 /* Deletes a replaced host by the set `server_entry. */
285
286 void silc_server_backup_replaced_del(SilcServer server,
287                                      SilcServerEntry server_entry)
288 {
289   int i;
290
291   if (!server->backup || !server->backup->replaced)
292     return;
293
294   for (i = 0; i < server->backup->replaced_count; i++) {
295     if (!server->backup->replaced[i])
296       continue;
297     if (server->backup->replaced[i]->server == server_entry) {
298       silc_free(server->backup->replaced[i]);
299       server->backup->replaced[i] = NULL;
300     }
301   }
302 }
303
304 /* Broadcast the received packet indicated by `packet' to all of our backup
305    routers. All router wide information is passed using broadcast packets.
306    That is why all backup routers need to get this data too. It is expected
307    that the caller already knows that the `packet' is broadcast packet. */
308
309 void silc_server_backup_broadcast(SilcServer server,
310                                   SilcSocketConnection sender,
311                                   SilcPacketContext *packet)
312 {
313   SilcServerEntry backup;
314   SilcSocketConnection sock;
315   SilcBuffer buffer;
316   const SilcBufferStruct p;
317   SilcIDListData idata;
318   int i;
319
320   if (!server->backup || server->server_type != SILC_ROUTER)
321     return;
322
323   SILC_LOG_DEBUG(("Broadcasting received packet to backup routers"));
324
325   buffer = packet->buffer;
326   silc_buffer_push(buffer, buffer->data - buffer->head);
327
328   for (i = 0; i < server->backup->servers_count; i++) {
329     backup = server->backup->servers[i].server;
330
331     if (!backup || backup->connection == sender ||
332         server->backup->servers[i].local == FALSE)
333       continue;
334     if (server->backup->servers[i].server == server->id_entry)
335       continue;
336
337     idata = (SilcIDListData)backup;
338     sock = backup->connection;
339
340     if (!silc_packet_send_prepare(sock, 0, 0, buffer->len, idata->hmac_send,
341                                   (const SilcBuffer)&p)) {
342       SILC_LOG_ERROR(("Cannot send packet"));
343       return;
344     }
345     silc_buffer_put((SilcBuffer)&p, buffer->data, buffer->len);
346     silc_packet_encrypt(idata->send_key, idata->hmac_send, idata->psn_send++,
347                         (SilcBuffer)&p, p.len);
348
349     SILC_LOG_HEXDUMP(("Broadcasted packet, len %d", p.len), p.data, p.len);
350
351     /* Now actually send the packet */
352     silc_server_packet_send_real(server, sock, FALSE);
353
354     /* Check for mandatory rekey */
355     if (idata->psn_send == SILC_SERVER_REKEY_THRESHOLD)
356       silc_schedule_task_add(server->schedule, sender->sock,
357                              silc_server_rekey_callback, sender, 0, 1,
358                              SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
359   }
360 }
361
362 /* A generic routine to send data to all backup routers. If the `sender'
363    is provided it will indicate the original sender of the packet and the
364    packet won't be resent to that entity. The `data' is the data that will
365    be assembled to packet context before sending. The packet will be
366    encrypted this function. If the `force_send' is TRUE the data is sent
367    immediately and not put to queue. If `local' is TRUE then the packet
368    will be sent only to local backup routers inside the cell. If false the
369    packet can go from one cell to the other. This function has no effect
370    if there are no any backup routers. */
371
372 void silc_server_backup_send(SilcServer server,
373                              SilcServerEntry sender,
374                              SilcPacketType type,
375                              SilcPacketFlags flags,
376                              unsigned char *data,
377                              SilcUInt32 data_len,
378                              bool force_send,
379                              bool local)
380 {
381   SilcServerEntry backup;
382   SilcSocketConnection sock;
383   int i;
384
385   if (!server->backup || server->server_type != SILC_ROUTER)
386     return;
387
388   for (i = 0; i < server->backup->servers_count; i++) {
389     backup = server->backup->servers[i].server;
390     if (!backup || sender == backup)
391       continue;
392     if (local && server->backup->servers[i].local == FALSE)
393       continue;
394     if (server->backup->servers[i].server == server->id_entry)
395       continue;
396
397     sock = backup->connection;
398
399     SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
400                     silc_get_packet_name(type), sock->hostname, sock->ip));
401
402     silc_server_packet_send(server, backup->connection, type, flags,
403                             data, data_len, force_send);
404   }
405 }
406
407 /* Same as silc_server_backup_send but sets a specific Destination ID to
408    the packet. The Destination ID is indicated by the `dst_id' and the
409    ID type `dst_id_type'. For example, packets destined to channels must
410    be sent using this function. */
411
412 void silc_server_backup_send_dest(SilcServer server,
413                                   SilcServerEntry sender,
414                                   SilcPacketType type,
415                                   SilcPacketFlags flags,
416                                   void *dst_id,
417                                   SilcIdType dst_id_type,
418                                   unsigned char *data,
419                                   SilcUInt32 data_len,
420                                   bool force_send,
421                                   bool local)
422 {
423   SilcServerEntry backup;
424   SilcSocketConnection sock;
425   int i;
426
427   if (!server->backup || server->server_type != SILC_ROUTER)
428     return;
429
430   for (i = 0; i < server->backup->servers_count; i++) {
431     backup = server->backup->servers[i].server;
432     if (!backup || sender == backup)
433       continue;
434     if (local && server->backup->servers[i].local == FALSE)
435       continue;
436     if (server->backup->servers[i].server == server->id_entry)
437       continue;
438
439     sock = backup->connection;
440
441     SILC_LOG_DEBUG(("Sending %s packet to backup router %s (%s)",
442                     silc_get_packet_name(type), sock->hostname, sock->ip));
443
444     silc_server_packet_send_dest(server, backup->connection, type, flags,
445                                  dst_id, dst_id_type, data, data_len,
446                                  force_send);
447   }
448 }
449
450 /* Send the START_USE indication to remote connection.  If `failure' is
451    TRUE then this sends SILC_PACKET_FAILURE.  Otherwise it sends
452    SILC_PACKET_RESUME_ROUTER. */
453
454 void silc_server_backup_send_start_use(SilcServer server,
455                                        SilcSocketConnection sock,
456                                        bool failure)
457 {
458   unsigned char data[4];
459
460   SILC_LOG_DEBUG(("Sending START_USE (%s) to %s",
461                   failure ? "failure" : "success", sock->ip));
462
463   if (failure) {
464     SILC_PUT32_MSB(SILC_SERVER_BACKUP_START_USE, data);
465     silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
466                             data, 4, FALSE);
467   } else {
468     data[0] = SILC_SERVER_BACKUP_START_USE;
469     data[1] = 0;
470     silc_server_packet_send(server, sock,
471                             SILC_PACKET_RESUME_ROUTER, 0,
472                             data, 2, FALSE);
473   }
474 }
475
476 /* Send the REPLACED indication to remote router.  This is send by the
477    primary router (remote router) of the primary router that came back
478    online.  This is not sent by backup router or any other server. */
479
480 void silc_server_backup_send_replaced(SilcServer server,
481                                       SilcSocketConnection sock)
482 {
483   unsigned char data[4];
484
485   SILC_LOG_DEBUG(("Sending REPLACED (%s) to %s", sock->ip));
486
487   data[0] = SILC_SERVER_BACKUP_REPLACED;
488   data[1] = 0;
489   silc_server_packet_send(server, sock,
490                           SILC_PACKET_RESUME_ROUTER, 0,
491                           data, 2, FALSE);
492 }
493
494
495 /************************ Backup Resuming Protocol **************************/
496
497 /* Timeout callback for protocol */
498
499 SILC_TASK_CALLBACK(silc_server_backup_timeout)
500 {
501   SilcProtocol protocol = context;
502   SilcServerBackupProtocolContext ctx = protocol->context;
503   SilcServer server = app_context;
504
505   SILC_LOG_INFO(("Timeout occurred during backup resuming protocol"));
506   ctx->timeout = TRUE;
507   silc_protocol_cancel(protocol, server->schedule);
508   protocol->state = SILC_PROTOCOL_STATE_ERROR;
509   silc_protocol_execute_final(protocol, server->schedule);
510 }
511
512 /* Callback to start the protocol as responder */
513
514 SILC_TASK_CALLBACK(silc_server_backup_responder_start)
515 {
516   SilcServerBackupProtocolContext proto_ctx = context;
517   SilcSocketConnection sock = proto_ctx->sock;
518   SilcServer server = app_context;
519
520   /* If other protocol is executing at the same time, start with timeout. */
521   if (sock->protocol) {
522     SILC_LOG_DEBUG(("Other protocol is executing, wait for it to finish"));
523     silc_schedule_task_add(server->schedule, sock->sock,
524                            silc_server_backup_responder_start,
525                            proto_ctx, 2, 0,
526                            SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
527     return;
528   }
529
530   /* Run the backup resuming protocol */
531   silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
532                       &sock->protocol, proto_ctx,
533                       silc_server_protocol_backup_done);
534   silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
535   silc_schedule_task_add(server->schedule, sock->sock,
536                          silc_server_backup_timeout,
537                          sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
538                          SILC_TASK_PRI_NORMAL);
539 }
540
541 /* Callback to send START_USE to backup to check whether using backup
542    is ok. */
543
544 SILC_TASK_CALLBACK(silc_server_backup_check_status)
545 {
546   SilcSocketConnection sock = context;
547   SilcServer server = app_context;
548
549   /* Check whether we are still using backup */
550   if (!server->backup_primary)
551     return;
552
553   silc_server_backup_send_start_use(server, sock, FALSE);
554   silc_socket_free(sock);       /* unref */
555 }
556
/* Context of a pending PING command that was sent to the primary router
   to check its status.  Allocated in silc_server_backup_resume_router and
   freed in silc_server_backup_ping_reply. */
557 typedef struct {
558   SilcServer server;            /* Server that sent the PING */
559   SilcSocketConnection sock;    /* Referenced (silc_socket_dup) connection that sent START_USE */
560   SilcPacketContext *packet;    /* Duplicated RESUME_ROUTER packet, reprocessed if the primary is down */
561 } *SilcServerBackupPing;
562
563 /* PING command reply callback */
564
565 void silc_server_backup_ping_reply(void *context, void *reply)
566 {
567   SilcServerBackupPing pc = context;
568   SilcServerCommandReplyContext cmdr = reply;
569
570   if (cmdr && !silc_command_get_status(cmdr->payload, NULL, NULL)) {
571     /* Timeout error occurred, the primary is really down. */
572     SilcSocketConnection primary = SILC_PRIMARY_ROUTE(pc->server);
573
574     SILC_LOG_DEBUG(("PING timeout, primary is down"));
575
576     if (primary) {
577       if (primary->user_data)
578         silc_server_free_sock_user_data(pc->server, primary, NULL);
579       SILC_SET_DISCONNECTING(primary);
580       silc_server_close_connection(pc->server, primary);
581     }
582
583     /* Reprocess the RESUME_ROUTER packet */
584     silc_server_backup_resume_router(pc->server, pc->sock, pc->packet);
585   } else {
586     /* The primary is not down, refuse to serve the server as primary */
587     SILC_LOG_DEBUG(("PING received, primary is up"));
588     silc_server_backup_send_start_use(pc->server, pc->sock, TRUE);
589   }
590
591   silc_socket_free(pc->sock);
592   silc_packet_context_free(pc->packet);
593   silc_free(pc);
594 }
595
596 /* Processes incoming RESUME_ROUTER packet. This can give the packet
597    for processing to the protocol handler or allocate new protocol if
598    start command is received. */
599
600 void silc_server_backup_resume_router(SilcServer server,
601                                       SilcSocketConnection sock,
602                                       SilcPacketContext *packet)
603 {
604   SilcUInt8 type, session;
605   SilcServerBackupProtocolContext ctx;
606   SilcIDListData idata;
607   int i, ret;
608
609   SILC_LOG_DEBUG(("Received RESUME_ROUTER packet"));
610
611   if (sock->type == SILC_SOCKET_TYPE_CLIENT ||
612       sock->type == SILC_SOCKET_TYPE_UNKNOWN) {
613     SILC_LOG_DEBUG(("Bad packet received"));
614     return;
615   }
616
617   idata = (SilcIDListData)sock->user_data;
618
619   ret = silc_buffer_unformat(packet->buffer,
620                              SILC_STR_UI_CHAR(&type),
621                              SILC_STR_UI_CHAR(&session),
622                              SILC_STR_END);
623   if (ret < 0) {
624     SILC_LOG_ERROR(("Malformed resume router packet received"));
625     return;
626   }
627
628   /* Check whether this packet is used to tell us that server will start
629      using us as primary router. */
630   if (type == SILC_SERVER_BACKUP_START_USE) {
631     SilcBuffer idp;
632     SilcServerBackupPing pc;
633
634     /* If we are normal server then backup router has sent us back
635        this reply and we use the backup as primary router now. */
636     if (server->server_type == SILC_SERVER) {
637       /* Nothing to do here actually, since we have switched already. */
638       SILC_LOG_DEBUG(("Received successful START_USE from backup router"));
639       return;
640     }
641
642     /* Backup router following. */
643
644     /* If we are marked as router then the primary is down and we send
645        success START_USE back to the server. */
646     if (server->server_type == SILC_ROUTER) {
647       SILC_LOG_DEBUG(("Sending success START_USE back to %s", sock->ip));
648       silc_server_backup_send_start_use(server, sock, FALSE);
649       return;
650     }
651
652     /* We have just lost primary, send success START_USE back */
653     if (server->standalone) {
654       SILC_LOG_DEBUG(("We are stanalone, sending success START_USE back to %s",
655                       sock->ip));
656       silc_server_backup_send_start_use(server, sock, FALSE);
657       return;
658     }
659
660     /* We are backup router. This server claims that our primary is down.
661        We will check this ourselves by sending PING command to the primary. */
662     SILC_LOG_DEBUG(("Sending PING to detect status of primary router"));
663     idp = silc_id_payload_encode(server->router->id, SILC_ID_SERVER);
664     silc_server_send_command(server, SILC_PRIMARY_ROUTE(server),
665                              SILC_COMMAND_PING, ++server->cmd_ident, 1,
666                              1, idp->data, idp->len);
667     silc_buffer_free(idp);
668
669     /* Reprocess this packet after received reply from router */
670     pc = silc_calloc(1, sizeof(*pc));
671     pc->server = server;
672     pc->sock = silc_socket_dup(sock);
673     pc->packet = silc_packet_context_dup(packet);
674     silc_server_command_pending_timed(server, SILC_COMMAND_PING,
675                                       server->cmd_ident,
676                                       silc_server_backup_ping_reply, pc, 15);
677     return;
678   }
679
680
681   /* Start the resuming protocol if requested. */
682   if (type == SILC_SERVER_BACKUP_START) {
683     /* We have received a start for resuming protocol.  We are either
684        primary router that came back online or normal server. */
685     SilcServerBackupProtocolContext proto_ctx;
686
687     /* If backup had closed the connection earlier we won't allow resuming
688        since we (primary router) have never gone away. */
689     if (server->server_type == SILC_ROUTER && !server->backup_router &&
690         server->backup_closed) {
691       unsigned char data[4];
692       SILC_LOG_DEBUG(("Backup resuming not allowed since we are still "
693                       "primary router"));
694       SILC_LOG_INFO(("Backup resuming not allowed since we are still "
695                      "primary router"));
696       SILC_PUT32_MSB(SILC_SERVER_BACKUP_START, data);
697       silc_server_packet_send(server, sock, SILC_PACKET_FAILURE, 0,
698                               data, 4, FALSE);
699       server->backup_closed = FALSE;
700       return;
701     }
702
703     proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
704     proto_ctx->server = server;
705     proto_ctx->sock = silc_socket_dup(sock);
706     proto_ctx->responder = TRUE;
707     proto_ctx->type = type;
708     proto_ctx->session = session;
709     proto_ctx->start = time(0);
710
711     SILC_LOG_DEBUG(("Starting backup resuming protocol as responder"));
712     SILC_LOG_INFO(("Starting backup resuming protocol"));
713
714     /* Start protocol immediately */
715     silc_schedule_task_add(server->schedule, sock->sock,
716                            silc_server_backup_responder_start,
717                            proto_ctx, 0, 1,
718                            SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
719     return;
720   }
721
722
723   /* If we are router and the packet is coming from our primary router
724      then it means we have been replaced by an backup router in our cell. */
725   if (type == SILC_SERVER_BACKUP_REPLACED &&
726       server->server_type == SILC_ROUTER &&
727       sock->type == SILC_SOCKET_TYPE_ROUTER &&
728       SILC_PRIMARY_ROUTE(server) == sock) {
729     /* We have been replaced by an backup router in our cell. We must
730        mark our primary router connection disabled since we are not allowed
731        to use it at this moment. */
732     SILC_LOG_INFO(("We are replaced by an backup router in this cell, will "
733                    "wait until backup resuming protocol is executed"));
734     idata->status |= SILC_IDLIST_STATUS_DISABLED;
735     return;
736   }
737
738
739   /* Activate the shared protocol context for this socket connection
740      if necessary */
741   if (type == SILC_SERVER_BACKUP_RESUMED &&
742       sock->type == SILC_SOCKET_TYPE_ROUTER && !sock->protocol &&
743       idata->status & SILC_IDLIST_STATUS_DISABLED) {
744     SilcServerEntry backup_router;
745
746     if (silc_server_backup_replaced_get(server, ((SilcServerEntry)idata)->id,
747                                         &backup_router)) {
748       SilcSocketConnection bsock =
749         (SilcSocketConnection)backup_router->connection;
750       if (bsock->protocol && bsock->protocol->protocol &&
751           bsock->protocol->protocol->type == SILC_PROTOCOL_SERVER_BACKUP) {
752         sock->protocol = bsock->protocol;
753         ctx = sock->protocol->context;
754         if (ctx->sock)
755           silc_socket_free(ctx->sock); /* unref */
756         ctx->sock = silc_socket_dup(sock);
757       }
758     }
759   }
760
761
762   /* Call the resuming protocol if the protocol is active. */
763   if (SILC_SERVER_IS_BACKUP(sock)) {
764     ctx = sock->protocol->context;
765     ctx->type = type;
766
767     for (i = 0; i < ctx->sessions_count; i++) {
768       if (session == ctx->sessions[i].session) {
769         ctx->session = session;
770         silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
771         return;
772       }
773     }
774
775     /* If RESUMED received the session ID is zero, execute the protocol. */
776     if (type == SILC_SERVER_BACKUP_RESUMED) {
777       silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
778       return;
779     }
780
781     SILC_LOG_ERROR(("Unknown backup resuming session %d", session));
782     return;
783   }
784 }
785
786 /* callback for async connection to remote router */
787
788 SILC_TASK_CALLBACK(silc_server_backup_connection_established)
789 {
790   SilcServer server = app_context;
791   SilcServerConnection sconn = (SilcServerConnection)context;
792   int sock = fd;
793   int opt = EINVAL, optlen = sizeof(opt);
794
795   silc_schedule_task_del_by_fd(server->schedule, sock);
796   silc_schedule_unset_listen_fd(server->schedule, sock);
797
798   if (silc_net_get_socket_opt(sock, SOL_SOCKET, SO_ERROR, &opt, &optlen) ||
799       (opt != 0)) {
800     SILC_LOG_DEBUG(("Could not connect to router %s:%d: %s", sconn->remote_host,
801                     sconn->remote_port, strerror(opt)));
802
803     if (server->server_type == SILC_SERVER) {
804       sconn->retry_count++;
805       if (sconn->retry_count > 3) {
806         silc_free(sconn->remote_host);
807         silc_free(sconn);
808         return;
809       }
810     }
811     silc_schedule_task_add(server->schedule, 0,
812                            silc_server_backup_connect_to_router,
813                            context, 10, 0, SILC_TASK_TIMEOUT,
814                            SILC_TASK_PRI_NORMAL);
815     return;
816   }
817
818   SILC_LOG_DEBUG(("Connection to router %s:%d established", sconn->remote_host,
819                   sconn->remote_port));
820
821   /* Continue with key exchange protocol */
822   silc_server_start_key_exchange(server, sconn, sock);
823 }
824
825
826 /* Timeout task callback to connect to remote router */
827
828 SILC_TASK_CALLBACK(silc_server_backup_connect_to_router)
829 {
830   SilcServer server = app_context;
831   SilcServerConnection sconn = (SilcServerConnection)context;
832   int sock;
833   const char *server_ip;
834
835   SILC_LOG_DEBUG(("Connecting to router %s:%d", sconn->remote_host,
836                   sconn->remote_port));
837
838   /* Connect to remote host */
839   server_ip = server->config->server_info->primary == NULL ? NULL :
840     server->config->server_info->primary->server_ip;
841   sock = silc_net_create_connection_async(server_ip, sconn->remote_port,
842                                           sconn->remote_host);
843   if (sock < 0) {
844     if (server->server_type == SILC_SERVER) {
845       sconn->retry_count++;
846       if (sconn->retry_count > 3) {
847         silc_free(sconn->remote_host);
848         silc_free(sconn);
849         return;
850       }
851     }
852     silc_schedule_task_add(server->schedule, 0,
853                            silc_server_backup_connect_to_router,
854                            context, 10, 0, SILC_TASK_TIMEOUT,
855                            SILC_TASK_PRI_NORMAL);
856     return;
857   }
858
859   /* wait for the connection to be established */
860   silc_schedule_task_add(server->schedule, sock,
861                          silc_server_backup_connection_established,
862                          context, 0, 0, SILC_TASK_FD,
863                          SILC_TASK_PRI_NORMAL);
864   silc_schedule_set_listen_fd(server->schedule, sock,
865                               SILC_TASK_WRITE, FALSE);
866 }
867
868 /* Constantly tries to reconnect to a primary router indicated by the
869    `ip' and `port'. The `connected' callback will be called when the
870    connection is created. */
871
872 void silc_server_backup_reconnect(SilcServer server,
873                                   const char *ip, SilcUInt16 port,
874                                   SilcServerConnectRouterCallback callback,
875                                   void *context)
876 {
877   SilcServerConnection sconn;
878
879   SILC_LOG_INFO(("Attempting to reconnect to primary router"));
880
881   sconn = silc_calloc(1, sizeof(*sconn));
882   sconn->remote_host = strdup(ip);
883   sconn->remote_port = port;
884   sconn->callback = callback;
885   sconn->callback_context = context;
886   sconn->no_reconnect = TRUE;
887   sconn->retry_count = 0;
888   silc_schedule_task_add(server->schedule, 0,
889                          silc_server_backup_connect_to_router,
890                          sconn, 1, 0, SILC_TASK_TIMEOUT,
891                          SILC_TASK_PRI_NORMAL);
892 }
893
894 /* Task that is called after backup router has connected back to
895    primary router and we are starting the resuming protocol */
896
897 SILC_TASK_CALLBACK(silc_server_backup_connected_later)
898 {
899   SilcServerBackupProtocolContext proto_ctx =
900     (SilcServerBackupProtocolContext)context;
901   SilcServer server = proto_ctx->server;
902   SilcSocketConnection sock = proto_ctx->sock;
903
904   /* If running other protocol already run this one a bit later. */
905   if (sock->protocol) {
906     SILC_LOG_DEBUG(("Other protocol is running, wait for it to finish"));
907     silc_schedule_task_add(server->schedule, 0,
908                            silc_server_backup_connected_later,
909                            proto_ctx, 15, 0,
910                            SILC_TASK_TIMEOUT,
911                            SILC_TASK_PRI_NORMAL);
912     return;
913   }
914
915   SILC_LOG_DEBUG(("Starting backup resuming protocol as initiator"));
916   SILC_LOG_INFO(("Starting backup resuming protocol"));
917
918   /* Run the backup resuming protocol */
919   silc_protocol_alloc(SILC_PROTOCOL_SERVER_BACKUP,
920                       &sock->protocol, proto_ctx,
921                       silc_server_protocol_backup_done);
922   silc_protocol_execute(sock->protocol, server->schedule, 0, 0);
923
924   silc_schedule_task_add(server->schedule, sock->sock,
925                          silc_server_backup_timeout,
926                          sock->protocol, 30, 0, SILC_TASK_TIMEOUT,
927                          SILC_TASK_PRI_NORMAL);
928 }
929
930 /* Called when we've established connection back to our primary router
931    when we've acting as backup router and have replaced the primary router
932    in the cell. This function will start the backup resuming protocol. */
933
934 void silc_server_backup_connected(SilcServer server,
935                                   SilcServerEntry server_entry,
936                                   void *context)
937 {
938   SilcServerBackupProtocolContext proto_ctx;
939   SilcSocketConnection sock;
940
941   if (!server_entry) {
942     /* Try again */
943     SilcServerConfigRouter *primary;
944     primary = silc_server_config_get_primary_router(server);
945     if (primary) {
946       if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
947                                            primary->host, primary->port))
948         silc_server_backup_reconnect(server,
949                                      primary->host, primary->port,
950                                      silc_server_backup_connected,
951                                      context);
952     }
953     return;
954   }
955
956   sock = (SilcSocketConnection)server_entry->connection;
957   proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
958   proto_ctx->server = server;
959   proto_ctx->sock = silc_socket_dup(sock);
960   proto_ctx->responder = FALSE;
961   proto_ctx->type = SILC_SERVER_BACKUP_START;
962   proto_ctx->start = time(0);
963
964   /* Start through scheduler */
965   silc_schedule_task_add(server->schedule, 0,
966                          silc_server_backup_connected_later,
967                          proto_ctx, 0, 1,
968                          SILC_TASK_TIMEOUT,
969                          SILC_TASK_PRI_NORMAL);
970 }
971
972 /* Called when normal server has connected to its primary router after
973    backup router has sent the START packet in reusming protocol. We will
974    move the protocol context from the backup router connection to the
975    primary router. */
976
977 static void silc_server_backup_connect_primary(SilcServer server,
978                                                SilcServerEntry server_entry,
979                                                void *context)
980 {
981   SilcSocketConnection backup_router = (SilcSocketConnection)context;
982   SilcServerBackupProtocolContext ctx;
983   SilcSocketConnection sock;
984   SilcIDListData idata;
985   unsigned char data[2];
986
987   if (SILC_IS_DISCONNECTING(backup_router) ||
988       SILC_IS_DISCONNECTED(backup_router)) {
989     silc_socket_free(backup_router);
990     return;
991   }
992
993   if (!server_entry) {
994     /* Try again */
995     SilcServerConfigRouter *primary;
996     primary = silc_server_config_get_primary_router(server);
997     if (primary)
998       if (!silc_server_find_socket_by_host(server, SILC_SOCKET_TYPE_ROUTER,
999                                            primary->host, primary->port))
1000         silc_server_backup_reconnect(server,
1001                                      primary->host, primary->port,
1002                                      silc_server_backup_connect_primary,
1003                                      context);
1004     return;
1005   }
1006
1007   /* Unref */
1008   silc_socket_free(backup_router);
1009
1010   if (!backup_router->protocol)
1011     return;
1012   if (!server_entry->connection)
1013     return;
1014
1015   ctx = (SilcServerBackupProtocolContext)backup_router->protocol->context;
1016   sock = (SilcSocketConnection)server_entry->connection;
1017   idata = (SilcIDListData)server_entry;
1018
1019   SILC_LOG_DEBUG(("Sending CONNECTED packet (session %d)", ctx->session));
1020   SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1021                 ctx->session));
1022
1023   /* Send the CONNECTED packet back to the backup router. */
1024   data[0] = SILC_SERVER_BACKUP_CONNECTED;
1025   data[1] = ctx->session;
1026   silc_server_packet_send(server, backup_router,
1027                           SILC_PACKET_RESUME_ROUTER, 0, data, 2, FALSE);
1028
1029   /* The primary connection is disabled until it sends the RESUMED packet
1030      to us. */
1031   idata->status |= SILC_IDLIST_STATUS_DISABLED;
1032
1033   /* Move this protocol context from this backup router connection to
1034      the primary router connection since it will send the subsequent
1035      packets in this protocol. We don't talk with backup router
1036      anymore. */
1037   sock->protocol = backup_router->protocol;
1038   if (ctx->sock)
1039     silc_socket_free(ctx->sock); /* unref */
1040   ctx->sock = silc_socket_dup(server_entry->connection);
1041   backup_router->protocol = NULL;
1042 }
1043
1044 /* Timeout callback used by the backup router to send the ENDING packet
1045    to primary router to indicate that it can now resume as being primary
1046    router. All CONNECTED packets has been received when we reach this. */
1047
1048 SILC_TASK_CALLBACK(silc_server_backup_send_resumed)
1049 {
1050   SilcProtocol protocol = (SilcProtocol)context;
1051   SilcServerBackupProtocolContext ctx = protocol->context;
1052   SilcServer server = ctx->server;
1053   unsigned char data[2];
1054   int i;
1055
1056   SILC_LOG_DEBUG(("Start"));
1057
1058   for (i = 0; i < ctx->sessions_count; i++)
1059     if (ctx->sessions[i].server_entry == ctx->sock->user_data)
1060       ctx->session = ctx->sessions[i].session;
1061
1062   /* We've received all the CONNECTED packets and now we'll send the
1063      ENDING packet to the new primary router. */
1064   data[0] = SILC_SERVER_BACKUP_ENDING;
1065   data[1] = ctx->session;
1066   silc_server_packet_send(server, ctx->sock, SILC_PACKET_RESUME_ROUTER, 0,
1067                           data, sizeof(data), FALSE);
1068
1069   /* The protocol will go to END state. */
1070   protocol->state = SILC_PROTOCOL_STATE_END;
1071 }
1072
1073 /* Backup resuming protocol. This protocol is executed when the primary
1074    router wants to resume its position as being primary router. */
1075
1076 SILC_TASK_CALLBACK_GLOBAL(silc_server_protocol_backup)
1077 {
1078   SilcProtocol protocol = (SilcProtocol)context;
1079   SilcServerBackupProtocolContext ctx = protocol->context;
1080   SilcServer server = ctx->server;
1081   SilcServerEntry server_entry;
1082   SilcSocketConnection sock = NULL;
1083   unsigned char data[2];
1084   int i;
1085
1086   if (protocol->state == SILC_PROTOCOL_STATE_UNKNOWN)
1087     protocol->state = SILC_PROTOCOL_STATE_START;
1088
1089   switch(protocol->state) {
1090   case SILC_PROTOCOL_STATE_START:
1091     if (ctx->responder == FALSE) {
1092       /*
1093        * Initiator (backup router)
1094        */
1095
1096       /* Send the START packet to primary router and normal servers. The
1097          packet will indicate to the primary router that it has been replaced
1098          by us.  For normal servers it means that we will be resigning as
1099          being primary router shortly. */
1100       for (i = 0; i < server->config->param.connections_max; i++) {
1101         sock = server->sockets[i];
1102         if (!sock || !sock->user_data ||
1103             sock->user_data == server->id_entry ||
1104             (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1105              sock->type != SILC_SOCKET_TYPE_SERVER))
1106           continue;
1107
1108         server_entry = sock->user_data;
1109         if (server_entry->data.status & SILC_IDLIST_STATUS_DISABLED)
1110           continue;
1111
1112         ctx->sessions = silc_realloc(ctx->sessions,
1113                                      sizeof(*ctx->sessions) *
1114                                      (ctx->sessions_count + 1));
1115         ctx->sessions[ctx->sessions_count].session = ctx->sessions_count;
1116         ctx->sessions[ctx->sessions_count].connected = FALSE;
1117         ctx->sessions[ctx->sessions_count].server_entry = server_entry;
1118
1119         SILC_LOG_DEBUG(("Sending START to %s (session %d)",
1120                         server_entry->server_name, ctx->sessions_count));
1121         SILC_LOG_INFO(("Expecting CONNECTED from %s (session %d)",
1122                        server_entry->server_name, ctx->sessions_count));
1123
1124         /* This connection is performing this protocol too now */
1125         sock->protocol = protocol;
1126
1127         data[0] = SILC_SERVER_BACKUP_START;
1128         data[1] = ctx->sessions_count;
1129         silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1130                                 data, sizeof(data), FALSE);
1131         ctx->sessions_count++;
1132       }
1133
1134       /* Announce data to the new primary to be. */
1135       silc_server_announce_servers(server, TRUE, 0, ctx->sock);
1136       silc_server_announce_clients(server, 0, ctx->sock);
1137       silc_server_announce_channels(server, 0, ctx->sock);
1138
1139       protocol->state++;
1140
1141     } else {
1142       /*
1143        * Responder (all servers and routers)
1144        */
1145       SilcServerConfigRouter *primary;
1146
1147       /* We should have received START packet */
1148       if (ctx->type != SILC_SERVER_BACKUP_START) {
1149         SILC_LOG_ERROR(("Bad resume router packet START %d", ctx->type));
1150         break;
1151       }
1152
1153       /* Connect to the primary router that was down that is now supposed
1154          to be back online. We send the CONNECTED packet after we've
1155          established the connection to the primary router. */
1156       primary = silc_server_config_get_primary_router(server);
1157       if (primary && server->backup_primary &&
1158           !silc_server_num_sockets_by_remote(server,
1159                                              silc_net_is_ip(primary->host) ?
1160                                              primary->host : NULL,
1161                                              silc_net_is_ip(primary->host) ?
1162                                              NULL : primary->host,
1163                                              primary->port,
1164                                              SILC_SOCKET_TYPE_ROUTER)) {
1165         SILC_LOG_DEBUG(("Received START (session %d), reconnect to router",
1166                         ctx->session));
1167         silc_server_backup_reconnect(server,
1168                                      primary->host, primary->port,
1169                                      silc_server_backup_connect_primary,
1170                                      silc_socket_dup(ctx->sock));
1171       } else {
1172         /* Nowhere to connect just return the CONNECTED packet */
1173         SILC_LOG_DEBUG(("Received START (session %d), send CONNECTED back",
1174                         ctx->session));
1175         SILC_LOG_INFO(("Sending CONNECTED (session %d) to backup router",
1176                       ctx->session));
1177
1178         /* Send the CONNECTED packet back to the backup router. */
1179         data[0] = SILC_SERVER_BACKUP_CONNECTED;
1180         data[1] = ctx->session;
1181         silc_server_packet_send(server, ctx->sock,
1182                                 SILC_PACKET_RESUME_ROUTER, 0,
1183                                 data, sizeof(data), FALSE);
1184       }
1185
1186       /* Add this resuming session */
1187       ctx->sessions = silc_realloc(ctx->sessions,
1188                                    sizeof(*ctx->sessions) *
1189                                    (ctx->sessions_count + 1));
1190       ctx->sessions[ctx->sessions_count].session = ctx->session;
1191       ctx->sessions_count++;
1192
1193       /* Normal server goes directly to the END state. */
1194       if (server->server_type == SILC_ROUTER &&
1195           (!server->router ||
1196            server->router->data.status & SILC_IDLIST_STATUS_DISABLED))
1197         protocol->state++;
1198       else
1199         protocol->state = SILC_PROTOCOL_STATE_END;
1200     }
1201     break;
1202
1203   case 2:
1204     if (ctx->responder == FALSE) {
1205       /*
1206        * Initiator (backup router)
1207        */
1208
1209       /* We should have received CONNECTED packet */
1210       if (ctx->type != SILC_SERVER_BACKUP_CONNECTED) {
1211         SILC_LOG_ERROR(("Bad resume router packet CONNECTED %d", ctx->type));
1212         break;
1213       }
1214
1215       for (i = 0; i < ctx->sessions_count; i++) {
1216         if (ctx->sessions[i].session == ctx->session) {
1217           ctx->sessions[i].connected = TRUE;
1218           SILC_LOG_INFO(("Received CONNECTED from %s (session %d)",
1219                          ctx->sessions[i].server_entry->server_name,
1220                          ctx->session));
1221           SILC_LOG_DEBUG(("Received CONNECTED (session %d)", ctx->session));
1222           break;
1223         }
1224       }
1225
1226       /* See if all returned CONNECTED, if not, then continue waiting. */
1227       for (i = 0; i < ctx->sessions_count; i++) {
1228         if (!ctx->sessions[i].connected)
1229           return;
1230       }
1231
1232       SILC_LOG_INFO(("All sessions have returned CONNECTED packets, "
1233                      "continuing"));
1234       SILC_LOG_DEBUG(("Sending ENDING packet to primary router"));
1235
1236       /* The ENDING is sent with timeout, and then we continue to the
1237          END state in the protocol. */
1238       silc_schedule_task_add(server->schedule, 0,
1239                              silc_server_backup_send_resumed,
1240                              protocol, 1, 0, SILC_TASK_TIMEOUT,
1241                              SILC_TASK_PRI_NORMAL);
1242       return;
1243
1244     } else {
1245       /*
1246        * Responder (primary router)
1247        */
1248
1249       /* We should have been received ENDING packet */
1250       if (ctx->type != SILC_SERVER_BACKUP_ENDING) {
1251         SILC_LOG_ERROR(("Bad resume router packet ENDING %d", ctx->type));
1252         break;
1253       }
1254
1255       SILC_LOG_DEBUG(("Received ENDING packet, we are going to resume now"));
1256
1257       /* Switch announced informations to our primary router of using the
1258          backup router. */
1259       silc_server_local_servers_toggle_enabled(server, TRUE);
1260       silc_server_update_servers_by_server(server, ctx->sock->user_data,
1261                                            server->router);
1262       silc_server_update_clients_by_server(server, ctx->sock->user_data,
1263                                            server->router, TRUE);
1264
1265       /* We as primary router now must send RESUMED packets to all servers
1266          and routers so that they know we are back.   For backup router we
1267          send the packet last so that we give the backup as much time as
1268          possible to deal with message routing at this critical moment. */
1269       for (i = 0; i < server->config->param.connections_max; i++) {
1270         sock = server->sockets[i];
1271         if (!sock || !sock->user_data ||
1272             sock->user_data == server->id_entry ||
1273             (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1274              sock->type != SILC_SOCKET_TYPE_SERVER))
1275           continue;
1276
1277         /* Send to backup last */
1278         if (sock == ctx->sock)
1279           continue;
1280
1281       send_to_backup:
1282         server_entry = sock->user_data;
1283         server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1284
1285         SILC_LOG_DEBUG(("Sending RESUMED to %s", server_entry->server_name));
1286         SILC_LOG_INFO(("Sending RESUMED to %s", server_entry->server_name));
1287
1288         /* This connection is performing this protocol too now */
1289         sock->protocol = protocol;
1290
1291         data[0] = SILC_SERVER_BACKUP_RESUMED;
1292         data[1] = 0;
1293         silc_server_packet_send(server, sock, SILC_PACKET_RESUME_ROUTER, 0,
1294                                 data, sizeof(data), FALSE);
1295         silc_server_packet_queue_purge(server, sock);
1296       }
1297
1298       /* Now send the same packet to backup */
1299       if (sock != ctx->sock) {
1300         sleep(1);
1301         sock = ctx->sock;
1302         goto send_to_backup;
1303       }
1304
1305       /* We are now resumed and are back as primary router in the cell. */
1306       SILC_LOG_INFO(("We are now the primary router of our cell again"));
1307       server->wait_backup = FALSE;
1308
1309       /* Announce WATCH list a little later */
1310       silc_schedule_task_add(server->schedule, 0,
1311                              silc_server_backup_announce_watches,
1312                              silc_socket_dup(ctx->sock), 4, 0,
1313                              SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
1314
1315       /* For us this is the end of this protocol. */
1316       if (protocol->final_callback)
1317         silc_protocol_execute_final(protocol, server->schedule);
1318       else
1319         silc_protocol_free(protocol);
1320     }
1321     break;
1322
1323   case SILC_PROTOCOL_STATE_END:
1324     {
1325       /*
1326        * Responder (backup router, servers, and remote router)
1327        */
1328       SilcServerEntry router, backup_router;
1329
1330       /* We should have been received RESUMED from our primary router. */
1331       if (ctx->type != SILC_SERVER_BACKUP_RESUMED) {
1332         SILC_LOG_ERROR(("Bad resume router packet RESUMED %d", ctx->type));
1333         break;
1334       }
1335
1336       SILC_LOG_INFO(("Received RESUMED from new primary router"));
1337
1338       /* If we are the backup router, mark that we are no longer primary
1339          but are back to backup router status. */
1340       if (server->backup_router)
1341         server->server_type = SILC_BACKUP_ROUTER;
1342
1343       /* We have now new primary router. All traffic goes there from now on. */
1344       router = ctx->sock->user_data;
1345       if (silc_server_backup_replaced_get(server, router->id,
1346                                           &backup_router)) {
1347
1348         if (backup_router == server->router) {
1349           /* We have new primary router now */
1350           server->id_entry->router = router;
1351           server->router = router;
1352           SILC_LOG_INFO(("Switching back to primary router %s",
1353                          server->router->server_name));
1354         } else {
1355           /* We are connected to new primary and now continue using it */
1356           SILC_LOG_INFO(("Resuming the use of primary router %s",
1357                          router->server_name));
1358         }
1359         server->backup_primary = FALSE;
1360         sock = router->connection;
1361
1362         /* Update the client entries of the backup router to the new
1363            router */
1364         silc_server_local_servers_toggle_enabled(server, FALSE);
1365         router->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1366         silc_server_update_servers_by_server(server, backup_router, router);
1367         silc_server_update_clients_by_server(
1368                                    server, NULL, router,
1369                                    server->server_type == SILC_BACKUP_ROUTER);
1370         if (server->server_type == SILC_SERVER)
1371           silc_server_update_channels_by_server(server, backup_router, router);
1372         silc_server_backup_replaced_del(server, backup_router);
1373       }
1374
1375       /* Send notify about primary router going down to local operators */
1376       SILC_SERVER_SEND_OPERS(server, FALSE, TRUE,
1377                              SILC_NOTIFY_TYPE_NONE,
1378                              ("%s resumed the use of primary router %s",
1379                               server->server_name,
1380                               server->router->server_name));
1381
1382       /* Protocol has ended, call the final callback */
1383       if (protocol->final_callback)
1384         silc_protocol_execute_final(protocol, server->schedule);
1385       else
1386         silc_protocol_free(protocol);
1387     }
1388     break;
1389
1390   case SILC_PROTOCOL_STATE_ERROR:
1391     /* Protocol has ended, call the final callback */
1392     if (protocol->final_callback)
1393       silc_protocol_execute_final(protocol, server->schedule);
1394     else
1395       silc_protocol_free(protocol);
1396     break;
1397
1398   case SILC_PROTOCOL_STATE_FAILURE:
1399     /* Protocol has ended, call the final callback */
1400     SILC_LOG_ERROR(("Error during backup resume: received Failure"));
1401     ctx->received_failure = TRUE;
1402     if (protocol->final_callback)
1403       silc_protocol_execute_final(protocol, server->schedule);
1404     else
1405       silc_protocol_free(protocol);
1406     break;
1407
1408   case SILC_PROTOCOL_STATE_UNKNOWN:
1409     break;
1410   }
1411 }
1412
1413 /* Final resuming protocol completion callback */
1414
1415 SILC_TASK_CALLBACK(silc_server_protocol_backup_done)
1416 {
1417   SilcProtocol protocol = (SilcProtocol)context;
1418   SilcServerBackupProtocolContext ctx = protocol->context;
1419   SilcServer server = ctx->server;
1420   SilcServerEntry server_entry;
1421   SilcSocketConnection sock;
1422   bool error;
1423   int i;
1424
1425   silc_schedule_task_del_by_context(server->schedule, protocol);
1426
1427   error = (protocol->state == SILC_PROTOCOL_STATE_ERROR ||
1428            protocol->state == SILC_PROTOCOL_STATE_FAILURE);
1429
1430   if (error) {
1431     SILC_LOG_ERROR(("Error occurred during backup router resuming protcool"));
1432     if (server->server_type == SILC_SERVER)
1433       silc_schedule_task_del_by_callback(server->schedule,
1434                                          silc_server_backup_connect_to_router);
1435   }
1436
1437   if (server->server_shutdown)
1438     return;
1439
1440   /* Remove this protocol from all server entries that has it */
1441   for (i = 0; i < server->config->param.connections_max; i++) {
1442     sock = server->sockets[i];
1443     if (!sock || !sock->user_data ||
1444         (sock->type != SILC_SOCKET_TYPE_ROUTER &&
1445          sock->type != SILC_SOCKET_TYPE_SERVER))
1446       continue;
1447
1448     server_entry = sock->user_data;
1449
1450     /* The SilcProtocol context was shared between all connections, clear
1451        it from all connections. */
1452     if (sock->protocol == protocol) {
1453       silc_server_packet_queue_purge(server, sock);
1454       sock->protocol = NULL;
1455
1456       if (error) {
1457
1458         if (server->server_type == SILC_SERVER &&
1459             server_entry->server_type == SILC_ROUTER)
1460           continue;
1461
1462         /* Backup router */
1463         if (SILC_PRIMARY_ROUTE(server) == sock && server->backup_router) {
1464           if (ctx->sock == sock) {
1465             silc_socket_free(sock); /* unref */
1466             ctx->sock = NULL;
1467           }
1468
1469           /* If failed after 10 attempts, it won't work, give up */
1470           if (ctx->initiator_restart > 10)
1471             ctx->received_failure = TRUE;
1472
1473           if (!ctx->received_failure) {
1474             /* Protocol error, probably timeout. Just restart the protocol. */
1475             SilcServerBackupProtocolContext proto_ctx;
1476
1477             /* Restart the protocol. */
1478             proto_ctx = silc_calloc(1, sizeof(*proto_ctx));
1479             proto_ctx->server = server;
1480             proto_ctx->sock = silc_socket_dup(sock);
1481             proto_ctx->responder = FALSE;
1482             proto_ctx->type = SILC_SERVER_BACKUP_START;
1483             proto_ctx->start = time(0);
1484             proto_ctx->initiator_restart = ctx->initiator_restart + 1;
1485
1486             /* Start through scheduler */
1487             silc_schedule_task_add(server->schedule, 0,
1488                                    silc_server_backup_connected_later,
1489                                    proto_ctx, 5, 0,
1490                                    SILC_TASK_TIMEOUT,
1491                                    SILC_TASK_PRI_NORMAL);
1492           } else {
1493             /* If failure was received, switch back to normal backup router.
1494                For some reason primary wouldn't accept that we were supposed
1495                to perfom resuming protocol. */
1496             server->server_type = SILC_BACKUP_ROUTER;
1497             silc_server_local_servers_toggle_enabled(server, FALSE);
1498             server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1499             silc_server_update_servers_by_server(server, server->id_entry,
1500                                                  sock->user_data);
1501             silc_server_update_clients_by_server(server, NULL,
1502                                                  sock->user_data, TRUE);
1503
1504             /* Announce our clients and channels to the router */
1505             silc_server_announce_clients(server, 0, sock);
1506             silc_server_announce_channels(server, 0, sock);
1507
1508             /* Announce WATCH list a little later */
1509             silc_schedule_task_add(server->schedule, 0,
1510                                    silc_server_backup_announce_watches,
1511                                    silc_socket_dup(sock), 5, 0,
1512                                    SILC_TASK_TIMEOUT,
1513                                    SILC_TASK_PRI_NORMAL);
1514           }
1515
1516           continue;
1517         }
1518       }
1519
1520       server_entry->data.status &= ~SILC_IDLIST_STATUS_DISABLED;
1521     }
1522   }
1523
1524   if (!error) {
1525     SILC_LOG_INFO(("Backup resuming protocol ended successfully"));
1526
1527     if (ctx->type == SILC_SERVER_BACKUP_RESUMED && server->router) {
1528       /* Announce all of our information to the router. */
1529       if (server->server_type == SILC_ROUTER)
1530         silc_server_announce_servers(server, FALSE, 0,
1531                                      server->router->connection);
1532
1533       /* Announce our clients and channels to the router */
1534       silc_server_announce_clients(server, 0, server->router->connection);
1535       silc_server_announce_channels(server, 0, server->router->connection);
1536
1537       /* Announce WATCH list a little later */
1538       silc_schedule_task_add(server->schedule, 0,
1539                              silc_server_backup_announce_watches,
1540                              silc_socket_dup(server->router->connection), 4, 0,
1541                              SILC_TASK_TIMEOUT, SILC_TASK_PRI_NORMAL);
1542     }
1543   } else {
1544     /* Error */
1545
1546     if (server->server_type == SILC_SERVER) {
1547       /* If we are still using backup router Send confirmation to backup
1548          that using it is still ok and continue sending traffic there.
1549          The backup will reply with error if it's not ok. */
1550       if (server->router && server->backup_primary) {
1551         /* Send START_USE just in case using backup wouldn't be ok. */
1552         silc_server_backup_send_start_use(server, server->router->connection,
1553                                           FALSE);
1554
1555         /* Check couple of times same START_USE just in case. */
1556         silc_schedule_task_add(server->schedule, 0,
1557                                silc_server_backup_check_status,
1558                                silc_socket_dup(server->router->connection),
1559                                5, 1, SILC_TASK_TIMEOUT,
1560                                SILC_TASK_PRI_NORMAL);
1561         silc_schedule_task_add(server->schedule, 0,
1562                                silc_server_backup_check_status,
1563                                silc_socket_dup(server->router->connection),
1564                                20, 1, SILC_TASK_TIMEOUT,
1565                                SILC_TASK_PRI_NORMAL);
1566         silc_schedule_task_add(server->schedule, 0,
1567                                silc_server_backup_check_status,
1568                                silc_socket_dup(server->router->connection),
1569                                60, 1, SILC_TASK_TIMEOUT,
1570                                SILC_TASK_PRI_NORMAL);
1571       }
1572     }
1573   }
1574
1575   if (ctx->sock && ctx->sock->protocol)
1576     ctx->sock->protocol = NULL;
1577   if (ctx->sock)
1578     silc_socket_free(ctx->sock); /* unref */
1579   silc_protocol_free(protocol);
1580   silc_free(ctx->sessions);
1581   silc_free(ctx);
1582 }
1583
1584 SILC_TASK_CALLBACK(silc_server_backup_announce_watches)
1585 {
1586   SilcSocketConnection sock = context;
1587   SilcServer server = app_context;
1588   if (sock->users > 1)
1589     silc_server_announce_watches(server, sock);
1590   silc_socket_free(sock);
1591 }