X-Git-Url: http://git.silcnet.org/gitweb/?p=crypto.git;a=blobdiff_plain;f=lib%2Fsilcacc%2Fsoftacc_cipher.c;fp=lib%2Fsilcacc%2Fsoftacc_cipher.c;h=a8473f0cd4f5c75abe74e4bd34865a7fbce5eabd;hp=0000000000000000000000000000000000000000;hb=80dc2a39c614ea1376a2e19ebe2460af11c9afee;hpb=9f20f0382b6229eca740925a73f96294f6dcedc6 diff --git a/lib/silcacc/softacc_cipher.c b/lib/silcacc/softacc_cipher.c new file mode 100644 index 00000000..a8473f0c --- /dev/null +++ b/lib/silcacc/softacc_cipher.c @@ -0,0 +1,817 @@ +/* + + softacc_cipher.c + + Author: Pekka Riikonen + + Copyright (C) 2008 Pekka Riikonen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +*/ + +/* #define SILC_SOFTACC_DEBUG_ON 1 */ + +#include "silccrypto.h" +#include "softacc.h" +#include "softacc_i.h" +#include "aes_internal.h" + +/* Version 1.0 */ + +/* Cipher accelerator accelerates ciphers using counter mode by precomputing + the CTR key stream in threads. Encryption and decryption uses the + precomputed key stream and gets significant speed improvement in the + process. The threads are reserved from the thread pool and they remain + reserved as long as the cipher is accelerated. + + As a queue we use SilcThreadQueue from SRT which handles locking and + waiting automatically and supports multiple pipes for multiple key + streams, so it makes this whole thing very simple. + + This can accelerate any cipher but AES is especially optimized. + + Problems: + + To get the absolutely maximum performance out one must assign lots of + RAM to softacc. + +*/ + +/* + Benchmarks (version 1.0): + + 4-core: 2 x dual-core Xeon 5160 3GHz (Woodcrest), 4 GB RAM + ----------------------------------------------------------------------- + cipher_threads = 4, cipher_blocks = 65536, cipher_streams = 32: + aes-128-ctr: 728042.34 KB 710.98 MB 5687.83 Mbit / sec + aes-192-ctr: 634662.85 KB 619.79 MB 4958.30 Mbit / sec + aes-256-ctr: 555215.22 KB 542.20 MB 4337.62 Mbit / sec + + default settings, cipher_threads = 4: + aes-128-ctr: 625568.94 KB 610.91 MB 4887.26 Mbit / sec + aes-192-ctr: 572719.08 KB 559.30 MB 4474.37 Mbit / sec + aes-256-ctr: 506930.88 KB 495.05 MB 3960.40 Mbit / sec + + 8-core: 2 x quad-core Xeon E5345 2.33GHz (Clovertown), 4 GB RAM + ----------------------------------------------------------------------- + cipher_threads = 8, cipher_blocks = 65536, cipher_streams = 64: + aes-128-ctr: 1162373.93 KB 1135.13 MB 9081.05 Mbit / sec + aes-192-ctr: 994808.64 KB 971.49 MB 7771.94 Mbit / sec + aes-256-ctr: 874370.93 KB 853.88 MB 6831.02 Mbit / sec + + default settings, cipher_threads = 8: + aes-128-ctr: 805157.74 KB 786.29 MB 6290.29 Mbit / sec + aes-192-ctr: 733164.28 KB 715.98 MB 5727.85 Mbit / sec + aes-256-ctr: 664677.98 KB 649.10 MB 5192.80 Mbit / sec + + Test setup: + - Linux 2.6.20 x86-64 + - GCC 4.1.2 + - Yasm 0.5.0.1591 + - nice -n -20 lib/silcacc/tests/test_softacc_cipher + +*/ + +/************************** Types and definitions ***************************/ + +/* Software accelerator cipher operations */ +const SilcCipherObject softacc_cipher[] = +{ + /* AES */ + { + "aes", "aes", + silc_softacc_cipher_aes_set_key, + silc_softacc_cipher_aes_set_iv, + silc_softacc_cipher_aes_encrypt, + silc_softacc_cipher_aes_encrypt, + silc_softacc_cipher_init, + silc_softacc_cipher_uninit, + 0, 0, 0, + SILC_CIPHER_MODE_CTR, /* Only CTR mode can be accelerated */ + }, + + /* All other ciphers */ + { + "any", "any", + silc_softacc_cipher_set_key, + silc_softacc_cipher_set_iv, + silc_softacc_cipher_encrypt, + silc_softacc_cipher_encrypt, + silc_softacc_cipher_init, + silc_softacc_cipher_uninit, + 0, 0, 0, + SILC_CIPHER_MODE_CTR, /* Only CTR mode can be accelerated */ + }, + + { + NULL, NULL, NULL, NULL, NULL, + NULL, NULL, 0, 0, 0, 0, + } +}; + +/* Block size */ +#define SILC_KEYSTREAM_BLOCK SILC_CIPHER_MAX_IV_SIZE + +/* Thread stop signal */ +#define SILC_KEYSTREAM_STOP (void *)0x01 + +/* Key stream context */ +typedef struct { + SilcUInt32 key_index; /* Key index in queue */ + unsigned char ctr[SILC_CIPHER_MAX_IV_SIZE]; /* Counter */ + unsigned char key[0]; /* Key stream begins here */ +} *SilcSoftaccCipherKeyStream; + +/* Accelerator cipher context */ +typedef struct SilcSoftaccCipherStruct { + union { + AesContext aes; /* AES */ + SilcCipher ecb; /* Other ciphers in ECB mode */ + } c; + + SilcThreadQueue queue; /* Key stream queue */ + unsigned char iv[SILC_CIPHER_MAX_IV_SIZE]; /* Current counter */ + SilcSoftaccCipherKeyStream *key_stream; /* Key streams */ + SilcSoftaccCipherKeyStream cur; /* Current key stream */ + SilcUInt32 cur_block; /* Current block in key stream */ + SilcUInt32 cur_index; /* Current key stream index */ + SilcUInt32 pad; /* Partial block offset */ + SilcUInt32 num_key_stream; /* Number of key streams */ + SilcUInt32 cipher_blocks; /* Number of cipher blocks */ + unsigned int cipher_threads : 31; /* Number of cipher threads */ + unsigned int key_set : 1; /* Set when key is set */ +} *SilcSoftaccCipher; + +/************************** Static utility functions ************************/ + +/* Add value to MSB ordered counter. */ + +static inline +void silc_softacc_add_ctr(unsigned char *ctr, SilcUInt32 block_len, + SilcUInt32 val) +{ + SilcUInt16 q = 0; + int i; + + if (!val) + return; + + for (i = block_len - 1; i >= 0; i--) { + q += ctr[i] + (val & 0xff); + ctr[i] = (q & 0xff); + val >>= 8; + q >>= 8; + if (!val && !q) + return; + } +} + +/*********************************** AES ************************************/ + +#define SILC_AES_BLOCK 16 + +/* Thread destructor */ + +static SILC_TASK_CALLBACK(silc_softacc_cipher_aes_completion) +{ + SilcSoftaccCipher c = context; + int i; + + /* Disconnect from key stream queue */ + if (silc_thread_queue_disconnect(c->queue)) + return; + + for (i = 0; i < c->num_key_stream; i++) + silc_free(c->key_stream[i]); + silc_free(c->key_stream); + memset(c, 0, sizeof(*c)); + silc_free(c); +} + +/* Key stream computation thread */ + +void silc_softacc_cipher_aes_thread(SilcSchedule schedule, void *context) +{ + SilcSoftaccCipher c = context; + SilcThreadQueue queue = c->queue; + SilcSoftaccCipherKeyStream key; + SilcUInt32 i, num_key_stream = c->num_key_stream; + SilcUInt32 cipher_blocks = c->cipher_blocks; + SilcInt32 k; + unsigned char *enc_ctr; + + SILC_SOFTACC_DEBUG(("Start CTR precomputation thread")); + + /* Connect to the key stream queue */ + silc_thread_queue_connect(queue); + + /* Process key streams. We wait for empty key streams to come from the + last pipe in the queue. Here we precompute the key stream and put them + back to the queue. */ + while (1) { + key = silc_thread_queue_pop(queue, num_key_stream, TRUE); + if (key == SILC_KEYSTREAM_STOP) + break; + + SILC_SOFTACC_DEBUG(("Precompute key stream %p, index %d", key, + key->key_index)); + + /* Encrypt */ + enc_ctr = key->key; + for (i = 0; i < cipher_blocks; i++) { + for (k = SILC_AES_BLOCK - 1; k >= 0; k--) + if (++key->ctr[k]) + break; + aes_encrypt(key->ctr, enc_ctr, &c->c.aes.u.enc); + enc_ctr += SILC_AES_BLOCK; + } + + SILC_SOFTACC_DEBUG(("Precomputed key stream %p, index %d", key, + key->key_index)); + + /* Update counter */ + silc_softacc_add_ctr(key->ctr, SILC_AES_BLOCK, + (num_key_stream - 1) * cipher_blocks); + + /* Put it back to queue */ + silc_thread_queue_push(queue, key->key_index, key, FALSE); + } + + SILC_SOFTACC_DEBUG(("End CTR precomputation thread")); +} + +/* Set IV. Also, reset current block, discarding any remaining unused bits in + the current key block. */ + +SILC_CIPHER_API_SET_IV(softacc_cipher_aes) +{ + SilcSoftaccCipher c = context; + SilcSoftaccCipherKeyStream key; + SilcUInt32 i; + + /* If IV is NULL we start new block */ + if (!iv) { + SILC_SOFTACC_DEBUG(("Start new block")); + + if (c->pad < SILC_AES_BLOCK) { + c->pad = SILC_AES_BLOCK; + + /* Start new block */ + if (++c->cur_block == c->cipher_blocks) { + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + c->cur, c->cur->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, c->cur, FALSE); + c->cur_index = (c->cur_index + 1) % c->cipher_blocks; + c->cur_block = 0; + c->cur = NULL; + } + } + } else { + /* Start new IV */ + SILC_SOFTACC_DEBUG(("Start new counter")); + + memcpy(c->iv, iv, SILC_AES_BLOCK); + + if (!c->key_set) + return; + + /* Push current key stream back to queue. We need all of them there + below. */ + if (c->cur) + silc_thread_queue_push(c->queue, c->cur->key_index, c->cur, FALSE); + + /* We must get all key streams and update them */ + for (i = 0; i < c->num_key_stream; i++) { + key = silc_thread_queue_pop(c->queue, i, TRUE); + memcpy(key->ctr, c->iv, SILC_AES_BLOCK); + silc_softacc_add_ctr(key->ctr, SILC_AES_BLOCK, i * c->cipher_blocks); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + } + + c->cur = NULL; + c->cur_index = 0; + c->cur_block = 0; + c->pad = SILC_AES_BLOCK; + } +} + +/* Accelerate cipher */ + +SILC_CIPHER_API_SET_KEY(softacc_cipher_aes) +{ + SilcSoftaccCipher c = context; + SilcSoftacc sa; + SilcUInt32 i; + + /* If key is present set it. If it is NULL this is initialization call. */ + if (key) { + SILC_SOFTACC_DEBUG(("Set key for accelerator %s %p", ops->alg_name, c)); + + aes_encrypt_key(key, keylen, &c->c.aes.u.enc); + c->key_set = TRUE; + + /* Set the counters for each key stream and push them to the queue for + precompuptation. */ + for (i = 0; i < c->num_key_stream; i++) { + memcpy(c->key_stream[i]->ctr, c->iv, SILC_AES_BLOCK); + silc_softacc_add_ctr(c->key_stream[i]->ctr, SILC_AES_BLOCK, + i * c->cipher_blocks); + silc_thread_queue_push(c->queue, c->num_key_stream, c->key_stream[i], + FALSE); + } + + return TRUE; + } + + /* Initialize the accelerator for this cipher */ + SILC_LOG_DEBUG(("Initialize accelerator for %s %p", ops->alg_name, c)); + + sa = silc_global_get_var("softacc", FALSE); + if (!sa) { + SILC_LOG_ERROR(("Software accelerator not initialized")); + return FALSE; + } + + /* Start the queue with sa->cipher_blocks many key streams. One extra pipe + in the queue is used as a return pipe for empty key streams. */ + c->cipher_blocks = sa->cipher_blocks; + c->cipher_threads = sa->cipher_threads; + c->num_key_stream = sa->cipher_streams; + c->key_stream = silc_calloc(c->num_key_stream, sizeof(*c->key_stream)); + if (!c->key_stream) + return FALSE; + for (i = 0; i < c->num_key_stream; i++) { + c->key_stream[i] = silc_malloc(sizeof(**c->key_stream) + + (c->cipher_blocks * SILC_AES_BLOCK)); + if (!c->key_stream[i]) + return FALSE; + c->key_stream[i]->key_index = i; + } + c->queue = silc_thread_queue_alloc(c->num_key_stream + 1, TRUE); + if (!c->queue) + return FALSE; + + /* Start the threads. If thread starting fails, we can't accelerate the + cipher. The uninit operation will clean up any started threads. */ + for (i = 0; i < sa->cipher_threads; i++) + if (!silc_thread_pool_run(sa->tp, FALSE, NULL, + silc_softacc_cipher_aes_thread, + c, silc_softacc_cipher_aes_completion, c)) + return FALSE; + + return TRUE; +} + +/* Accelerated encryption/decryption in CTR mode */ + +SILC_CIPHER_API_ENCRYPT(softacc_cipher_aes) +{ + SilcSoftaccCipher c = context; + SilcSoftaccCipherKeyStream key; + SilcUInt32 pad = c->pad, block = c->cur_block; + SilcUInt32 blocks, cipher_blocks = c->cipher_blocks; + unsigned char *enc_ctr; + + key = c->cur; + if (!key) { + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, key->key_index)); + } + + enc_ctr = key->key + (block << 4); + + /* Compute partial block */ + if (pad < SILC_AES_BLOCK) { + while (len-- > 0) { + *dst++ = *src++ ^ enc_ctr[pad++]; + if (pad == SILC_AES_BLOCK) { + enc_ctr += SILC_AES_BLOCK; + if (++block == cipher_blocks) { + /* Push the used up key stream back to the queue */ + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + key, key->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + + /* Get new key stream from queue */ + c->cur_index = (c->cur_index + 1) % c->num_key_stream; + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, + key->key_index)); + enc_ctr = key->key; + block = 0; + } + break; + } + } + } + + /* Compute full blocks */ + blocks = len >> 4; + len -= (blocks << 4); + while (blocks--) { + /* CTR mode */ +#ifndef WORDS_BIGENDIAN + *(SilcUInt64 *)dst = (*(SilcUInt64 *)src ^ + *(SilcUInt64 *)enc_ctr); + *(SilcUInt64 *)(dst + 8) = (*(SilcUInt64 *)(src + 8) ^ + *(SilcUInt64 *)(enc_ctr + 8)); +#else + SilcUInt64 dst_tmp, src_tmp, enc_ctr_tmp; + + SILC_GET64_MSB(src_tmp, src); + SILC_GET64_MSB(enc_ctr_tmp, enc_ctr); + dst_tmp = src_tmp ^ enc_ctr_tmp; + SILC_PUT64_MSB(dst_tmp, dst); + + SILC_GET64_MSB(src_tmp, src + 8); + SILC_GET64_MSB(enc_ctr_tmp, enc_ctr + 8); + dst_tmp = src_tmp ^ enc_ctr_tmp; + SILC_PUT64_MSB(dst_tmp, dst + 8); +#endif /* !WORDS_BIGENDIAN */ + + src += SILC_AES_BLOCK; + dst += SILC_AES_BLOCK; + enc_ctr += SILC_AES_BLOCK; + + if (++block == cipher_blocks) { + /* Push the used up key stream back to the queue */ + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + key, key->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + + /* Get new key stream from queue */ + c->cur_index = (c->cur_index + 1) % c->num_key_stream; + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, key->key_index)); + enc_ctr = key->key; + block = 0; + } + } + + /* Compute partial block */ + if (len > 0) { + pad = 0; + while (len-- > 0) + *dst++ = *src++ ^ enc_ctr[pad++]; + } + + c->cur_block = block; + c->pad = pad; + + return TRUE; +} + +/****************************** Other ciphers *******************************/ + +/* Thread destructor */ + +static SILC_TASK_CALLBACK(silc_softacc_cipher_completion) +{ + SilcSoftaccCipher c = context; + int i; + + /* Disconnect from key stream queue */ + if (silc_thread_queue_disconnect(c->queue)) + return; + + silc_cipher_free(c->c.ecb); + for (i = 0; i < c->num_key_stream; i++) + silc_free(c->key_stream[i]); + silc_free(c->key_stream); + memset(c, 0, sizeof(*c)); + silc_free(c); +} + +/* Key stream computation thread */ + +void silc_softacc_cipher_thread(SilcSchedule schedule, void *context) +{ + SilcSoftaccCipher c = context; + SilcThreadQueue queue = c->queue; + SilcSoftaccCipherKeyStream key = NULL; + SilcUInt32 i, block_len, num_key_stream = c->num_key_stream; + SilcUInt32 cipher_blocks = c->cipher_blocks; + SilcInt32 k; + unsigned char *enc_ctr; + + SILC_SOFTACC_DEBUG(("Start CTR precomputation thread")); + + block_len = silc_cipher_get_block_len(c->c.ecb); + + /* Connect to the key stream queue */ + silc_thread_queue_connect(queue); + + /* Process key streams. We wait for empty key streams to come from the + last pipe in the queue. Here we precompute the key stream and put them + back to the queue. */ + while (1) { + key = silc_thread_queue_pop(queue, num_key_stream, TRUE); + if (key == SILC_KEYSTREAM_STOP) + break; + + SILC_SOFTACC_DEBUG(("Precompute key stream %p, index %d", key, + key->key_index)); + + /* Encrypt */ + enc_ctr = key->key; + for (i = 0; i < cipher_blocks; i++) { + for (k = block_len - 1; k >= 0; k--) + if (++key->ctr[k]) + break; + c->c.ecb->cipher->encrypt(c->c.ecb, c->c.ecb->cipher, c->c.ecb->context, + key->ctr, enc_ctr, block_len, NULL); + enc_ctr += block_len; + } + + SILC_SOFTACC_DEBUG(("Precomputed key stream %p, index %d", key, + key->key_index)); + + /* Update counter */ + silc_softacc_add_ctr(key->ctr, block_len, + (num_key_stream - 1) * cipher_blocks); + + /* Put it back to queue */ + silc_thread_queue_push(queue, key->key_index, key, FALSE); + } + + SILC_SOFTACC_DEBUG(("End CTR precomputation thread")); +} + +/* Accelerate cipher */ + +SILC_CIPHER_API_SET_KEY(softacc_cipher) +{ + SilcSoftaccCipher c = context; + SilcSoftacc sa; + SilcUInt32 i; + + /* If key is present set it. If it is NULL this is initialization call. */ + if (key) { + SILC_SOFTACC_DEBUG(("Set key for accelerator %s %p", ops->alg_name, c)); + + SILC_VERIFY(c->c.ecb && c->queue); + + if (!silc_cipher_set_key(c->c.ecb, key, keylen, TRUE)) + return FALSE; + c->key_set = TRUE; + + /* Set the counters for each key stream and push them to the queue for + precompuptation. */ + for (i = 0; i < c->num_key_stream; i++) { + memcpy(c->key_stream[i]->ctr, c->iv, silc_cipher_get_iv_len(c->c.ecb)); + silc_softacc_add_ctr(c->key_stream[i]->ctr, + silc_cipher_get_block_len(c->c.ecb), + i * c->cipher_blocks); + silc_thread_queue_push(c->queue, c->num_key_stream, c->key_stream[i], + FALSE); + } + + return TRUE; + } + + /* Initialize the accelerator for this cipher */ + SILC_LOG_DEBUG(("Initialize accelerator for %s %p", ops->alg_name, c)); + + sa = silc_global_get_var("softacc", FALSE); + if (!sa) { + SILC_LOG_ERROR(("Software accelerator not initialized")); + return FALSE; + } + + /* Allocate cipher in ECB mode. It is used to encrypt the key stream. */ + if (!silc_cipher_alloc_full(ops->alg_name, ops->key_len, + SILC_CIPHER_MODE_ECB, &c->c.ecb)) + return FALSE; + + /* Start the queue with sa->cipher_blocks many key streams. One extra pipe + in the queue is used as a return pipe for empty key streams. */ + c->cipher_blocks = sa->cipher_blocks; + c->cipher_threads = sa->cipher_threads; + c->num_key_stream = sa->cipher_streams; + c->key_stream = silc_calloc(c->num_key_stream, sizeof(*c->key_stream)); + if (!c->key_stream) + return FALSE; + for (i = 0; i < c->num_key_stream; i++) { + c->key_stream[i] = silc_malloc(sizeof(**c->key_stream) + + (c->cipher_blocks * + silc_cipher_get_block_len(c->c.ecb))); + if (!c->key_stream[i]) + return FALSE; + c->key_stream[i]->key_index = i; + } + c->queue = silc_thread_queue_alloc(c->num_key_stream + 1, TRUE); + if (!c->queue) + return FALSE; + + /* Start the threads. If thread starting fails, we can't accelerate the + cipher. The uninit operation will clean up any started threads. */ + for (i = 0; i < sa->cipher_threads; i++) + if (!silc_thread_pool_run(sa->tp, FALSE, NULL, silc_softacc_cipher_thread, + c, silc_softacc_cipher_completion, c)) + return FALSE; + + return TRUE; +} + +/* Set IV. Also, reset current block, discarding any remaining unused bits in + the current key block. */ + +SILC_CIPHER_API_SET_IV(softacc_cipher) +{ + SilcSoftaccCipher c = context; + SilcSoftaccCipherKeyStream key; + SilcUInt32 i, block_len, iv_len; + + block_len = silc_cipher_get_block_len(c->c.ecb); + iv_len = silc_cipher_get_iv_len(c->c.ecb); + + if (c->pad > block_len) + c->pad = block_len; + + /* If IV is NULL we start new block */ + if (!iv) { + SILC_SOFTACC_DEBUG(("Start new block")); + + if (c->pad < block_len) { + c->pad = block_len; + + /* Start new block */ + if (++c->cur_block == c->cipher_blocks) { + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + c->cur, c->cur->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, c->cur, FALSE); + c->cur_index = (c->cur_index + 1) % c->cipher_blocks; + c->cur_block = 0; + c->cur = NULL; + } + } + } else { + /* Start new IV */ + SILC_SOFTACC_DEBUG(("Start new counter")); + + memcpy(c->iv, iv, iv_len); + + if (!c->key_set) + return; + + /* Push current key stream back to queue. We need all of them there + below. */ + if (c->cur) + silc_thread_queue_push(c->queue, c->cur->key_index, c->cur, FALSE); + + /* We must get all key streams and update them. */ + for (i = 0; i < c->num_key_stream; i++) { + key = silc_thread_queue_pop(c->queue, i, TRUE); + memcpy(key->ctr, c->iv, iv_len); + silc_softacc_add_ctr(key->ctr, iv_len, i * c->cipher_blocks); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + } + + c->cur = NULL; + c->cur_index = 0; + c->cur_block = 0; + c->pad = block_len; + } +} + +SILC_CIPHER_API_ENCRYPT(softacc_cipher) +{ + SilcSoftaccCipher c = context; + SilcSoftaccCipherKeyStream key; + SilcUInt32 pad = c->pad, block = c->cur_block; + SilcUInt32 cipher_blocks = c->cipher_blocks; + SilcUInt32 blocks, block_len, i; + unsigned char *enc_ctr; + + key = c->cur; + if (!key) { + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, key->key_index)); + } + + block_len = c->c.ecb->cipher->block_len; + enc_ctr = key->key + (block * block_len); + + /* Compute partial block */ + if (pad < block_len) { + while (len-- > 0) { + *dst++ = *src++ ^ enc_ctr[pad++]; + if (pad == block_len) { + enc_ctr += block_len; + if (++block == cipher_blocks) { + /* Push the used up key stream back to the queue */ + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + key, key->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + + /* Get new key stream from queue */ + c->cur_index = (c->cur_index + 1) % c->num_key_stream; + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, + xskey->key_index)); + enc_ctr = key->key; + block = 0; + } + break; + } + } + } + + /* Compute full blocks */ + blocks = (len / block_len); + len -= (blocks * block_len); + while (blocks--) { + /* CTR mode */ +#ifndef WORDS_BIGENDIAN + for (i = 0; i < block_len / sizeof(SilcUInt64); i++) + *(SilcUInt64 *)(dst + (i * sizeof(SilcUInt64))) = + *(SilcUInt64 *)(src + (i * sizeof(SilcUInt64))) ^ + *(SilcUInt64 *)(enc_ctr + (i * sizeof(SilcUInt64))); +#else + SilcUInt64 dst_tmp, src_tmp, enc_ctr_tmp; + + for (i = 0; i < block_len / sizeof(SilcUInt64); i++) { + SILC_GET64_MSB(src_tmp, src + (i * sizeof(SilcUInt64))); + SILC_GET64_MSB(enc_ctr_tmp, enc_ctr + (i * sizeof(SilcUInt64))); + dst_tmp = src_tmp ^ enc_ctr_tmp; + SILC_PUT64_MSB(dst_tmp, dst + (i * sizeof(SilcUInt64))); + } +#endif /* !WORDS_BIGENDIAN */ + + src += block_len; + dst += block_len; + enc_ctr += block_len; + + if (++block == cipher_blocks) { + /* Push the used up key stream back to the queue */ + SILC_SOFTACC_DEBUG(("Push empty key stream %p index %d back to queue", + key, key->key_index)); + silc_thread_queue_push(c->queue, c->num_key_stream, key, FALSE); + + /* Get new key stream from queue */ + c->cur_index = (c->cur_index + 1) % c->num_key_stream; + c->cur = key = silc_thread_queue_pop(c->queue, c->cur_index, TRUE); + SILC_SOFTACC_DEBUG(("Got key stream %p, index %d", key, + key->key_index)); + enc_ctr = key->key; + block = 0; + } + } + + /* Compute partial block */ + if (len > 0) { + pad = 0; + while (len-- > 0) + *dst++ = *src++ ^ enc_ctr[pad++]; + } + + c->cur_block = block; + c->pad = pad; + + return TRUE; +} + +/* Return accelerator cipher context */ + +SILC_CIPHER_API_INIT(softacc_cipher) +{ + SilcSoftaccCipher c = silc_calloc(1, sizeof(*c)); + + if (!c) + return NULL; + + c->pad = 16; + + return c; +} + +/* Uninitialize the cipher accelerator */ + +SILC_CIPHER_API_UNINIT(softacc_cipher) +{ + SilcSoftaccCipher c = context; + int i; + + /* Stop threads */ + if (c->queue) { + for (i = 0; i < c->cipher_threads; i++) + silc_thread_queue_push(c->queue, c->num_key_stream, + SILC_KEYSTREAM_STOP, FALSE); + + /* Disconnect from key stream queue */ + if (silc_thread_queue_disconnect(c->queue)) + return; + } + + silc_free(c->key_stream); + memset(c, 0, sizeof(*c)); + silc_free(c); +}