/*

  silctypes.h

  Author: Pekka Riikonen <priikone@silcnet.org>

  Copyright (C) 2002 - 2007 Pekka Riikonen

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

*/

/****h* silcutil/SILC Types
 *
 * This header file includes basic types and definitions, and various
 * system specific macros and functions used in SILC Toolkits.
 * Application programmers may use them when needed.
 *
 ***/

#ifndef SILCTYPES_H
#define SILCTYPES_H

/* The bool macro is deprecated.  Use SilcBool instead. */
#ifndef __cplusplus
#ifndef bool
#define bool unsigned char
#endif
#endif /* __cplusplus */

#if SILC_SIZEOF_SHORT > 2
#error "size of the short must be 2 bytes"
#endif

/******************************* Public API *********************************/

/****d* silcutil/SILCTypes/SilcBool
 *
 * typedef unsigned char SilcBool;
 *
 * Boolean value, always 8 bits.  Represents the value 0 or 1.
 *
 ***/
typedef unsigned char SilcBool;

/****d* silcutil/SILCTypes/TRUE
 *
 * Boolean true value indicator.
 *
 ***/
#ifndef TRUE
#define TRUE 1
#endif

/****d* silcutil/SILCTypes/FALSE
 *
 * Boolean false value indicator.
 *
 ***/
#ifndef FALSE
#define FALSE 0
#endif

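/* Usage sketch (illustrative, not part of the original header; the helper
   is hypothetical): a predicate returning SilcBool with TRUE and FALSE.

     static SilcBool has_room(unsigned int len, unsigned int room)
     {
       if (len > room)
         return FALSE;
       return TRUE;
     }
*/
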
/****d* silcutil/SILCTypes/SilcUInt8
 *
 * typedef unsigned char SilcUInt8;
 *
 * 8-bit unsigned integer.
 *
 ***/
typedef unsigned char SilcUInt8;

/****d* silcutil/SILCTypes/SilcInt8
 *
 * typedef signed char SilcInt8;
 *
 * 8-bit signed integer.
 *
 ***/
typedef signed char SilcInt8;

/****d* silcutil/SILCTypes/SilcUInt16
 *
 * typedef unsigned short SilcUInt16;
 *
 * 16-bit unsigned integer.  Guaranteed to be 16 bits.
 *
 ***/
typedef unsigned short SilcUInt16;

/****d* silcutil/SILCTypes/SilcInt16
 *
 * typedef signed short SilcInt16;
 *
 * 16-bit signed integer.  Guaranteed to be 16 bits.
 *
 ***/
typedef signed short SilcInt16;

/****d* silcutil/SILCTypes/SilcUInt32
 *
 * typedef unsigned long SilcUInt32;
 *
 * 32-bit unsigned integer.  Guaranteed to be 32 bits.
 *
 ***/
#if SILC_SIZEOF_LONG == 4
typedef unsigned long SilcUInt32;
typedef signed long SilcInt32;
#elif SILC_SIZEOF_INT == 4
typedef unsigned int SilcUInt32;
typedef signed int SilcInt32;
#elif SILC_SIZEOF_LONG_LONG >= 4
typedef unsigned long long SilcUInt32;
typedef signed long long SilcInt32;
#endif

/****d* silcutil/SILCTypes/SilcInt32
 *
 * typedef signed long SilcInt32;
 *
 * 32-bit signed integer.  Guaranteed to be 32 bits.
 *
 ***/

/****d* silcutil/SILCTypes/SilcUInt64
 *
 * typedef unsigned long long SilcUInt64;
 *
 * 64-bit unsigned integer.  Guaranteed to be 64 bits on systems that
 * support 64-bit integers; otherwise falls back to 32 bits.
 *
 ***/
#if SILC_SIZEOF_LONG >= 8
typedef unsigned long SilcUInt64;
typedef signed long SilcInt64;
#elif SILC_SIZEOF_LONG_LONG >= 8
#ifndef WIN32
typedef unsigned long long SilcUInt64;
typedef signed long long SilcInt64;
#else
typedef unsigned __int64 SilcUInt64;
typedef signed __int64 SilcInt64;
#endif /* !WIN32 */
#else
typedef SilcUInt32 SilcUInt64;
typedef SilcInt32 SilcInt64;
#endif

/****d* silcutil/SILCTypes/SilcInt64
 *
 * typedef signed long long SilcInt64;
 *
 * 64-bit signed integer.  Guaranteed to be 64 bits on systems that
 * support 64-bit integers; otherwise falls back to 32 bits.
 *
 ***/

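/* Compile-time size checks (an illustrative sketch, not part of the
   original header): each array gets a negative size, and the build fails,
   if a documented width guarantee does not hold on the platform.

     typedef char silc_uint16_check[sizeof(SilcUInt16) == 2 ? 1 : -1];
     typedef char silc_uint32_check[sizeof(SilcUInt32) == 4 ? 1 : -1];
*/
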
#if SILC_SIZEOF_VOID_P < 4
#error "size of void * must be at least 4 bytes"
#endif

/****d* silcutil/SILCTypes/SilcSocket
 *
 * Platform specific socket.  On POSIX compliant systems this is simply
 * an integer, representing the socket.  On other systems it is platform
 * specific socket context.  Access it only through routines that can
 * handle SilcSocket types, unless you know what you are doing.
 *
 ***/
#if defined(SILC_UNIX)
typedef int SilcSocket;
#elif defined(SILC_WIN32)
typedef SOCKET SilcSocket;
#elif defined(SILC_SYMBIAN)
typedef void * SilcSocket;
#endif

/* Extract a 32-bit integer from a buffer in big-endian (MSB first) order. */
#if (defined(SILC_I486) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_GET_WORD(cp)                                       \
({                                                              \
  SilcUInt32 _result_;                                          \
  asm volatile ("movl (%1), %0; bswapl %0"                      \
                : "=q" (_result_) : "q" (cp));                  \
  _result_;                                                     \
})
#else
#define SILC_GET_WORD(cp) (((SilcUInt32)(SilcUInt8)(cp)[0] << 24)   \
                    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 16)        \
                    | ((SilcUInt32)(SilcUInt8)(cp)[2] << 8)         \
                    | ((SilcUInt32)(SilcUInt8)(cp)[3]))
#endif /* (SILC_I486 || SILC_X86_64) && __GNUC__ */

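/* Usage sketch (illustrative): decoding a 32-bit big-endian length field
   from the beginning of a packet buffer.

     unsigned char packet[4] = { 0x00, 0x00, 0x01, 0x02 };
     SilcUInt32 payload_len = SILC_GET_WORD(packet);   // 0x00000102
*/
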
/****d* silcutil/SILCTypes/SILC_GET16_MSB
 *
 * #define SILC_GET16_MSB(dest, src)
 *
 * Get two 8-bit bytes from `src', most significant byte first, into `dest'.
 *
 ***/
#if (defined(SILC_I386) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_GET16_MSB(l, cp)                           \
asm volatile ("movw (%1), %w0; rolw $8, %w0"            \
              : "=q" (l) : "q" (cp) : "memory", "cc");
#else
#define SILC_GET16_MSB(l, cp)                           \
do {                                                    \
  (l) = ((SilcUInt32)(SilcUInt8)(cp)[0] << 8)           \
    | ((SilcUInt32)(SilcUInt8)(cp)[1]);                 \
} while(0)
#endif /* (SILC_I386 || SILC_X86_64) && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_GET32_MSB
 *
 * #define SILC_GET32_MSB(dest, src)
 *
 * Get four 8-bit bytes from `src', most significant byte first, into `dest'.
 *
 ***/
#if (defined(SILC_I486) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_GET32_MSB(l, cp)                           \
asm volatile ("movl (%1), %0; bswapl %0"                \
              : "=q" (l) : "q" (cp) : "memory", "cc");
#else
#define SILC_GET32_MSB(l, cp)                           \
do {                                                    \
  (l) = ((SilcUInt32)(SilcUInt8)(cp)[0] << 24)          \
    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 16)            \
    | ((SilcUInt32)(SilcUInt8)(cp)[2] << 8)             \
    | ((SilcUInt32)(SilcUInt8)(cp)[3]);                 \
} while(0)
#endif /* (SILC_I486 || SILC_X86_64) && __GNUC__ */

/* Same as above but XOR the result into `l'.  Special purpose macro. */
#if (defined(SILC_I486) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_GET32_X_MSB(l, cp)                         \
do {                                                    \
  register volatile SilcUInt32 _x_;                     \
  asm volatile ("movl %1, %3; movl (%2), %0;\n\t"       \
                "bswapl %0; xorl %3, %0"                \
                : "=r" (l) : "0" (l), "r" (cp), "r" (_x_) \
                : "memory", "cc");                      \
} while(0)
#else
#define SILC_GET32_X_MSB(l, cp)                         \
  (l) ^= ((SilcUInt32)(SilcUInt8)(cp)[0] << 24)         \
    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 16)            \
    | ((SilcUInt32)(SilcUInt8)(cp)[2] << 8)             \
    | ((SilcUInt32)(SilcUInt8)(cp)[3]);
#endif /* (SILC_I486 || SILC_X86_64) && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_GET64_MSB
 *
 * #define SILC_GET64_MSB(dest, src)
 *
 * Get eight 8-bit bytes from `src', most significant byte first, into `dest'.
 *
 ***/
#if defined(SILC_X86_64) && defined(__GNUC__)
#define SILC_GET64_MSB(l, cp)                           \
asm volatile ("movq (%1), %0; bswapq %0"                \
              : "=r" (l) : "r" (cp) : "memory", "cc");
#else
#define SILC_GET64_MSB(l, cp)                                   \
do {                                                            \
  (l) = ((((SilcUInt64)SILC_GET_WORD((cp))) << 32) |            \
         ((SilcUInt64)SILC_GET_WORD((cp) + 4)));                \
} while(0)
#endif /* SILC_X86_64 && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_GET16_LSB
 *
 * #define SILC_GET16_LSB(dest, src)
 *
 * Get two 8-bit bytes from `src', least significant byte first, into `dest'.
 *
 ***/
#if defined(SILC_I386) || defined(SILC_X86_64)
#define SILC_GET16_LSB(l, cp) (l) = (*(SilcUInt16 *)(cp))
#else
#define SILC_GET16_LSB(l, cp)                           \
do {                                                    \
  (l) = ((SilcUInt32)(SilcUInt8)(cp)[0])                \
    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 8);            \
} while(0)
#endif /* SILC_I386 || SILC_X86_64 */

/****d* silcutil/SILCTypes/SILC_GET32_LSB
 *
 * #define SILC_GET32_LSB(dest, src)
 *
 * Get four 8-bit bytes from `src', least significant byte first, into `dest'.
 *
 ***/
#if defined(SILC_I386) || defined(SILC_X86_64)
#define SILC_GET32_LSB(l, cp) (l) = (*(SilcUInt32 *)(cp))
#else
#define SILC_GET32_LSB(l, cp)                           \
do {                                                    \
  (l) = ((SilcUInt32)(SilcUInt8)(cp)[0])                \
    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 8)             \
    | ((SilcUInt32)(SilcUInt8)(cp)[2] << 16)            \
    | ((SilcUInt32)(SilcUInt8)(cp)[3] << 24);           \
} while(0)
#endif /* SILC_I386 || SILC_X86_64 */

/* Same as above but XOR the result into `l'.  Special purpose macro. */
#if defined(SILC_I386) || defined(SILC_X86_64)
#define SILC_GET32_X_LSB(l, cp) (l) ^= (*(SilcUInt32 *)(cp))
#else
#define SILC_GET32_X_LSB(l, cp)                         \
  (l) ^= ((SilcUInt32)(SilcUInt8)(cp)[0])               \
    | ((SilcUInt32)(SilcUInt8)(cp)[1] << 8)             \
    | ((SilcUInt32)(SilcUInt8)(cp)[2] << 16)            \
    | ((SilcUInt32)(SilcUInt8)(cp)[3] << 24)
#endif /* SILC_I386 || SILC_X86_64 */

/****d* silcutil/SILCTypes/SILC_PUT16_MSB
 *
 * #define SILC_PUT16_MSB(dest, src)
 *
 * Put two 8-bit bytes to `dest', most significant byte first.
 *
 ***/
#if (defined(SILC_I386) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_PUT16_MSB(l, cp)                           \
asm volatile ("rolw $8, %w1; movw %w1, (%0)"            \
              : : "q" (cp), "q" (l) : "memory", "cc");
#else
#define SILC_PUT16_MSB(l, cp)                   \
do {                                            \
  (cp)[0] = (SilcUInt8)((l) >> 8);              \
  (cp)[1] = (SilcUInt8)(l);                     \
} while(0)
#endif /* (SILC_I386 || SILC_X86_64) && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_PUT32_MSB
 *
 * #define SILC_PUT32_MSB(dest, src)
 *
 * Put four 8-bit bytes to `dest', most significant byte first.
 *
 ***/
#if (defined(SILC_I486) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_PUT32_MSB(l, cp)                           \
asm volatile ("bswapl %1; movl %1, (%0); bswapl %1"     \
              : : "q" (cp), "q" (l) : "memory", "cc");
#else
#define SILC_PUT32_MSB(l, cp)                   \
do {                                            \
  (cp)[0] = (SilcUInt8)((l) >> 24);             \
  (cp)[1] = (SilcUInt8)((l) >> 16);             \
  (cp)[2] = (SilcUInt8)((l) >> 8);              \
  (cp)[3] = (SilcUInt8)(l);                     \
} while(0)
#endif /* (SILC_I486 || SILC_X86_64) && __GNUC__ */

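/* Usage sketch (illustrative): a network (big-endian) byte order round
   trip with the MSB put/get macros.

     unsigned char buf[4];
     SilcUInt32 out = 0xdeadbeef, in;

     SILC_PUT32_MSB(out, buf);   // buf = de ad be ef
     SILC_GET32_MSB(in, buf);    // in == 0xdeadbeef
*/
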
/****d* silcutil/SILCTypes/SILC_PUT64_MSB
 *
 * #define SILC_PUT64_MSB(dest, src)
 *
 * Put eight 8-bit bytes to `dest', most significant byte first.
 *
 ***/
#if defined(SILC_X86_64) && defined(__GNUC__)
#define SILC_PUT64_MSB(l, cp)                           \
asm volatile ("bswapq %1; movq %1, (%0); bswapq %1"     \
              : : "r" (cp), "r" (l) : "memory", "cc");
#else
#define SILC_PUT64_MSB(l, cp)                                   \
do {                                                            \
  SILC_PUT32_MSB((SilcUInt32)((SilcUInt64)(l) >> 32), (cp));    \
  SILC_PUT32_MSB((SilcUInt32)(l), (cp) + 4);                    \
} while(0)
#endif /* SILC_X86_64 && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_PUT16_LSB
 *
 * #define SILC_PUT16_LSB(dest, src)
 *
 * Put two 8-bit bytes to `dest', least significant byte first.
 *
 ***/
#if defined(SILC_I386) || defined(SILC_X86_64)
#define SILC_PUT16_LSB(l, cp) (*(SilcUInt16 *)(cp)) = (l)
#else
#define SILC_PUT16_LSB(l, cp)                   \
do {                                            \
  (cp)[0] = (SilcUInt8)(l);                     \
  (cp)[1] = (SilcUInt8)((l) >> 8);              \
} while(0)
#endif /* SILC_I386 || SILC_X86_64 */

/****d* silcutil/SILCTypes/SILC_PUT32_LSB
 *
 * #define SILC_PUT32_LSB(dest, src)
 *
 * Put four 8-bit bytes to `dest', least significant byte first.
 *
 ***/
#if defined(SILC_I386) || defined(SILC_X86_64)
#define SILC_PUT32_LSB(l, cp) (*(SilcUInt32 *)(cp)) = (l)
#else
#define SILC_PUT32_LSB(l, cp)                   \
do {                                            \
  (cp)[0] = (SilcUInt8)(l);                     \
  (cp)[1] = (SilcUInt8)((l) >> 8);              \
  (cp)[2] = (SilcUInt8)((l) >> 16);             \
  (cp)[3] = (SilcUInt8)((l) >> 24);             \
} while(0)
#endif /* SILC_I386 || SILC_X86_64 */

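/* Usage sketch (illustrative): the LSB variants store the least
   significant byte first, as used by little-endian wire formats.

     unsigned char buf[4];
     SilcUInt32 out = 0x01020304, in;

     SILC_PUT32_LSB(out, buf);   // buf = 04 03 02 01
     SILC_GET32_LSB(in, buf);    // in == 0x01020304
*/
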
/****d* silcutil/SILCTypes/SILC_SWAB_16
 *
 * #define SILC_SWAB_16(integer)
 *
 * Swaps the byte order of a 16-bit unsigned integer.  Returns the new value.
 *
 ***/
#if (defined(SILC_I386) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_SWAB_16(l)                                 \
({                                                      \
  SilcUInt16 _result_;                                  \
  asm volatile ("movw %w1, %w0; rolw $8, %w0"           \
                : "=q" (_result_) : "q" (l));           \
  _result_;                                             \
})
#else
#define SILC_SWAB_16(l)                                         \
  ((SilcUInt16)(((SilcUInt16)(l) & (SilcUInt16)0x00FFU) << 8) | \
   (((SilcUInt16)(l) & (SilcUInt16)0xFF00U) >> 8))
#endif /* (SILC_I386 || SILC_X86_64) && __GNUC__ */

/****d* silcutil/SILCTypes/SILC_SWAB_32
 *
 * #define SILC_SWAB_32(integer)
 *
 * Swaps the byte order of a 32-bit unsigned integer.  Returns the new value.
 *
 ***/
#if (defined(SILC_I486) || defined(SILC_X86_64)) && defined(__GNUC__)
#define SILC_SWAB_32(l)                                 \
({                                                      \
  SilcUInt32 _result_;                                  \
  asm volatile ("movl %1, %0; bswapl %0"                \
                : "=q" (_result_) : "q" (l));           \
  _result_;                                             \
})
#else
#define SILC_SWAB_32(l)                                                 \
  ((SilcUInt32)(((SilcUInt32)(l) & (SilcUInt32)0x000000FFUL) << 24) |   \
   (((SilcUInt32)(l) & (SilcUInt32)0x0000FF00UL) << 8) |                \
   (((SilcUInt32)(l) & (SilcUInt32)0x00FF0000UL) >> 8) |                \
   (((SilcUInt32)(l) & (SilcUInt32)0xFF000000UL) >> 24))
#endif /* (SILC_I486 || SILC_X86_64) && __GNUC__ */

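/* Usage sketch (illustrative): swapping the byte order of an in-register
   value, for example when converting between byte orders in place.

     SilcUInt32 v = 0x01020304;
     v = SILC_SWAB_32(v);   // v == 0x04030201
*/
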
/****d* silcutil/SILCTypes/SILC_PTR_TO_32
 *
 * #define SILC_PTR_TO_32(ptr)
 *
 * Type casts a pointer's value into a 32-bit integer.  Use this to
 * avoid compiler warnings when type casting pointers to integers
 * of different size.
 *
 ***/
#if SILC_SIZEOF_VOID_P < 8
#define SILC_PTR_TO_32(_ptr__) ((SilcUInt32)(_ptr__))
#else
#define SILC_PTR_TO_32(_ptr__)                                          \
  ((SilcUInt32)((SilcUInt64)(_ptr__) & (SilcUInt32)0xFFFFFFFFUL))
#endif

/****d* silcutil/SILCTypes/SILC_PTR_TO_64
 *
 * #define SILC_PTR_TO_64(ptr)
 *
 * Type casts a pointer's value into a 64-bit integer.  Use this to
 * avoid compiler warnings when type casting pointers to integers
 * of different size.
 *
 ***/
#if SILC_SIZEOF_VOID_P < 8
#define SILC_PTR_TO_64(_ptr__) ((SilcUInt64)((SilcUInt32)(_ptr__)))
#else
#define SILC_PTR_TO_64(_ptr__) ((SilcUInt64)(_ptr__))
#endif

/****d* silcutil/SILCTypes/SILC_32_TO_PTR
 *
 * #define SILC_32_TO_PTR(integer)
 *
 * Type casts a 32-bit integer value into a pointer.  Use this to
 * avoid compiler warnings when type casting integers to pointers of
 * different size.
 *
 ***/
#if SILC_SIZEOF_VOID_P < 8
#define SILC_32_TO_PTR(_ival__) ((void *)((SilcUInt32)(_ival__)))
#else
#define SILC_32_TO_PTR(_ival__) ((void *)((SilcUInt64)(_ival__)))
#endif

/****d* silcutil/SILCTypes/SILC_64_TO_PTR
 *
 * #define SILC_64_TO_PTR(integer)
 *
 * Type casts a 64-bit integer value into a pointer.  Use this to
 * avoid compiler warnings when type casting integers to pointers of
 * different size.
 *
 ***/
#if SILC_SIZEOF_VOID_P < 8
#define SILC_64_TO_PTR(_ival__)                                         \
  ((void *)((SilcUInt32)((SilcUInt64)(_ival__) & (SilcUInt32)0xFFFFFFFFUL)))
#else
#define SILC_64_TO_PTR(_ival__) ((void *)((SilcUInt64)(_ival__)))
#endif

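/* Usage sketch (illustrative; the callback and scheduler are hypothetical):
   carrying a small integer through a void * context argument without
   pointer-size cast warnings.

     static void timeout_cb(void *context)
     {
       SilcUInt32 id = SILC_PTR_TO_32(context);
       // ... look up the pending operation by `id' ...
     }

     // schedule_timeout(timeout_cb, SILC_32_TO_PTR(id));
*/
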
/****d* silcutil/SILCTypes/silc_rol
 *
 * static inline SilcUInt32 silc_rol(SilcUInt32 val, int num);
 *
 * Rotate the bits of a 32-bit integer to the left by `num' bits.  Bits
 * pushed out on the left reappear on the right side of the integer,
 * thus rotating.  Returns the rotated value.
 *
 ***/
static inline SilcUInt32 silc_rol(SilcUInt32 val, int num)
{
#if (defined(SILC_I386) || defined(SILC_X86_64)) && defined(__GNUC__)
  asm volatile ("roll %%cl, %0"
                : "=q" (val) : "0" (val), "c" (num));
  return val;
#else
  return ((val << (SilcUInt32)num) | (val >> (32 - (SilcUInt32)num)));
#endif /* (SILC_I386 || SILC_X86_64) && __GNUC__ */
}

/****d* silcutil/SILCTypes/silc_ror
 *
 * static inline SilcUInt32 silc_ror(SilcUInt32 val, int num);
 *
 * Rotate the bits of a 32-bit integer to the right by `num' bits.  Bits
 * pushed out on the right reappear on the left side of the integer,
 * thus rotating.  Returns the rotated value.
 *
 ***/
static inline SilcUInt32 silc_ror(SilcUInt32 val, int num)
{
#if (defined(SILC_I386) || defined(SILC_X86_64)) && defined(__GNUC__)
  asm volatile ("rorl %%cl, %0"
                : "=q" (val) : "0" (val), "c" (num));
  return val;
#else
  return ((val >> (SilcUInt32)num) | (val << (32 - (SilcUInt32)num)));
#endif /* (SILC_I386 || SILC_X86_64) && __GNUC__ */
}

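/* Usage sketch (illustrative): rotation is lossless, so rotating left and
   then right by the same amount restores the original value.

     SilcUInt32 v = 0x80000001;
     v = silc_rol(v, 4);   // v == 0x00000018
     v = silc_ror(v, 4);   // v == 0x80000001 again
*/
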
/****d* silcutil/SILCTypes/silc_rol64
 *
 * static inline SilcUInt64 silc_rol64(SilcUInt64 val, int num);
 *
 * Rotate the bits of a 64-bit integer to the left by `num' bits.  Bits
 * pushed out on the left reappear on the right side of the integer,
 * thus rotating.  Returns the rotated value.
 *
 ***/
static inline SilcUInt64 silc_rol64(SilcUInt64 val, int num)
{
#if defined(SILC_X86_64) && defined(__GNUC__)
  asm volatile ("rolq %%cl, %0"
                : "=q" (val) : "0" (val), "c" (num));
  return val;
#else
  return ((val << (SilcUInt64)num) | (val >> (64 - (SilcUInt64)num)));
#endif /* SILC_X86_64 && __GNUC__ */
}

/****d* silcutil/SILCTypes/silc_ror64
 *
 * static inline SilcUInt64 silc_ror64(SilcUInt64 val, int num);
 *
 * Rotate the bits of a 64-bit integer to the right by `num' bits.  Bits
 * pushed out on the right reappear on the left side of the integer,
 * thus rotating.  Returns the rotated value.
 *
 ***/
static inline SilcUInt64 silc_ror64(SilcUInt64 val, int num)
{
#if defined(SILC_X86_64) && defined(__GNUC__)
  asm volatile ("rorq %%cl, %0"
                : "=q" (val) : "0" (val), "c" (num));
  return val;
#else
  return ((val >> (SilcUInt64)num) | (val << (64 - (SilcUInt64)num)));
#endif /* SILC_X86_64 && __GNUC__ */
}

/****d* silcutil/SILCTypes/silc_offsetof
 *
 * #define silc_offsetof(TYPE, MEMBER)
 *
 * offsetof() macro replacement.  Use this instead of offsetof().
 *
 ***/
#define silc_offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

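/* Usage sketch (illustrative): computing a member's byte offset, e.g. to
   recover a containing structure from a pointer to one of its members.

     struct entry {
       SilcUInt32 id;
       char name[32];
     };

     size_t off = silc_offsetof(struct entry, name);   // typically 4
*/
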
/****d* silcutil/SILCTypes/silc_attribute
 *
 * #define silc_attribute(attrlist)
 *
 * Compiler attributes.  If the compiler doesn't support attributes this
 * macro does nothing.  Currently this works only with the GCC compiler.
 * See the GCC documentation for the supported attributes.
 *
 * For example:
 *
 * int printf(const char *fmt, ...) silc_attribute((format(printf, 1, 2)));
 *
 ***/
#if defined(__GNUC__)
#define silc_attribute(attrlist) __attribute__(attrlist)
#else
#define silc_attribute(attrlist)
#endif /* __GNUC__ */

/****d* silcutil/SILCTypes/silc_likely
 *
 * #define silc_likely(expression)
 *
 * Branch prediction macro.  It specifies that it is likely that the branch
 * where silc_likely is applied is taken.  The compiler will optimize the
 * code based on this prediction.  Never use this before you have profiled
 * the code first.
 *
 ***/

/****d* silcutil/SILCTypes/silc_unlikely
 *
 * #define silc_unlikely(expression)
 *
 * Branch prediction macro.  It specifies that it is unlikely that the
 * branch where silc_unlikely is applied is taken.  The compiler will
 * optimize the code based on this prediction.  Never use this before
 * you have profiled the code first.
 *
 ***/
#if __GNUC__ >= 3
#define silc_likely(expr) __builtin_expect(!!(expr), 1)
#define silc_unlikely(expr) __builtin_expect(!!(expr), 0)
#else
#define silc_likely(expr) (expr)
#define silc_unlikely(expr) (expr)
#endif /* __GNUC__ >= 3 */

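/* Usage sketch (illustrative): annotating an error path that profiling
   has shown to be rare.

     if (silc_unlikely(buffer == NULL))
       return FALSE;
*/
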
/* Prefetch operations.  Use these to prefetch data to CPU cache before
   reading or writing it, if you think that the data will be needed soon
   after the prefetch. */

/****d* silcutil/SILCTypes/silc_prefetch
 *
 * static inline void silc_prefetch(void *addr, int rw, int locality);
 *
 * Simple prefetch.  Loads memory from the specified address to the CPU
 * cache.  The amount of data loaded is CPU dependent (cache line length).
 * The `rw' argument defines the reason for the prefetch: 0=read, 1=write.
 * The `locality' argument defines the locality of the prefetch:
 * 0=non-temporal (non-temporal cache, cache closest to CPU, data will not
 * stay long in the cache), 1=temporal (L2+ cache), 2=temporal (L2, L3+
 * cache), 3=temporal (fetch to all caches, data stays longer in the cache).
 *
 * This produces only a hint for the CPU.  The CPU doesn't have to actually
 * prefetch the data.  Use silc_prefetch_block to ensure the CPU always
 * prefetches.
 *
 ***/
static inline silc_attribute((always_inline))
void silc_prefetch(void *addr, int rw, int locality)
{
#ifdef __GNUC__
  __builtin_prefetch(addr, rw, locality);
#endif /* __GNUC__ */
}

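/* Usage sketch (illustrative): hinting the CPU to load data for reading,
   with high temporal locality, before a loop touches it.

     silc_prefetch(data, 0, 3);
     for (i = 0; i < len; i++)
       sum += data[i];
*/
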
/****d* silcutil/SILCTypes/silc_prefetch_block
 *
 * static inline void silc_prefetch_block(void *addr,
 *                                        int prefetch_length,
 *                                        const int cache_line_length)
 *
 * Enforced block prefetch.  This function loads the specified amount
 * `prefetch_length' of memory from the specified address `addr' to the CPU
 * cache, with each loaded cache line being the size of `cache_line_length'.
 * If you don't know the cache line size, use 64 bytes.  Note that the
 * `cache_line_length' is a const int.  In this context this means its
 * value must not come from a variable but must be a constant (the code
 * won't compile if it comes from a variable).
 *
 * The `prefetch_length' must be a multiple of twice the
 * `cache_line_length', or of 128 if you don't know the cache line size;
 * hence the minimum length for `prefetch_length' is 128 bytes when the
 * `cache_line_length' is 64 bytes.  A shorter cache line length (32 bytes)
 * lowers the minimum `prefetch_length' to 64 bytes.
 *
 * You should use the correct `cache_line_length' value for your CPU or
 * the value of the CPU for which you want to optimize your code.  Intel
 * CPUs usually have a cache line size of 32 or 64 bytes.  The most optimal
 * prefetch is achieved when the `cache_line_length' is the actual CPU cache
 * line size.  Always do performance testing with and without prefetching
 * to make sure the prefetch actually helps.  If used improperly, it may
 * slow down your program.
 *
 * The difference to silc_prefetch is that this function always performs
 * the prefetch and has the ability to prefetch more than one cache line
 * worth of memory, whereas silc_prefetch can prefetch only one cache line
 * and may not do the prefetch at all.
 *
 ***/
static inline silc_attribute((always_inline))
void silc_prefetch_block(void *addr,
                         int prefetch_length,
                         const int cache_line_length)
{
#if 0
  SILC_ASSERT(cache_line_length >= 32);
  SILC_ASSERT(cache_line_length % 32 == 0);
  SILC_ASSERT(prefetch_length >= cache_line_length);
  SILC_ASSERT(prefetch_length % (cache_line_length * 2) == 0);
#endif

#if SILC_SIZEOF_VOID_P < 8
#define SILC_PREFETCH_UINT SilcUInt32
#else
#define SILC_PREFETCH_UINT SilcUInt64
#endif /* SILC_SIZEOF_VOID_P < 8 */

#if defined(__GNUC__) && (defined(SILC_I386) || defined(SILC_X86_64))

  /* Assembler implementation.

     The idea here is to simply force the CPU to load the requested amount
     of bytes to the cache.  We simply mov data from the memory to a register.
     Each mov will load a full cache line worth of data from the memory.

     We expect the `cache_line_length' to be the actual cache line size.
     It doesn't matter if it isn't.  If it is smaller, the prefetch is a bit
     slower as there is redundancy.  If it is larger, we skip some of the
     data and don't prefetch everything.

     The loop is unrolled to handle two movs at once; this is why we expect
     the `prefetch_length' to be a multiple of twice the length of
     `cache_line_length'.  We also mov the data from the end to the beginning
     instead of from the beginning, to assure the CPU doesn't prefetch the
     data before we actually want it to.

     This technique is described by AMD in:
     http://cdrom.amd.com/devconn/events/AMD_block_prefetch_paper.pdf */

  {
    SILC_PREFETCH_UINT temp;

#define SILC_PREFETCH_ASM(ip, rp)                               \
    asm volatile ("1:                              \n\t"        \
                  "mov" ip " -%c4(%2, %" rp "3), %0 \n\t"       \
                  "mov" ip " -%c5(%2, %" rp "3), %0 \n\t"       \
                  "sub" ip " %5, %" rp "3          \n\t"        \
                  "jnz 1b"                                      \
                  : "=&r" (temp), "=r" (prefetch_length)        \
                  : "r" (addr), "1" (prefetch_length),          \
                    "Z" (cache_line_length),                    \
                    "Z" (cache_line_length * 2)                 \
                  : "memory", "cc");

#if defined(SILC_I386)
    /* 32-bit prefetch */
    SILC_PREFETCH_ASM("l", "");
#else
    /* 64-bit prefetch */
    SILC_PREFETCH_ASM("q", "q");
#endif /* SILC_I386 */
  }

#else

  /* C implementation.  Yes, you can do it in C too.  In fact, we'll try to
     make the compiler generate nearly identical code to the above assembler
     code.  Note that the memory accesses must be volatile, otherwise the
     compiler will optimize them away because the temp variable isn't actually
     used for anything.  This should be as fast as the assembler code above,
     unless the compiler decides to start meddling with it (don't use
     -funroll-loops with this code). */

  {
    register unsigned char *a = addr;
    register int len = prefetch_length;
    register SILC_PREFETCH_UINT temp;

    do {
      temp = *(SILC_PREFETCH_UINT volatile *)
        (a + (len - cache_line_length));
      temp = *(SILC_PREFETCH_UINT volatile *)
        (a + (len - (cache_line_length * 2)));
      len -= (cache_line_length * 2);
    } while (len != 0);
  }

#endif /* __GNUC__ */
#undef SILC_PREFETCH_UINT
#undef SILC_PREFETCH_ASM
}

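/* Usage sketch (illustrative; process() is hypothetical): prefetching a
   4096-byte buffer before heavy processing, assuming a 64-byte cache
   line.  4096 is a multiple of 128, as the description above requires.

     silc_prefetch_block(buf, 4096, 64);
     process(buf, 4096);
*/
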
#endif /* SILCTYPES_H */