
add asm versions of imult64 and umult64 for x86_64

(22 cycles -> 12 cycles on my Athlon 64)
Felix von Leitner, 15 years ago
commit d1aef8af83
 CHANGES        |  2 ++
 mult/imult64.c | 22 ++++++++++++++++++++++
 mult/umult64.c | 21 +++++++++++++++++++++
 3 files changed, 45 insertions(+)
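The cycle numbers in the commit message can be checked with a small rdtsc harness. A minimal sketch, assuming libowfat's safemult.h is on the include path; the commit itself ships no benchmark, so this is illustrative only:

#include <stdio.h>
#include <x86intrin.h>   /* __rdtsc() */
#include "safemult.h"    /* libowfat: declares umult64 and the uint64 type */

int main(void) {
  uint64 c;
  unsigned long long best = ~0ull;
  for (int i = 0; i < 100000; ++i) {
    unsigned long long t0 = __rdtsc();
    umult64(0x123456789abcdefull, 3, &c);
    unsigned long long t1 = __rdtsc();
    if (t1 - t0 < best) best = t1 - t0;   /* take the minimum to filter out interrupts */
  }
  printf("best: %llu cycles (includes rdtsc overhead)\n", best);
  return 0;
}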

--- a/CHANGES
+++ b/CHANGES
@@ -18,6 +18,8 @@
 unless you #define _NETBSD_SOURCE, and then their CMSG_* macros use
 it. This is horrible even by OpenBSD standards)
 remove support for ip6.int (it's no longer delegated)
+add asm versions of imult64 and umult64 for x86_64
+(22 cycles -> 12 cycles on my Athlon 64)
 
 0.24:
 fix scan_to_sa (Tim Lorenz)

--- a/mult/imult64.c
+++ b/mult/imult64.c
@@ -1,3 +1,24 @@
+#ifdef __x86_64__
+void imult64() {
+  asm volatile(
+    "xchgq %rdx,%rsi\n"
+    "movq %rdi,%rax\n"
+    "imulq %rdx\n"
+    "jc 1f\n"             /* overflow */
+    "movq %rax,(%rsi)\n"
+    "xorq %rax,%rax\n"
+    "inc %rax\n"
+    "ret\n"
+    "1:\n"
+    "xorq %rax,%rax\n"
+    /* the closing ret is generated by gcc */
+  );
+}
+#else
 #include "safemult.h"
 int imult64(int64 a,int64 b,int64* c) {
@@ -11,3 +32,4 @@ int imult64(int64 a,int64 b,int64* c) {
   return 1;
 }
+#endif
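Why the asm bodies take no C arguments: under the SysV AMD64 calling convention, a arrives in %rdi, b in %rsi, and the result pointer c in %rdx. The opening xchgq %rdx,%rsi parks the pointer in %rsi and puts b into %rdx for the one-operand imulq, which multiplies %rax by %rdx into %rdx:%rax and sets the carry flag when the product does not fit in 64 bits. On success the low word is stored through the saved pointer and the function returns 1; on overflow it returns 0. A minimal caller, sketched against safemult.h:

#include <stdio.h>
#include "safemult.h"   /* libowfat: declares imult64 and the int64 type */

int main(void) {
  int64 c;
  if (imult64((int64)3000000000LL, 3000000000LL, &c))
    printf("fits: %lld\n", (long long)c);   /* 9e18 < 2^63-1 */
  if (!imult64((int64)4000000000LL, 4000000000LL, &c))
    printf("overflow detected\n");          /* 1.6e19 > 2^63-1 */
  return 0;
}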

--- a/mult/umult64.c
+++ b/mult/umult64.c
@@ -1,3 +1,23 @@
+#ifdef __x86_64__
+void umult64() {
+  asm volatile(
+    "xchgq %rdx,%rsi\n"
+    "movq %rdi,%rax\n"
+    "mulq %rdx\n"
+    "jc 1f\n"             /* overflow */
+    "movq %rax,(%rsi)\n"
+    "xorq %rax,%rax\n"
+    "inc %rax\n"
+    "ret\n"
+    "1:\n"
+    "xorq %rax,%rax\n"
+    /* the closing ret is generated by gcc */
+  );
+}
+#else
 #include "safemult.h"
 /* return 0 for overflow, 1 for ok */
@@ -20,3 +40,4 @@ int umult64(uint64 a,uint64 b,uint64* c) {
   return 1;
 }
+#endif
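umult64 is the same pattern with the unsigned one-operand mulq, whose carry flag is set exactly when the high half in %rdx is nonzero. On targets without the asm, the #else branch (truncated in this view) has to detect overflow in portable C; one standard technique, shown here as an illustrative sketch rather than the library's exact code, splits the operands into 32-bit halves:

#include <stdint.h>

/* Hypothetical portable variant; returns 0 for overflow, 1 for ok,
   matching the library's convention. */
int umult64_portable(uint64_t a, uint64_t b, uint64_t *c) {
  uint64_t ahi = a >> 32, alo = (uint32_t)a;
  uint64_t bhi = b >> 32, blo = (uint32_t)b;
  uint64_t mid, lo, r;
  if (ahi && bhi) return 0;        /* product is at least 2^64 */
  mid = ahi * blo + alo * bhi;     /* at most one term is nonzero */
  if (mid >> 32) return 0;         /* cross terms spill past bit 63 */
  lo = alo * blo;
  r = (mid << 32) + lo;
  if (r < lo) return 0;            /* carry out of bit 63 */
  *c = r;
  return 1;
}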
