mmx.h

00001 /*
00002     mmx.h
00003 
00004     MultiMedia eXtensions GCC interface library for IA32.
00005 
00006     To use this library, simply include this header file
00007     and compile with GCC.  You MUST have inlining enabled
00008     in order for mmx_ok() to work; this can be done by
00009     simply using -O on the GCC command line.
00010 
00011     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
00012     EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
00013     LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
00014     AND FITNESS FOR ANY PARTICULAR PURPOSE.
00015 
00016     1997-98 by H. Dietz and R. Fisher
00017 
00018     $Id: mmx.h,v 1.3 2005/12/06 14:34:03 steveu Exp $
00019 */
00020 
00021 #if !defined(_MMX_H_)
00022 #define _MMX_H_
00023 
00024 #if defined(__i386__)
00025 /*
00026     The type of a value that fits in an MMX register: 64 bits, viewable as 1, 2, 4 or 8 lanes
00027     (note that long long constant values MUST be suffixed
00028     by LL and unsigned long long values by ULL, lest
00029     they be truncated by the compiler)
00030 */
00031 typedef union
00032 {
00033     int64_t     q;      /* 1 signed Quadword (64-bit) value */
00034     uint64_t    uq;     /* 1 Unsigned Quadword */
00035     int32_t     d[2];   /* 2 signed Doubleword (32-bit) values */
00036     uint32_t    ud[2];  /* 2 Unsigned Doublewords */
00037     int16_t     w[4];   /* 4 signed Word (16-bit) values */
00038     uint16_t    uw[4];  /* 4 Unsigned Words */
00039     int8_t      b[8];   /* 8 signed Byte (8-bit) values */
00040     uint8_t     ub[8];  /* 8 Unsigned Bytes */
00041     float       s[2];   /* 2 Single-precision (32-bit) values */
00042 } mmx_t;
00043 
00044 typedef union  /* 128-bit counterpart of mmx_t: same views, twice the lanes (presumably an XMM register image - confirm) */
00045 {
00046     int64_t     q[2];   /* 2 signed Quadword (64-bit) values */
00047     uint64_t    uq[2];  /* 2 Unsigned Quadwords */
00048     int32_t     d[4];   /* 4 signed Doubleword (32-bit) values */
00049     uint32_t    ud[4];  /* 4 Unsigned Doublewords */
00050     int16_t     w[8];   /* 8 signed Word (16-bit) values */
00051     uint16_t    uw[8];  /* 8 Unsigned Words */
00052     int8_t      b[16];  /* 16 signed Byte (8-bit) values */
00053     uint8_t     ub[16]; /* 16 Unsigned Bytes */
00054     float       s[4];   /* 4 Single-precision (32-bit) values */
00055 } xmm_t;
00056 
00057 #ifdef __cplusplus
00058 extern "C" {
00059 #endif
00060 
/*
    Probe for the CPUID instruction by trying to flip the ID flag
    (bit 21, mask 0x200000) of the flags register.

    Returns non-zero when the flag could be toggled, i.e. CPUID exists.
    The flags word read on entry is written back before returning, so
    the caller's flags are left as they were.

    The push/pop mnemonics carry no operand-size suffix; the assembler
    picks the native stack width from the register operand.
*/
static inline int mmx_cpuid_present(void)
{
    unsigned long original;
    unsigned long toggled;

    __asm__ __volatile__ (
        /* Fetch the current flags and keep an untouched copy */
        "pushf\n\t"
        "pop %0\n\t"
        "mov %0, %1\n\t"

        /* Flip the ID bit in the working copy and load it into flags */
        "xor $0x200000, %1\n\t"
        "push %1\n\t"
        "popf\n\t"

        /* Read the flags back to see whether the flip was accepted */
        "pushf\n\t"
        "pop %1\n\t"

        /* Put the original flags back */
        "push %0\n\t"
        "popf"
        : "=&r" (original), "=&r" (toggled)
        : /* no input */
        : "cc"
    );

    return ((original ^ toggled) & 0x200000UL) != 0;
}

/*
    Execute CPUID for the given leaf.

    leaf  value loaded into EAX before the instruction
    regs  receives EAX, EBX, ECX, EDX in regs[0]..regs[3]
*/
static inline void mmx_cpuid(unsigned int leaf, unsigned int regs[4])
{
    __asm__ __volatile__ (
        "cpuid"
        : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
        : "0" (leaf)
    );
}

/*
    Function to test if multimedia instructions are supported...

    Returns 1 if MMX instructions are supported,
            3 if Cyrix MMX and Extended MMX instructions are supported
            5 if AMD MMX and 3DNow! instructions are supported
            0 if hardware does not support any of these

    The detection is written in C around two small asm helpers, so no
    assembler labels are emitted and the function can safely be
    inlined any number of times in one translation unit.
*/
static inline int mm_support(void)
{
    const unsigned int mmx_bit = 0x00800000U;         /* EDX bit 23: MMX */
    const unsigned int emmx_bit = 0x01000000U;        /* EDX bit 24: Cyrix Ext'd MMX */
    const unsigned int threednow_bit = 0x80000000U;   /* EDX bit 31: AMD 3DNow! */
    unsigned int regs[4];
    unsigned int vendor_bit;    /* Vendor-specific extended feature bit, 0 if none */
    int vendor_result;          /* Value to return when vendor_bit is set */

    /* Without CPUID nothing can be detected */
    if (!mmx_cpuid_present())
    {
        return 0;
    }

    /* Leaf 0 returns the 12-byte vendor string in EBX, EDX, ECX */
    mmx_cpuid(0, regs);
    if (regs[1] == 0x756e6547U  &&  regs[3] == 0x49656e69U  &&  regs[2] == 0x6c65746eU)
    {
        /* "GenuineIntel": only the standard feature leaf is consulted */
        vendor_bit = 0;
        vendor_result = 1;
    }
    else if (regs[1] == 0x68747541U  &&  regs[3] == 0x69746e65U  &&  regs[2] == 0x444d4163U)
    {
        /* "AuthenticAMD" */
        vendor_bit = threednow_bit;
        vendor_result = 5;
    }
    else if (regs[1] == 0x69727943U  &&  regs[3] == 0x736e4978U  &&  regs[2] == 0x64616574U)
    {
        /* "CyrixInstead" */
        vendor_bit = emmx_bit;
        vendor_result = 3;
    }
    else
    {
        /* Unknown vendor: nothing supported */
        return 0;
    }

    if (vendor_bit != 0)
    {
        /* Leaf 0x80000000 reports the highest extended leaf in EAX.
           The comparison must be unsigned: as a signed value
           0x80000000 is the most negative int, so a signed
           "less than" test could never select the fallback. */
        mmx_cpuid(0x80000000U, regs);
        if (regs[0] >= 0x80000001U)
        {
            /* Extended feature flags are returned in EDX */
            mmx_cpuid(0x80000001U, regs);
            if ((regs[3] & mmx_bit) == 0)
            {
                return 0;
            }
            return (regs[3] & vendor_bit)  ?  vendor_result  :  1;
        }
        /* No extended leaves: try standard CPUID instead */
    }

    /* Standard feature leaf */
    mmx_cpuid(1, regs);
    return (regs[3] & mmx_bit)  ?  1  :  0;
}
00213 
/*
    Convenience predicate built on mm_support(): reports whether
    the baseline MMX instruction set is available.
*/
static inline int mmx_ok(void)
{
    /* Bit 0 is set in every non-zero code mm_support() documents
       (1, 3 and 5), so masking it yields 1 for MMX and 0 otherwise */
    const int support_code = mm_support();

    return support_code & 0x1;
}
00222 
/*
    Helper macros for the instruction macros that follow...
    (note that memory-to-register, m2r, instructions are nearly
    as efficient as register-to-register, r2r, instructions;
    however, memory-to-memory instructions are really simulated
    as a convenience, and are only 1/3 as efficient)

    Every helper expands to a single asm statement with NO trailing
    semicolon, so a call site reads like an ordinary statement and
    stays valid as the unbraced body of an if/else.

    Register arguments are bare names (mm0 ... mm7) that are pasted
    into the instruction text; the compiler is not told which MMX
    registers are touched.
*/

/* op $imm, %reg - imm is pasted verbatim into the instruction text */
#define mmx_i2r(op, imm, reg) \
    __asm__ __volatile__ (#op " $" #imm ", %%" #reg \
                          : /* nothing */ \
                          : /* nothing */)

/* op mem, %reg - mem is only read */
#define mmx_m2r(op, mem, reg) \
    __asm__ __volatile__ (#op " %0, %%" #reg \
                          : /* nothing */ \
                          : "X" (mem))

/* op %reg, mem - mem is only written */
#define mmx_r2m(op, reg, mem) \
    __asm__ __volatile__ (#op " %%" #reg ", %0" \
                          : "=X" (mem) \
                          : /* nothing */ )

/* op %regs, %regd - no operand lists, hence a single '%' before each name */
#define mmx_r2r(op, regs, regd) \
    __asm__ __volatile__ (#op " %" #regs ", %" #regd)

/* memd = memd op mems, staged through mm0 (whose old contents are lost).
   memd is loaded before it is stored, so it is a read-write operand. */
#define mmx_m2m(op, mems, memd) \
    __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
                          #op " %1, %%mm0\n\t" \
                          "movq %%mm0, %0" \
                          : "+m" (memd) \
                          : "m" (mems))
00254 
00255 /*
00256     1x64 MOVe Quadword
00257     (this is both a load and a store: movq_m2r loads var into reg,
00258     movq_r2m stores reg into var, movq_r2r copies regs to regd)
          The bare movq(vars, vard) copies one variable to another
          through mm0, so whatever mm0 held before is lost.
00259 */
00260 #define movq_m2r(var, reg)          mmx_m2r(movq, var, reg)
00261 #define movq_r2m(reg, var)          mmx_r2m(movq, reg, var)
00262 #define movq_r2r(regs, regd)        mmx_r2r(movq, regs, regd)
00263 #define movq(vars, vard) \
00264     __asm__ __volatile__ ("movq %1, %%mm0\n\t" \
00265                           "movq %%mm0, %0" \
00266                           : "=X" (vard) \
00267                           : "X" (vars))
00268 
00269 /*
00270     1x32 MOVe Doubleword
00271     (like movq, this is both load and store...
00272     but is most useful for moving things between
00273     mmx registers and ordinary registers)
          The bare movd(vars, vard) also stages the value in mm0.
00274 */
00275 #define movd_m2r(var, reg)          mmx_m2r(movd, var, reg)
00276 #define movd_r2m(reg, var)          mmx_r2m(movd, reg, var)
00277 #define movd_r2r(regs, regd)        mmx_r2r(movd, regs, regd)
00278 #define movd(vars, vard) \
00279     __asm__ __volatile__ ("movd %1, %%mm0\n\t" \
00280                           "movd %%mm0, %0" \
00281                           : "=X" (vard) \
00282                           : "X" (vars))
00283 
00284 /*
00285     2x32, 4x16, and 8x8 Parallel ADDs
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable into MMX register
            op_r2r(regs, regd)  MMX register into MMX register
            op(vars, vard)      variable into variable via mmx_m2m, overwriting mm0
00286 */
00287 #define paddd_m2r(var, reg)         mmx_m2r(paddd, var, reg)
00288 #define paddd_r2r(regs, regd)       mmx_r2r(paddd, regs, regd)
00289 #define paddd(vars, vard)           mmx_m2m(paddd, vars, vard)
00290 
00291 #define paddw_m2r(var, reg)         mmx_m2r(paddw, var, reg)
00292 #define paddw_r2r(regs, regd)       mmx_r2r(paddw, regs, regd)
00293 #define paddw(vars, vard)           mmx_m2m(paddw, vars, vard)
00294 
00295 #define paddb_m2r(var, reg)         mmx_m2r(paddb, var, reg)
00296 #define paddb_r2r(regs, regd)       mmx_r2r(paddb, regs, regd)
00297 #define paddb(vars, vard)           mmx_m2m(paddb, vars, vard)
00298 
00299 /*
00300     4x16 and 8x8 Parallel ADDs using Saturation arithmetic
00301 */
00302 #define paddsw_m2r(var, reg)        mmx_m2r(paddsw, var, reg)
00303 #define paddsw_r2r(regs, regd)      mmx_r2r(paddsw, regs, regd)
00304 #define paddsw(vars, vard)          mmx_m2m(paddsw, vars, vard)
00305 
00306 #define paddsb_m2r(var, reg)        mmx_m2r(paddsb, var, reg)
00307 #define paddsb_r2r(regs, regd)      mmx_r2r(paddsb, regs, regd)
00308 #define paddsb(vars, vard)          mmx_m2m(paddsb, vars, vard)
00309 
00310 /*
00311     4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
00312 */
00313 #define paddusw_m2r(var, reg)       mmx_m2r(paddusw, var, reg)
00314 #define paddusw_r2r(regs, regd)     mmx_r2r(paddusw, regs, regd)
00315 #define paddusw(vars, vard)         mmx_m2m(paddusw, vars, vard)
00316 
00317 #define paddusb_m2r(var, reg)       mmx_m2r(paddusb, var, reg)
00318 #define paddusb_r2r(regs, regd)     mmx_r2r(paddusb, regs, regd)
00319 #define paddusb(vars, vard)         mmx_m2m(paddusb, vars, vard)
00320 
00321 /*
00322     2x32, 4x16, and 8x8 Parallel SUBs
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable applied to MMX register
            op_r2r(regs, regd)  MMX register applied to MMX register
            op(vars, vard)      variable applied to variable via mmx_m2m, overwriting mm0
00323 */
00324 #define psubd_m2r(var, reg)         mmx_m2r(psubd, var, reg)
00325 #define psubd_r2r(regs, regd)       mmx_r2r(psubd, regs, regd)
00326 #define psubd(vars, vard)           mmx_m2m(psubd, vars, vard)
00327 
00328 #define psubw_m2r(var, reg)         mmx_m2r(psubw, var, reg)
00329 #define psubw_r2r(regs, regd)       mmx_r2r(psubw, regs, regd)
00330 #define psubw(vars, vard)           mmx_m2m(psubw, vars, vard)
00331 
00332 #define psubb_m2r(var, reg)         mmx_m2r(psubb, var, reg)
00333 #define psubb_r2r(regs, regd)       mmx_r2r(psubb, regs, regd)
00334 #define psubb(vars, vard)           mmx_m2m(psubb, vars, vard)
00335 
00336 /*
00337     4x16 and 8x8 Parallel SUBs using Saturation arithmetic
00338 */
00339 #define psubsw_m2r(var, reg)        mmx_m2r(psubsw, var, reg)
00340 #define psubsw_r2r(regs, regd)      mmx_r2r(psubsw, regs, regd)
00341 #define psubsw(vars, vard)          mmx_m2m(psubsw, vars, vard)
00342 
00343 #define psubsb_m2r(var, reg)        mmx_m2r(psubsb, var, reg)
00344 #define psubsb_r2r(regs, regd)      mmx_r2r(psubsb, regs, regd)
00345 #define psubsb(vars, vard)          mmx_m2m(psubsb, vars, vard)
00346 
00347 /*
00348     4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
00349 */
00350 #define psubusw_m2r(var, reg)       mmx_m2r(psubusw, var, reg)
00351 #define psubusw_r2r(regs, regd)     mmx_r2r(psubusw, regs, regd)
00352 #define psubusw(vars, vard)         mmx_m2m(psubusw, vars, vard)
00353 
00354 #define psubusb_m2r(var, reg)       mmx_m2r(psubusb, var, reg)
00355 #define psubusb_r2r(regs, regd)     mmx_r2r(psubusb, regs, regd)
00356 #define psubusb(vars, vard)         mmx_m2m(psubusb, vars, vard)
00357 
00358 /*
00359     4x16 Parallel MULs giving Low 4x16 portions of results
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable applied to MMX register
            op_r2r(regs, regd)  MMX register applied to MMX register
            op(vars, vard)      variable applied to variable via mmx_m2m, overwriting mm0
00360 */
00361 #define pmullw_m2r(var, reg)        mmx_m2r(pmullw, var, reg)
00362 #define pmullw_r2r(regs, regd)      mmx_r2r(pmullw, regs, regd)
00363 #define pmullw(vars, vard)          mmx_m2m(pmullw, vars, vard)
00364 
00365 /*
00366     4x16 Parallel MULs giving High 4x16 portions of results
00367 */
00368 #define pmulhw_m2r(var, reg)        mmx_m2r(pmulhw, var, reg)
00369 #define pmulhw_r2r(regs, regd)      mmx_r2r(pmulhw, regs, regd)
00370 #define pmulhw(vars, vard)          mmx_m2m(pmulhw, vars, vard)
00371 
00372 /*
00373     4x16->2x32 Parallel Mul-ADD
00374     (multiplies the 16-bit fields, then adds adjacent
00375     products together to make the final 2x32 result)
00376 */
00377 #define pmaddwd_m2r(var, reg)       mmx_m2r(pmaddwd, var, reg)
00378 #define pmaddwd_r2r(regs, regd)     mmx_r2r(pmaddwd, regs, regd)
00379 #define pmaddwd(vars, vard)         mmx_m2m(pmaddwd, vars, vard)
00380 
00381 /*
00382     1x64 bitwise AND
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable applied to MMX register
            op_r2r(regs, regd)  MMX register applied to MMX register
            op(vars, vard)      variable applied to variable via mmx_m2m, overwriting mm0
00383 */
00384 #define pand_m2r(var, reg)          mmx_m2r(pand, var, reg)
00385 #define pand_r2r(regs, regd)        mmx_r2r(pand, regs, regd)
00386 #define pand(vars, vard)            mmx_m2m(pand, vars, vard)
00387 
00388 /*
00389     1x64 bitwise AND with Not the destination
00390 */
00391 #define pandn_m2r(var, reg)         mmx_m2r(pandn, var, reg)
00392 #define pandn_r2r(regs, regd)       mmx_r2r(pandn, regs, regd)
00393 #define pandn(vars, vard)           mmx_m2m(pandn, vars, vard)
00394 
00395 /*
00396     1x64 bitwise OR
00397 */
00398 #define por_m2r(var, reg)           mmx_m2r(por, var, reg)
00399 #define por_r2r(regs, regd)         mmx_r2r(por, regs, regd)
00400 #define por(vars, vard)             mmx_m2m(por, vars, vard)
00401 
00402 /*
00403     1x64 bitwise eXclusive OR
00404 */
00405 #define pxor_m2r(var, reg)          mmx_m2r(pxor, var, reg)
00406 #define pxor_r2r(regs, regd)        mmx_r2r(pxor, regs, regd)
00407 #define pxor(vars, vard)            mmx_m2m(pxor, vars, vard)
00408 
00409 /*
00410     2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
00411     (resulting fields are either 0 or -1)
          Operand forms (first argument is the source, second the destination,
          which also receives the result):
            op_m2r(var, reg)    variable against MMX register
            op_r2r(regs, regd)  MMX register against MMX register
            op(vars, vard)      variable against variable via mmx_m2m, overwriting mm0
00412 */
00413 #define pcmpeqd_m2r(var, reg)       mmx_m2r(pcmpeqd, var, reg)
00414 #define pcmpeqd_r2r(regs, regd)     mmx_r2r(pcmpeqd, regs, regd)
00415 #define pcmpeqd(vars, vard)         mmx_m2m(pcmpeqd, vars, vard)
00416 
00417 #define pcmpeqw_m2r(var, reg)       mmx_m2r(pcmpeqw, var, reg)
00418 #define pcmpeqw_r2r(regs, regd)     mmx_r2r(pcmpeqw, regs, regd)
00419 #define pcmpeqw(vars, vard)         mmx_m2m(pcmpeqw, vars, vard)
00420 
00421 #define pcmpeqb_m2r(var, reg)       mmx_m2r(pcmpeqb, var, reg)
00422 #define pcmpeqb_r2r(regs, regd)     mmx_r2r(pcmpeqb, regs, regd)
00423 #define pcmpeqb(vars, vard)         mmx_m2m(pcmpeqb, vars, vard)
00424 
00425 /*
00426     2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
00427     (resulting fields are either 0 or -1)
00428 */
00429 #define pcmpgtd_m2r(var, reg)       mmx_m2r(pcmpgtd, var, reg)
00430 #define pcmpgtd_r2r(regs, regd)     mmx_r2r(pcmpgtd, regs, regd)
00431 #define pcmpgtd(vars, vard)         mmx_m2m(pcmpgtd, vars, vard)
00432 
00433 #define pcmpgtw_m2r(var, reg)       mmx_m2r(pcmpgtw, var, reg)
00434 #define pcmpgtw_r2r(regs, regd)     mmx_r2r(pcmpgtw, regs, regd)
00435 #define pcmpgtw(vars, vard)         mmx_m2m(pcmpgtw, vars, vard)
00436 
00437 #define pcmpgtb_m2r(var, reg)       mmx_m2r(pcmpgtb, var, reg)
00438 #define pcmpgtb_r2r(regs, regd)     mmx_r2r(pcmpgtb, regs, regd)
00439 #define pcmpgtb(vars, vard)         mmx_m2m(pcmpgtb, vars, vard)
00440 
00441 /*
00442     1x64, 2x32, and 4x16 Parallel Shift Left Logical
          Operand forms (first argument is the shift count, second the value shifted):
            op_i2r(imm, reg)    count is imm, pasted as text into the instruction
            op_m2r(var, reg)    count read from variable
            op_r2r(regs, regd)  count read from MMX register
            op(vars, vard)      shifts variable vard via mmx_m2m, overwriting mm0
00443 */
00444 #define psllq_i2r(imm, reg)         mmx_i2r(psllq, imm, reg)
00445 #define psllq_m2r(var, reg)         mmx_m2r(psllq, var, reg)
00446 #define psllq_r2r(regs, regd)       mmx_r2r(psllq, regs, regd)
00447 #define psllq(vars, vard)           mmx_m2m(psllq, vars, vard)
00448 
00449 #define pslld_i2r(imm, reg)         mmx_i2r(pslld, imm, reg)
00450 #define pslld_m2r(var, reg)         mmx_m2r(pslld, var, reg)
00451 #define pslld_r2r(regs, regd)       mmx_r2r(pslld, regs, regd)
00452 #define pslld(vars, vard)           mmx_m2m(pslld, vars, vard)
00453 
00454 #define psllw_i2r(imm, reg)         mmx_i2r(psllw, imm, reg)
00455 #define psllw_m2r(var, reg)         mmx_m2r(psllw, var, reg)
00456 #define psllw_r2r(regs, regd)       mmx_r2r(psllw, regs, regd)
00457 #define psllw(vars, vard)           mmx_m2m(psllw, vars, vard)
00458 
00459 /*
00460     1x64, 2x32, and 4x16 Parallel Shift Right Logical
00461 */
00462 #define psrlq_i2r(imm, reg)         mmx_i2r(psrlq, imm, reg)
00463 #define psrlq_m2r(var, reg)         mmx_m2r(psrlq, var, reg)
00464 #define psrlq_r2r(regs, regd)       mmx_r2r(psrlq, regs, regd)
00465 #define psrlq(vars, vard)           mmx_m2m(psrlq, vars, vard)
00466 
00467 #define psrld_i2r(imm, reg)         mmx_i2r(psrld, imm, reg)
00468 #define psrld_m2r(var, reg)         mmx_m2r(psrld, var, reg)
00469 #define psrld_r2r(regs, regd)       mmx_r2r(psrld, regs, regd)
00470 #define psrld(vars, vard)           mmx_m2m(psrld, vars, vard)
00471 
00472 #define psrlw_i2r(imm, reg)         mmx_i2r(psrlw, imm, reg)
00473 #define psrlw_m2r(var, reg)         mmx_m2r(psrlw, var, reg)
00474 #define psrlw_r2r(regs, regd)       mmx_r2r(psrlw, regs, regd)
00475 #define psrlw(vars, vard)           mmx_m2m(psrlw, vars, vard)
00476 
00477 /*
00478     2x32 and 4x16 Parallel Shift Right Arithmetic
00479 */
00480 #define psrad_i2r(imm, reg)         mmx_i2r(psrad, imm, reg)
00481 #define psrad_m2r(var, reg)         mmx_m2r(psrad, var, reg)
00482 #define psrad_r2r(regs, regd)       mmx_r2r(psrad, regs, regd)
00483 #define psrad(vars, vard)           mmx_m2m(psrad, vars, vard)
00484 
00485 #define psraw_i2r(imm, reg)         mmx_i2r(psraw, imm, reg)
00486 #define psraw_m2r(var, reg)         mmx_m2r(psraw, var, reg)
00487 #define psraw_r2r(regs, regd)       mmx_r2r(psraw, regs, regd)
00488 #define psraw(vars, vard)           mmx_m2m(psraw, vars, vard)
00489 
00490 
00491 /*
00492     2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
00493     (packs source and dest fields into dest in that order)
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable packed into MMX register
            op_r2r(regs, regd)  MMX register packed into MMX register
            op(vars, vard)      variable packed into variable via mmx_m2m, overwriting mm0
00494 */
00495 #define packssdw_m2r(var, reg)      mmx_m2r(packssdw, var, reg)
00496 #define packssdw_r2r(regs, regd)    mmx_r2r(packssdw, regs, regd)
00497 #define packssdw(vars, vard)        mmx_m2m(packssdw, vars, vard)
00498 
00499 #define packsswb_m2r(var, reg)      mmx_m2r(packsswb, var, reg)
00500 #define packsswb_r2r(regs, regd)    mmx_r2r(packsswb, regs, regd)
00501 #define packsswb(vars, vard)        mmx_m2m(packsswb, vars, vard)
00502 
00503 
00504 /*
00505     4x16->8x8 PACK and Unsigned Saturate
00506     (packs source and dest fields into dest in that order)
00507 */
00508 #define packuswb_m2r(var, reg)      mmx_m2r(packuswb, var, reg)
00509 #define packuswb_r2r(regs, regd)    mmx_r2r(packuswb, regs, regd)
00510 #define packuswb(vars, vard)        mmx_m2m(packuswb, vars, vard)
00511 
00512 /*
00513     2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
00514     (interleaves low half of dest with low half of source
00515     as padding in each result field)
          Operand forms (first argument is the source, second the destination):
            op_m2r(var, reg)    variable interleaved into MMX register
            op_r2r(regs, regd)  MMX register interleaved into MMX register
            op(vars, vard)      variable interleaved into variable via mmx_m2m, overwriting mm0
00516 */
00517 #define punpckldq_m2r(var, reg)     mmx_m2r(punpckldq, var, reg)
00518 #define punpckldq_r2r(regs, regd)   mmx_r2r(punpckldq, regs, regd)
00519 #define punpckldq(vars, vard)       mmx_m2m(punpckldq, vars, vard)
00520 
00521 #define punpcklwd_m2r(var, reg)     mmx_m2r(punpcklwd, var, reg)
00522 #define punpcklwd_r2r(regs, regd)   mmx_r2r(punpcklwd, regs, regd)
00523 #define punpcklwd(vars, vard)       mmx_m2m(punpcklwd, vars, vard)
00524 
00525 #define punpcklbw_m2r(var, reg)     mmx_m2r(punpcklbw, var, reg)
00526 #define punpcklbw_r2r(regs, regd)   mmx_r2r(punpcklbw, regs, regd)
00527 #define punpcklbw(vars, vard)       mmx_m2m(punpcklbw, vars, vard)
00528 
00529 /*
00530     2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
00531     (interleaves high half of dest with high half of source
00532     as padding in each result field)
00533 */
00534 #define punpckhdq_m2r(var, reg)     mmx_m2r(punpckhdq, var, reg)
00535 #define punpckhdq_r2r(regs, regd)   mmx_r2r(punpckhdq, regs, regd)
00536 #define punpckhdq(vars, vard)       mmx_m2m(punpckhdq, vars, vard)
00537 
00538 #define punpckhwd_m2r(var, reg)     mmx_m2r(punpckhwd, var, reg)
00539 #define punpckhwd_r2r(regs, regd)   mmx_r2r(punpckhwd, regs, regd)
00540 #define punpckhwd(vars, vard)       mmx_m2m(punpckhwd, vars, vard)
00541 
00542 #define punpckhbw_m2r(var, reg)     mmx_m2r(punpckhbw, var, reg)
00543 #define punpckhbw_r2r(regs, regd)   mmx_r2r(punpckhbw, regs, regd)
00544 #define punpckhbw(vars, vard)       mmx_m2m(punpckhbw, vars, vard)
00545 
00546 /*
00547     Empty MMx State
00548     (used to clean up when going from mmx use to float use
00549     of the registers that are shared by both; note that
00550     no float-to-mmx operation is needed, because
00551     only the float tag word info is corruptible)
          Takes no arguments and expands to a single "emms" instruction.
00552 */
00553 #define emms()                      __asm__ __volatile__ ("emms")
00554 
00555 #if defined(USE_SSE2)
/*
    Wrappers compiled only when USE_SSE2 is defined.  They reuse the
    mmx_* helpers unchanged, so the register argument is whatever
    name the caller pastes in (presumably xmm0 ... xmm7 - confirm).

    NOTE(review): psrldq(vars, vard) expands through mmx_m2m, which
    stages the value with movq in mm0, i.e. a 64-bit MMX path; confirm
    that this is intended for a 128-bit operation.
    NOTE(review): verify that the _m2r and _r2r forms of psrldq
    assemble; the instruction is normally documented with an
    immediate count only.
*/
00556 #define movdqu_m2r(var, reg)        mmx_m2r(movdqu, var, reg)
00557 #define movdqu_r2m(reg, var)        mmx_r2m(movdqu, reg, var)
00558 #define movdqu_r2r(regs, regd)      mmx_r2r(movdqu, regs, regd)
00559 
00560 #define movdqa_m2r(var, reg)        mmx_m2r(movdqa, var, reg)
00561 #define movdqa_r2m(reg, var)        mmx_r2m(movdqa, reg, var)
00562 #define movdqa_r2r(regs, regd)      mmx_r2r(movdqa, regs, regd)
00563 
00564 #define psrldq_i2r(imm, reg)        mmx_i2r(psrldq, imm, reg)
00565 #define psrldq_m2r(var, reg)        mmx_m2r(psrldq, var, reg)
00566 #define psrldq_r2r(regs, regd)      mmx_r2r(psrldq, regs, regd)
00567 #define psrldq(vars, vard)          mmx_m2m(psrldq, vars, vard)
00568 #endif
00569 
00570 #ifdef __cplusplus
00571 }
00572 #endif
00573 
00574 #endif
00575 
00576 #endif
00577 /*- End of file ------------------------------------------------------------*/

Generated on Fri Nov 10 09:40:24 2006 for libspandsp by  doxygen 1.5.1