#include #include #define _SHOW_SSE //#define _SHOW_BASIC_ASM double getcurrenttime() { struct timeval timestruct; gettimeofday(×truct,NULL); return (double)( 1.0 *timestruct.tv_sec + (timestruct.tv_usec/1000000.0) ); } struct float4{ float w; float x; float y; float z; }; float A[4] = {10, 20, 30, 40}; float B[4] = {100, 200, 300, 400}; float C[4]; inline void mulfvec4(float *a, float *b, float *c) { int i; for (i=0;i<4;i++) { c[i] = a[i] * b[i]; } } #define mulfvec4_SSE(a, b, c) \ { \ __asm__ __volatile__ ("movups %1, %%xmm0 \n\t" \ "movups %2, %%xmm1 \n\t" \ "mulps %%xmm0, %%xmm1 \n\t" \ "movups %%xmm1, %0 \n\t" \ :"=m" (c) \ :"m" (a), "m" (b)); \ } #define MUL_TIMES 10000000 #ifdef _SHOW_SSE int main(void) { double non_SSE_start_time = getcurrenttime(); for (long i=0;i