#include<pmmintrin.h>voidsse_mul(int n,float a[][maxN],float b[][maxN],float c[][maxN]){
__m128 t1, t2, sum;for(int i =0; i < n;++i)for(int j =0; j < i;++j)swap(b[i][j], b[j][i]);for(int i =0; i < n;++i){for(int j =0; j < n;++j){
c[i][j]=0.0;
sum =_mm_setzero_ps();for(int k = n -4; k >=0; k -=4){
t1 =_mm_loadu_ps(a[i]+ k);
t2 =_mm_loadu_ps(b[j]+ k);
t1 =_mm_mul_ps(t1, t2);
sum =_mm_add_ps(sum, t1);}
sum =_mm_hadd_ps(sum, sum);
sum =_mm_hadd_ps(sum, sum);_mm_store_ss(c[i]+ j, sum);for(int k =(n %4)-1; k >=0;--k){
c[i][j]+= a[i][k]* b[j][k];}}}for(int i =0; i < n;++i)for(int j =0; j < i;++j)swap(b[i][j], b[j][i]);}
Pthread:每个子线程输出字符串
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of worker threads; written once in main before any thread starts. */
int thread_count;

/*
 * Held by each worker around its printf call. Statically initialized, because
 * pthread_mutex_init() is a function call and cannot appear at file scope.
 */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

void *Hello(void *rank);

/*
 * Start the number of threads given as the first command-line argument, have
 * each one print a greeting, and wait for all of them to finish.
 *
 * Returns 0 on success and EXIT_FAILURE on a usage error, an allocation
 * failure, or a failure to create a thread.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <number of threads>\n",
                argc > 0 ? argv[0] : "hello");
        return EXIT_FAILURE;
    }

    /* Get the number of threads from the command line. */
    char *end = NULL;
    errno = 0;
    long requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        requested <= 0 || requested > INT_MAX) {
        fprintf(stderr, "invalid number of threads: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    thread_count = (int)requested;

    pthread_t *thread_handles =
        (pthread_t *)malloc((size_t)thread_count * sizeof *thread_handles);
    if (thread_handles == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    int status = 0;
    long started = 0;
    for (; started < thread_count; started++) {
        /* The rank is passed by value, packed into the void * argument. */
        int err = pthread_create(&thread_handles[started], NULL, Hello,
                                 (void *)started);
        if (err != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            status = EXIT_FAILURE;
            break;
        }
    }

    printf("Hello from the main thread\n");

    /* Join only the threads that were actually created. */
    for (long thread = 0; thread < started; thread++) {
        pthread_join(thread_handles[thread], NULL);
    }

    free(thread_handles);
    return status;
}

/*
 * Thread entry point: print this thread's rank and the total thread count.
 *
 * rank: the thread's index, passed by value inside the pointer.
 * Returns NULL.
 */
void *Hello(void *rank)
{
    long my_rank = (long)rank;

    pthread_mutex_lock(&mutex);
    printf("Hello from thread %ld of %d\n", my_rank, thread_count);
    pthread_mutex_unlock(&mutex);

    return NULL;
}
OpenMP部分
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif

void Hello(void);

/*
 * Run Hello() on the number of threads given as the first command-line
 * argument. When the program is built without OpenMP the pragma is ignored
 * and Hello() runs once on the calling thread.
 *
 * Returns 0 on success and EXIT_FAILURE when the argument is missing or is
 * not a positive integer.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <number of threads>\n",
                argc > 0 ? argv[0] : "hello");
        return EXIT_FAILURE;
    }

    char *end = NULL;
    errno = 0;
    long requested = strtol(argv[1], &end, 10);
    /* num_threads requires a positive value, so reject anything else here. */
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        requested <= 0 || requested > INT_MAX) {
        fprintf(stderr, "invalid number of threads: %s\n", argv[1]);
        return EXIT_FAILURE;
    }
    int thread_count = (int)requested;
    (void)thread_count; /* unused when the pragma below is ignored */

#pragma omp parallel num_threads(thread_count)
    Hello();

    return 0;
}

/*
 * Print the calling thread's rank and the size of its team. Without OpenMP
 * the rank is reported as 0 and the team size as 1.
 */
void Hello(void)
{
#ifdef _OPENMP
    int my_rank = omp_get_thread_num();
    int thread_count = omp_get_num_threads();
#else
    int my_rank = 0;
    int thread_count = 1;
#endif

    printf("Hello from thread %d of %d\n", my_rank, thread_count);
}
MPI Hello
#include <stdio.h>
#include <mpi.h>

/*
 * Print one greeting per MPI process, giving the process's rank, the total
 * number of processes, and the name of the processor it runs on.
 *
 * argv is declared as char *argv[] so that &argv has the char *** type that
 * MPI_Init expects.
 */
int main(int argc, char *argv[])
{
    int myid;
    int numprocs;
    int namelen;
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);

    /* Rank and size are queried as a pair on the same communicator. */
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    MPI_Get_processor_name(processor_name, &namelen);

    fprintf(stderr, "Hello World! Process %d of %d on %s\n",
            myid, numprocs, processor_name);

    MPI_Finalize();
    return 0;
}