如果找到了对您有用的资料,烦请点击右手边的Google广告支持我继续共享知识,谢谢! http://dengpeng.spaces.live.com/

2007年8月31日星期五

c



 
/*
    Skeleton file for 433-677 Project 1
    aharwood, 2007
    Student logins:
*/
 
 
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
 
char *probname[]={"prefix sum",
     "matrix multiplication",
     "Gaussian elimination",
     "Sieve of Eratosthenes",
     "maximum contiguous sum"};
 
 
/* function to dynamically allocate a matrix of size n*n floats */
float **matrix(int n){
    float **M;
    int i;
 
    M=(float **)malloc(sizeof(float *)*n);
    if(M==NULL){
        printf("failed to allocate matrix rows\n");
        exit(1);
    }
 
    for(i=0;i<n;i++){
        M[i]=(float *)malloc(sizeof(float)*n);
        if(M[i]==NULL){
            printf("failed to allocate matrix columns\n");
            exit(1);
        }
    }
 
    return M;
 
}
 
int main(int argc, char** argv){
    int t;  // problem type
    int n;  // size of the problem
    int P;    // number of threads
    int *ia; // array for ints
    float *fa; // array for floats
    float **A,**B,**C; // matrices
 
 
    int sqrtn; // the square root of n
    int numprimes; // the number of primes <= n
    float m;    // for Guassian elimination
    float max;    // for maximum contiguous sum
    float sum;    // for maximum contiguous sum
    int start,end; // for maximum contiguous sum
    int i,j,k;  // dummy index variables
    
    int psd;
    int psk;
 
    if(argc != 4) {
        printf("usage: proj1 problem_type num_threads problem_size\n");
        printf("problem_type: 1 - prefix sum\n");
        printf("problem_type: 2 - matrix multiplication\n");
        printf("problem_type: 3 - Gaussian elimination\n");
        printf("problem_type: 4 - Sieve of Eratosthenes\n");
        printf("problem_type: 5 - maximum contiguous sum\n");
        exit(1);
    }    
    sscanf(argv[1],"%i",&t);
    sscanf(argv[2],"%i",&P);
    sscanf(argv[3],"%i",&n);        
    
 
    printf("number of processors: %d\n", omp_get_num_procs());
 
    omp_set_num_threads(P);
 
        printf("max threads: %d\n", omp_get_max_threads());
 
    printf("problem: %s\n",probname[t-1]);
    printf("array size %i\n",n);
 
    sqrtn=(int)ceil(sqrt(n));
    
    if(P==1){
        /*************************/
        /* Sequential algorithms */
        /*************************/
        
        
        switch(t){
        case 1:
            /* Initialize the array */
            fa=(float *)malloc(sizeof(float)*n);
            if(fa==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            for(i=0;i<n;i++) fa[i]=i;    
 
            /* Compute the prefix sum */
            for(i=1;i<n;i++) fa[i]=fa[i]+fa[i-1];
            
            // uncomment to check
            printf("last value %f\n",fa[n-1]);
            break;
 
        case 2:
            /* Initialize the arrays */
            A=matrix(n);
            B=matrix(n);
            C=matrix(n);
            for(i=0;i<n;i++)
                for(j=0;j<n;j++){
                    A[i][j]=i+j;    // some dummy values
                    B[i][j]=i-j;    // some dummy values
                }
 
            /* Compute the matrix multiplication */
            for(i=0;i<n;i++)
                for(j=0;j<n;j++){
                      C[i][j]=0;
                      for(k=0;k<n;k++)
                        C[i][j]+=A[i][k]*B[k][j];
                }
    
            // uncomment to print answer for small arrays
            //for(i=0;i<n;i++)
            //    for(j=0;j<n;j++)
            //        printf("C[%d][%d]=%f\n",i,j,C[i][j]);
 
            break;
 
        case 3:
            /* Initialize the arrays */
            A=matrix(n);
            fa=(float *)malloc(sizeof(float)*n);
            if(fa==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            
            for(i=0;i<n;i++){
                fa[i]=2.0*(1.0+i);    // dummy value
                for(j=0;j<n;j++)
                    A[i][j]=10.0+(i+2.0)/(j+1.0); // dummy value
            }
 
            /* Do Gaussian elimination */
            for(i=0;i<n-1;i++)
                  for(j=i+1;j<n;j++){
                    m=A[j][i]/A[i][i];
                    for(k=i;k<n;k++)
                          A[j][k]=A[j][k]-A[i][k]*m;
                    fa[j]=fa[j]-fa[i]*m;
                  }
 
            // uncomment to print resulting array for small arrays
            //for(i=0;i<n;i++)
                //for(j=0;j<n;j++)
                    //printf("A[%d][%d]=%f\n",i,j,A[i][j]);
 
            break;
 
        case 4:
            /* Initialize the array */
            ia=(int *)malloc(sizeof(int)*n);
            if(ia==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            for(i=0;i<n;i++) ia[i]=i;
    
            /* Use the Sieve of Eratosthenes to find prime numbers */
            for(i=2;i<=sqrtn;i++){
                if(ia[i]>0)
                    for(j=i*i;j<n;j+=i)
                        ia[j]=0;
            }        
 
            //Uncomment to print the primes, for small sizes of n only
            //for(i=0;i<n;i++) if(ia[i]>0) printf("%i is a prime\n",ia[i]);
 
            /* Compact the array */
            numprimes=0;
            for(j=0;j<n;j++)
                if(ia[j]>0){
                    ia[numprimes]=ia[j];
                    numprimes++;
                }
            
 
            //Uncomment to print the number of primes
            //printf("%d primes less than or equal to %d\n", numprimes, n);
 
            break;
 
        case 5:
            /* Initialize the array */
            fa=(float *)malloc(sizeof(float)*n);
            if(fa==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            srand48(100); // dummy seed value for random numbers
            for(i=0;i<n;i++) fa[i]=drand48()-0.5;
 
 
            /* Find maximum contiguous sum */
            sum=0.0;
            max=0.0;
            start=0;
            end=0;    
            i=0;
            for(j=0;j<n;j++){
                sum+=fa[j];
                if(sum>max){
                    max=sum;
                    start=i;
                    end=j;
                } else if (sum<0.0){
                    i=j+1;
                    sum=0;
                }
            }
        
            //uncomment to print answer    
            //printf("maximum contiguous sum = %f\n",max);
        
            break;        
        }
 
    } else {
        /***********************/
        /* Parallel algorithms */
        /***********************/
        switch(t){
        case 1:    /* Put parallel prefix sum algorithm here */
            /* Initialize the array */
            omp_set_num_threads(P);
            
            //printf("Parallel Prefix Sum\n");
            fa=(float *)malloc(sizeof(float)*n);
            if(fa==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            
            /* Some initializations */
            for (i=0; i <n; i++)
                fa[i] = i;
            
            for(psd=0;psd<=(int)log2(n)-1;psd++)
            {
                int var1=(int)pow(2,psd+1);
                #pragma omp parallel for
                //for(psk=0;psk<n-1;psk+=(int)pow(2,psd+1))
                for(psk=0;psk<n-1;psk+=var1)
                {
                    fa[psk+var1-1]=fa[psk+(int)pow(2,psd)-1]+fa[psk+var1-1];
                    //printf("fa[%d]=%f\n",psk+(int)pow(2,psd+1)-1,fa[psk+(int)pow(2,psd+1)-1]);
                }
    
            }
            fa[n-1]=0.0;
            for(psd=(int)log2(n);psd>=0;psd--)
            {
                int var2=(int)pow(2,psd+1);
                int var3=(int)pow(2,psd);
                
                #pragma omp parallel for
                //for(psk=0;psk<n-1;psk+=(int)pow(2,psd+1))
                for(psk=0;psk<n-1;psk+=var2)
                {
                    float t = fa[psk+var3-1];
                    fa[psk+var3-1]=fa[psk+var2-1];
                    fa[psk+var2-1]= t + fa[psk+var2-1];
                    //printf("fa[%d]=%f\n",psk+(int)pow(2,psd)-1,fa[psk+(int)pow(2,psd+1)-1]);
                }
            }
            
            printf("last value %f\n",fa[n-1]);
            
            //for(int jj=0;jj<8;jj++)
            //{
                //printf("fa[%d]=%f\n",jj,fa[jj]);
            //}
            break;
 
        case 2: /* Put parallel matrix multplication algorithm here */
            
            break;
 
        case 3: /* Put parallel Gaussian elimination algorithm here */
            /* Initialize the arrays */
            omp_set_num_threads(P);
            
            A=matrix(n);
            fa=(float *)malloc(sizeof(float)*n);
            if(fa==NULL){
                printf("failed to allocate array\n");
                exit(1);
            }
            
            for(i=0;i<n;i++){
                fa[i]=2.0*(1.0+i);    // dummy value
                for(j=0;j<n;j++)
                    A[i][j]=10.0+(i+2.0)/(j+1.0); // dummy value
            }
 
            /* Do Gaussian elimination */
            for(i=0;i<n-1;i++)
                #pragma omp parallel for private(j,k,m)
                  for(j=i+1;j<n;j++){
                    m=A[j][i]/A[i][i];
                    //#pragma omp parallel for
                    for(k=i;k<n;k++)
                          A[j][k]=A[j][k]-A[i][k]*m;
                    fa[j]=fa[j]-fa[i]*m;
                  }
 
            // uncomment to print resulting array for small arrays
            //for(i=0;i<n;i++)
                //for(j=0;j<n;j++)
                    //printf("A[%d][%d]=%f\n",i,j,A[i][j]);
 
            break;
 
        case 4: /* Put parallel Sieve of Eratosthenes algoritm here */
 
            break;
        
        case 5: /* Put parallel maximum contiguous sum algorithm here */
 
            break;
        }
 
    }
 
    exit(0);
}
 
v