/*
Skeleton file for 433-677 Project 1
aharwood, 2007
Student logins:
*/
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
char *probname[]={"prefix sum",
"matrix multiplication",
"Gaussian elimination",
"Sieve of Eratosthenes",
"maximum contiguous sum"};
/* function to dynamically allocate a matrix of size n*n floats */
float **matrix(int n){
float **M;
int i;
M=(float **)malloc(sizeof(float *)*n);
if(M==NULL){
printf("failed to allocate matrix rows\n");
exit(1);
}
for(i=0;i<n;i++){
M[i]=(float *)malloc(sizeof(float)*n);
if(M[i]==NULL){
printf("failed to allocate matrix columns\n");
exit(1);
}
}
return M;
}
int main(int argc, char** argv){
int t; // problem type
int n; // size of the problem
int P; // number of threads
int *ia; // array for ints
float *fa; // array for floats
float **A,**B,**C; // matrices
int sqrtn; // the square root of n
int numprimes; // the number of primes <= n
float m; // for Guassian elimination
float max; // for maximum contiguous sum
float sum; // for maximum contiguous sum
int start,end; // for maximum contiguous sum
int i,j,k; // dummy index variables
int psd;
int psk;
if(argc != 4) {
printf("usage: proj1 problem_type num_threads problem_size\n");
printf("problem_type: 1 - prefix sum\n");
printf("problem_type: 2 - matrix multiplication\n");
printf("problem_type: 3 - Gaussian elimination\n");
printf("problem_type: 4 - Sieve of Eratosthenes\n");
printf("problem_type: 5 - maximum contiguous sum\n");
exit(1);
}
sscanf(argv[1],"%i",&t);
sscanf(argv[2],"%i",&P);
sscanf(argv[3],"%i",&n);
printf("number of processors: %d\n", omp_get_num_procs());
omp_set_num_threads(P);
printf("max threads: %d\n", omp_get_max_threads());
printf("problem: %s\n",probname[t-1]);
printf("array size %i\n",n);
sqrtn=(int)ceil(sqrt(n));
if(P==1){
/*************************/
/* Sequential algorithms */
/*************************/
switch(t){
case 1:
/* Initialize the array */
fa=(float *)malloc(sizeof(float)*n);
if(fa==NULL){
printf("failed to allocate array\n");
exit(1);
}
for(i=0;i<n;i++) fa[i]=i;
/* Compute the prefix sum */
for(i=1;i<n;i++) fa[i]=fa[i]+fa[i-1];
// uncomment to check
printf("last value %f\n",fa[n-1]);
break;
case 2:
/* Initialize the arrays */
A=matrix(n);
B=matrix(n);
C=matrix(n);
for(i=0;i<n;i++)
for(j=0;j<n;j++){
A[i][j]=i+j; // some dummy values
B[i][j]=i-j; // some dummy values
}
/* Compute the matrix multiplication */
for(i=0;i<n;i++)
for(j=0;j<n;j++){
C[i][j]=0;
for(k=0;k<n;k++)
C[i][j]+=A[i][k]*B[k][j];
}
// uncomment to print answer for small arrays
//for(i=0;i<n;i++)
// for(j=0;j<n;j++)
// printf("C[%d][%d]=%f\n",i,j,C[i][j]);
break;
case 3:
/* Initialize the arrays */
A=matrix(n);
fa=(float *)malloc(sizeof(float)*n);
if(fa==NULL){
printf("failed to allocate array\n");
exit(1);
}
for(i=0;i<n;i++){
fa[i]=2.0*(1.0+i); // dummy value
for(j=0;j<n;j++)
A[i][j]=10.0+(i+2.0)/(j+1.0); // dummy value
}
/* Do Gaussian elimination */
for(i=0;i<n-1;i++)
for(j=i+1;j<n;j++){
m=A[j][i]/A[i][i];
for(k=i;k<n;k++)
A[j][k]=A[j][k]-A[i][k]*m;
fa[j]=fa[j]-fa[i]*m;
}
// uncomment to print resulting array for small arrays
//for(i=0;i<n;i++)
//for(j=0;j<n;j++)
//printf("A[%d][%d]=%f\n",i,j,A[i][j]);
break;
case 4:
/* Initialize the array */
ia=(int *)malloc(sizeof(int)*n);
if(ia==NULL){
printf("failed to allocate array\n");
exit(1);
}
for(i=0;i<n;i++) ia[i]=i;
/* Use the Sieve of Eratosthenes to find prime numbers */
for(i=2;i<=sqrtn;i++){
if(ia[i]>0)
for(j=i*i;j<n;j+=i)
ia[j]=0;
}
//Uncomment to print the primes, for small sizes of n only
//for(i=0;i<n;i++) if(ia[i]>0) printf("%i is a prime\n",ia[i]);
/* Compact the array */
numprimes=0;
for(j=0;j<n;j++)
if(ia[j]>0){
ia[numprimes]=ia[j];
numprimes++;
}
//Uncomment to print the number of primes
//printf("%d primes less than or equal to %d\n", numprimes, n);
break;
case 5:
/* Initialize the array */
fa=(float *)malloc(sizeof(float)*n);
if(fa==NULL){
printf("failed to allocate array\n");
exit(1);
}
srand48(100); // dummy seed value for random numbers
for(i=0;i<n;i++) fa[i]=drand48()-0.5;
/* Find maximum contiguous sum */
sum=0.0;
max=0.0;
start=0;
end=0;
i=0;
for(j=0;j<n;j++){
sum+=fa[j];
if(sum>max){
max=sum;
start=i;
end=j;
} else if (sum<0.0){
i=j+1;
sum=0;
}
}
//uncomment to print answer
//printf("maximum contiguous sum = %f\n",max);
break;
}
} else {
/***********************/
/* Parallel algorithms */
/***********************/
switch(t){
case 1: /* Put parallel prefix sum algorithm here */
/* Initialize the array */
omp_set_num_threads(P);
//printf("Parallel Prefix Sum\n");
fa=(float *)malloc(sizeof(float)*n);
if(fa==NULL){
printf("failed to allocate array\n");
exit(1);
}
/* Some initializations */
for (i=0; i <n; i++)
fa[i] = i;
for(psd=0;psd<=(int)log2(n)-1;psd++)
{
int var1=(int)pow(2,psd+1);
#pragma omp parallel for
//for(psk=0;psk<n-1;psk+=(int)pow(2,psd+1))
for(psk=0;psk<n-1;psk+=var1)
{
fa[psk+var1-1]=fa[psk+(int)pow(2,psd)-1]+fa[psk+var1-1];
//printf("fa[%d]=%f\n",psk+(int)pow(2,psd+1)-1,fa[psk+(int)pow(2,psd+1)-1]);
}
}
fa[n-1]=0.0;
for(psd=(int)log2(n);psd>=0;psd--)
{
int var2=(int)pow(2,psd+1);
int var3=(int)pow(2,psd);
#pragma omp parallel for
//for(psk=0;psk<n-1;psk+=(int)pow(2,psd+1))
for(psk=0;psk<n-1;psk+=var2)
{
float t = fa[psk+var3-1];
fa[psk+var3-1]=fa[psk+var2-1];
fa[psk+var2-1]= t + fa[psk+var2-1];
//printf("fa[%d]=%f\n",psk+(int)pow(2,psd)-1,fa[psk+(int)pow(2,psd+1)-1]);
}
}
printf("last value %f\n",fa[n-1]);
//for(int jj=0;jj<8;jj++)
//{
//printf("fa[%d]=%f\n",jj,fa[jj]);
//}
break;
case 2: /* Put parallel matrix multplication algorithm here */
break;
case 3: /* Put parallel Gaussian elimination algorithm here */
/* Initialize the arrays */
omp_set_num_threads(P);
A=matrix(n);
fa=(float *)malloc(sizeof(float)*n);
if(fa==NULL){
printf("failed to allocate array\n");
exit(1);
}
for(i=0;i<n;i++){
fa[i]=2.0*(1.0+i); // dummy value
for(j=0;j<n;j++)
A[i][j]=10.0+(i+2.0)/(j+1.0); // dummy value
}
/* Do Gaussian elimination */
for(i=0;i<n-1;i++)
#pragma omp parallel for private(j,k,m)
for(j=i+1;j<n;j++){
m=A[j][i]/A[i][i];
//#pragma omp parallel for
for(k=i;k<n;k++)
A[j][k]=A[j][k]-A[i][k]*m;
fa[j]=fa[j]-fa[i]*m;
}
// uncomment to print resulting array for small arrays
//for(i=0;i<n;i++)
//for(j=0;j<n;j++)
//printf("A[%d][%d]=%f\n",i,j,A[i][j]);
break;
case 4: /* Put parallel Sieve of Eratosthenes algoritm here */
break;
case 5: /* Put parallel maximum contiguous sum algorithm here */
break;
}
}
exit(0);
}
v