Please NO PARTIAL answers.... Thanks High Performance Computing Optimization Con
ID: 3890123 • Letter: P
Question
Please NO PARTIAL answers....
Thanks
High Performance Computing
Optimization
Consider the following code. Using the principles we discussed (1. keep memory local, stride through arrays in order when possible; 2. move invariant calculations out of loops; reduce use of function calls within loops by either 3. inlining, 4. strength reduction, or 5. passing arrays rather than elements of arrays to functions; 6. merging simple loops to increase the number of lines per loop; 7. reducing conditions inside of loops; and 8. considering use of static over dynamic allocation for arrays in time-consuming loops) optimize the following code.
Add comments to the code stating why you are making each change you make, and for any of the above principles you do not employ, state in your final writeup why they do not apply to this code. Using both the built in gcc and icc compilers on puma, with optimization levels -O0 (no optimization), -O1, and -O2. Show your elapsed time for optimized and non-optimized codes.
comments and a writeup of your timing results and any optimization techniques not employed as a plain text file. Save code as hw2_lastname.c and writeup as hw2_lastname.txt.
=====================================================================
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
int main(int argc, char ** argv) {
int n = 10000000;
double *x = (double *)malloc(sizeof(double)*n);
double *y = (double *)malloc(sizeof(double)*n);
double *z = (double *)malloc(sizeof(double)*n);
double *z2 = (double *)malloc(sizeof(double)*n);
double min=0.0;
double max = 10.0;
double step = (min-max)/(double)(n-1);
int i,iter;
int itmax = 100;
clock_t start, stop;
double elapsed_time;
start = clock();
for(i=0;i<n;i++) {
x[i] = min+(double)i*step;
}
for(i=0;i<n;i++) {
y[i] = exp(-(pow(x[i]-5.0,2.0)));
}
for(i=0;i<n;i++) {
double pi = acos(-1.0);
z[i] = y[i]*sin((x[i]-min)/(max-min)*2.0*pi);
}
for(iter=0;iter<itmax;iter++) {
z2[0] = 0.5*(z[0]+z[1]);
z2[n-1] = 0.5*(z[n-1]+z[n-2]);
for(i=1;i<n-1;i++) {
z2[i] = (1.0/3.0)*(z[i-1]+z[i]+z[i+1]);
}
for(i=0;i<n;i++) {
z[i] = z2[i]*y[i];
}
}
stop = clock();
elapsed_time = (stop-start)/(double)CLOCKS_PER_SEC;
printf("ELAPSED TIME = %f ",elapsed_time);
free(x);
free(y);
free(z);
free(z2);
}
Explanation / Answer
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
int main(int argc, char ** argv) {
int n = 10000000;
double *x = (double *)malloc(sizeof(double)*n);
double *y = (double *)malloc(sizeof(double)*n);
double *z = (double *)malloc(sizeof(double)*n);
double *z2 = (double *)malloc(sizeof(double)*n);
double min=0.0;
double max = 10.0;
double step = (min-max)/(double)(n-1);
int i,iter;
int itmax = 100;
clock_t start, stop;
double elapsed_time;
start = clock();
double pi = acos(-1.0);//loop invariant
for(i=0;i<n;i++) {
x[i] = min+(double)i*step;//merging loops for simple calculations
y[i] = exp(-(pow(x[i]-5.0,2.0)));//this is dependent on x but on same index
z[i] = y[i]*sin((x[i]-min)/(max-min)*2.0*pi);//this is dependent on x and y but same index
}
double t=1.0/3.0;// its a invariant calculation so we can use a variable
for(iter=0;iter<itmax;iter++) {
if(iter==itmax-1)//as these z2[0],z2[n-1] values will be updated at every cycle but their final values will be when z //array will second last time get modified
{
z2[0] = 0.5*(z[0]+z[1]);
z2[n-1] = 0.5*(z[n-1]+z[n-2]);
}
for(i=1;i<n-1;i++) {
z2[i] = t*(z[i-1]+z[i]+z[i+1]);
}
for(i=0;i<n;i++) {
z[i] = z2[i]*y[i];
}
}
stop = clock();
elapsed_time = (stop-start)/(double)CLOCKS_PER_SEC;
printf("ELAPSED TIME = %f ",elapsed_time);
free(x);
free(y);
free(z);
free(z2);
}
time elasped for code optimization at level 0 is 20.846000 s ,without optimization at level 0 22.263000 s
time elasped for code optimization at level 1 is 11.826000s , without optimization at level 1 10.445000 s
time elasped for code optimization at level 1 is 10.555000s , without optimization at level 1 10.769000 s