Commit 1441dfdf authored by John Leidel's avatar John Leidel

adding assignment source

parents
#!/bin/sh
# Toolchain and flag configuration.
#   CFLAGS   : optimisation level used for every compile and link step
#   COPT     : language standard for the DGEMM kernels (they use `restrict`)
#   OMPFLAGS : enables OpenMP for the parallel kernels and their link steps
CC=gcc
CFLAGS=-O3
COPT=-std=c99
OMPFLAGS=-fopenmp

.PHONY: all clean

# Default target: build the sequential binary and the three parallel variants.
all: dgemm_seq dgemm_l1 dgemm_l2 dgemm_l3

# --- link steps: the same driver object is paired with one kernel each ---
dgemm_seq: main.o dgemm_seq.o
	$(CC) $(CFLAGS) -o $@ main.o dgemm_seq.o

dgemm_l1: main.o dgemm_l1.o
	$(CC) $(CFLAGS) $(OMPFLAGS) -o $@ main.o dgemm_l1.o

dgemm_l2: main.o dgemm_l2.o
	$(CC) $(CFLAGS) $(OMPFLAGS) -o $@ main.o dgemm_l2.o

dgemm_l3: main.o dgemm_l3.o
	$(CC) $(CFLAGS) $(OMPFLAGS) -o $@ main.o dgemm_l3.o

# --- compile steps ---
dgemm_seq.o: ./src/dgemm_seq.c
	$(CC) $(CFLAGS) $(COPT) -c -o $@ $<

dgemm_l1.o: ./src/dgemm_l1.c
	$(CC) $(CFLAGS) $(COPT) $(OMPFLAGS) -c -o $@ $<

# Each parallel object must come from its own source file; building l2/l3
# from dgemm_l1.c would make all three parallel binaries identical.
dgemm_l2.o: ./src/dgemm_l2.c
	$(CC) $(CFLAGS) $(COPT) $(OMPFLAGS) -c -o $@ $<

dgemm_l3.o: ./src/dgemm_l3.c
	$(CC) $(CFLAGS) $(COPT) $(OMPFLAGS) -c -o $@ $<

# NOTE(review): main.c is compiled without $(COPT); presumably deliberate -- confirm.
main.o: ./src/main.c
	$(CC) $(CFLAGS) -c -o $@ $<

# Remove the binaries and all object files in the top-level directory.
clean:
	rm -Rf dgemm_seq dgemm_l1 dgemm_l2 dgemm_l3 ./*.o
CS5332 OpenMP Assignment
-----------------------------------
john.leidel@ttu.edu
http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git
-----------------------------------
The goal of this assignment is twofold.
First, we seek to utilize our lecture notes
from lecture 10 and implement a parallel
matrix-matrix multiplication kernel.
Second, we seek to observe the performance
differences depending upon where the directives
are implemented.
The source directory contains several files:
- main.c : driver
- dgemm_seq.c : sequential DGEMM
- dgemm_l1.c : L1 parallel solver
- dgemm_l2.c : L2 parallel solver
- dgemm_l3.c : L3 parallel solver
For each of the {L1,L2,L3} parallel solvers,
implement the necessary OpenMP clauses on
the 1st, 2nd and 3rd loop nests, respectively.
Make sure that the private and shared data
variables are correctly specified.
Execute the sequential version (dgemm_seq)
and record the timing and performance.
Execute each of the parallel versions
using 1-8 threads and record the timing
and performance for each. The driver
will print the timing and the performance.
Use the included makefile to build the
different versions. (Just type 'make').
-----------------------------------
EOF
/*
* _DGEMM_L1_C_
*
* L1 OpenMP DGEMM
*
* CS5332 OpenMP Programming Assignment
*
* John Leidel: john.leidel@ttu.edu
* http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git*
*
*/
/*
 * naive_dgemm (L1 variant)
 *
 * Computes Result = A * B for N x N matrices stored as arrays of row
 * pointers (Result[i][j], A[i][k], B[k][j]).
 *
 * The OpenMP work-sharing directive is placed on the 1st (outermost) loop,
 * so whole rows of Result are distributed across the threads.
 *
 *   Result : output matrix; every element [0..N)[0..N) is overwritten
 *   A, B   : input matrices; only read
 *   N      : matrix dimension; N <= 0 performs no work
 *
 * When compiled without OpenMP support the pragma is ignored and the
 * function runs as the plain sequential triple loop.
 */
extern void naive_dgemm( double **restrict Result,
                         double **restrict A,
                         double **restrict B,
                         int N ){
  /* vars */
  int i = 0;
  int j = 0;
  int k = 0;
  double sum = 0.;

  /* --- */

  /*
   * i is the work-shared loop index and is therefore private by rule.
   * j, k and sum are scratch values of a single row computation and must be
   * private per thread.  Each iteration of i writes only Result[i][*], so no
   * synchronisation is needed as long as the row pointers do not alias.
   */
#pragma omp parallel for default(none) shared(Result, A, B, N) private(j, k, sum)
  for( i=0; i<N; i++ ){
    for( j=0; j<N; j++ ){
      sum = 0.;
      for( k=0; k<N; k++ ){
        sum = sum + A[i][k] * B[k][j];
      }
      Result[i][j] = sum;
    }
  }
}
/* EOF */
/*
* _DGEMM_L2_C_
*
* L2 OpenMP DGEMM
*
* CS5332 OpenMP Programming Assignment
*
* John Leidel: john.leidel@ttu.edu
* http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git*
*
*/
/*
 * naive_dgemm (L2 variant)
 *
 * Computes Result = A * B for N x N matrices stored as arrays of row
 * pointers (Result[i][j], A[i][k], B[k][j]).
 *
 * The OpenMP work-sharing directive is placed on the 2nd (middle) loop, so
 * for every row i the columns j are distributed across the threads.  A
 * parallel region is therefore entered once per row.
 *
 *   Result : output matrix; every element [0..N)[0..N) is overwritten
 *   A, B   : input matrices; only read
 *   N      : matrix dimension; N <= 0 performs no work
 *
 * When compiled without OpenMP support the pragma is ignored and the
 * function runs as the plain sequential triple loop.
 */
extern void naive_dgemm( double **restrict Result,
                         double **restrict A,
                         double **restrict B,
                         int N ){
  /* vars */
  int i = 0;
  int j = 0;
  int k = 0;
  double sum = 0.;

  /* --- */
  for( i=0; i<N; i++ ){
    /*
     * i is fixed for the duration of the region and only read: shared.
     * j is the work-shared loop index (private by rule); k and sum are
     * per-element scratch values and must be private per thread.
     */
#pragma omp parallel for default(none) shared(Result, A, B, N, i) private(k, sum)
    for( j=0; j<N; j++ ){
      sum = 0.;
      for( k=0; k<N; k++ ){
        sum = sum + A[i][k] * B[k][j];
      }
      Result[i][j] = sum;
    }
  }
}
/* EOF */
/*
* _DGEMM_L3_C_
*
* L3 OpenMP DGEMM
*
* CS5332 OpenMP Programming Assignment
*
* John Leidel: john.leidel@ttu.edu
* http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git*
*
*/
/*
 * naive_dgemm (L3 variant)
 *
 * Computes Result = A * B for N x N matrices stored as arrays of row
 * pointers (Result[i][j], A[i][k], B[k][j]).
 *
 * The OpenMP work-sharing directive is placed on the 3rd (innermost) loop,
 * so the dot product for a single Result[i][j] is split across the threads
 * and combined with a sum reduction.  A parallel region is entered once per
 * output element.
 *
 *   Result : output matrix; every element [0..N)[0..N) is overwritten
 *   A, B   : input matrices; only read
 *   N      : matrix dimension; N <= 0 performs no work
 *
 * NOTE: with more than one thread the partial sums are combined in an
 * unspecified order, so results may differ from the sequential kernel in
 * the last bits of floating-point rounding.
 *
 * When compiled without OpenMP support the pragma is ignored and the
 * function runs as the plain sequential triple loop.
 */
extern void naive_dgemm( double **restrict Result,
                         double **restrict A,
                         double **restrict B,
                         int N ){
  /* vars */
  int i = 0;
  int j = 0;
  int k = 0;
  double sum = 0.;

  /* --- */
  for( i=0; i<N; i++ ){
    for( j=0; j<N; j++ ){
      sum = 0.;
      /*
       * i and j are fixed for the duration of the region and only read:
       * shared.  k is the work-shared loop index (private by rule).  sum is
       * accumulated by every thread, so it needs a reduction rather than a
       * plain shared or private clause.
       */
#pragma omp parallel for default(none) shared(A, B, N, i, j) reduction(+:sum)
      for( k=0; k<N; k++ ){
        sum = sum + A[i][k] * B[k][j];
      }
      Result[i][j] = sum;
    }
  }
}
/* EOF */
/*
* _DGEMM_SEQ_C_
*
* Sequential DGEMM
*
* CS5332 OpenMP Programming Assignment
*
* John Leidel: john.leidel@ttu.edu
* http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git*
*
*/
/*
 * naive_dgemm (sequential reference)
 *
 * Single-threaded triple-loop product Result = A * B of two N x N matrices
 * held as arrays of row pointers.
 *
 *   Result : output matrix; every element [0..N)[0..N) is overwritten
 *   A, B   : input matrices; only read
 *   N      : matrix dimension; N <= 0 performs no work
 */
extern void naive_dgemm( double **restrict Result,
                         double **restrict A,
                         double **restrict B,
                         int N ){
  for( int row = 0; row < N; ++row ){
    /* Row pointers stay fixed while one output row is produced. */
    const double *lhs = A[row];
    double *out = Result[row];

    for( int col = 0; col < N; ++col ){
      /* Dot product of row `row` of A with column `col` of B,
       * accumulated left to right starting from zero. */
      double acc = 0.;
      for( int idx = 0; idx < N; ++idx ){
        acc += lhs[idx] * B[idx][col];
      }
      out[col] = acc;
    }
  }
}
/* EOF */
/*
* _MAIN_C_
*
* CS5332 OpenMP Programming Assignment
*
* John Leidel: john.leidel@ttu.edu
* http://discl.cs.ttu.edu/gitlab/jleidel/cs5332_omp.git
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/time.h>
/* --------------------------------- MACROS */
/* Matrix dimensions used by main().
 * X_LEN is the number of rows (size of each row-pointer array) and is also
 * the N passed to naive_dgemm(); Y_LEN is the number of elements per row.
 * The kernel indexes every row with 0..N-1, so Y_LEN must not be smaller
 * than X_LEN. */
#define X_LEN 1024
#define Y_LEN 1024
/* --------------------------------- FUNCTION PROTOTYPES */
/* Matrix-multiply kernel defined in a separate translation unit.
 * main() calls it with three arrays of row pointers and N = X_LEN, and
 * times the call. */
extern void naive_dgemm( double **Result,
double **A,
double **B,
int N );
/* --------------------------------- MYSECOND */
/*
 * mysecond: current wall-clock time from gettimeofday(), returned as
 * seconds (with a microsecond fractional part) in a double.
 *
 * The timezone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified when it is non-NULL, and `struct timezone` is not declared
 * under strict ISO C modes on some C libraries.
 *
 * tp is zero-initialised so that a failing gettimeofday() yields 0.0
 * rather than an indeterminate value.
 */
double mysecond(void){
  struct timeval tp = { 0, 0 };

  gettimeofday( &tp, NULL );
  return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
}
/* --------------------------------- MAIN */
/*
 * main: benchmark driver.
 *
 * Allocates three X_LEN x Y_LEN matrices of doubles as contiguous storage
 * (Ap, Bp, Rp) plus one row-pointer array for each (A, B, R), fills A and B
 * with pid-dependent values, times a single naive_dgemm() call and prints
 * the elapsed time and the achieved Gflops.
 *
 * Returns 0 on success and -1 if any allocation fails.  Every exit path
 * goes through the `cleanup` label so that buffers allocated before a
 * failure are released as well.
 */
int main( int argc, char **argv ){
  /* vars */
  double *Ap = NULL;            /* A storage */
  double *Bp = NULL;            /* B storage */
  double *Rp = NULL;            /* R storage */
  double **A = NULL;            /* A pointer storage */
  double **B = NULL;            /* B pointer storage */
  double **R = NULL;            /* R pointer storage */
  double start_t = 0.;
  double end_t = 0.;
  double flop = 0.;
  unsigned long long i = 0;
  pid_t pid = -1;
  int rtn = -1;                 /* exit status; set to 0 once the run completes */

  /* command-line arguments are not used */
  (void)argc;
  (void)argv;

  /* ---- */

  /* operation count: X_LEN*Y_LEN output elements times (2*X_LEN - 1) */
  flop = ((double)(X_LEN)*(double)(Y_LEN))*((2.0*(double)(X_LEN))-1.0);

  /* -- allocate memory */
  Ap = malloc( sizeof( *Ap ) * X_LEN * Y_LEN );
  if( Ap == NULL ){
    printf( "Error : could not allocate memory for Ap\n" );
    goto cleanup;
  }

  Bp = malloc( sizeof( *Bp ) * X_LEN * Y_LEN );
  if( Bp == NULL ){
    printf( "Error : could not allocate memory for Bp\n" );
    goto cleanup;
  }

  Rp = malloc( sizeof( *Rp ) * X_LEN * Y_LEN );
  if( Rp == NULL ){
    printf( "Error : could not allocate memory for Rp\n" );
    goto cleanup;
  }

  A = malloc( sizeof( *A ) * X_LEN );
  if( A == NULL ){
    printf( "Error : could not allocate memory for A\n" );
    goto cleanup;
  }

  B = malloc( sizeof( *B ) * X_LEN );
  if( B == NULL ){
    printf( "Error : could not allocate memory for B\n" );
    goto cleanup;
  }

  R = malloc( sizeof( *R ) * X_LEN );
  if( R == NULL ){
    printf( "Error : could not allocate memory for R\n" );
    goto cleanup;
  }

  /* init the memory with nonzero values */
  pid = getpid();
  for( i=0; i<(X_LEN*Y_LEN); i++ ){
    Ap[i] = (double)(pid)*(double)(i);
    Bp[i] = Ap[i]*2.0;
    Rp[i] = 0.;
  }

  /* init our 2D pointers: row i starts Y_LEN elements after row i-1 */
  for( i=0; i<X_LEN; i++ ){
    A[i] = &(Ap[i*Y_LEN]);
    B[i] = &(Bp[i*Y_LEN]);
    R[i] = &(Rp[i*Y_LEN]);
  }

  /* execute the benchmark */
  start_t = mysecond();
  naive_dgemm( R, A, B, X_LEN );
  end_t = mysecond();

  printf( "Executed DGMEM in %f seconds; %f Gflops\n",
          (end_t-start_t),
          (flop/1000000000.0)/(end_t-start_t) );

  rtn = 0;

cleanup:
  /* free the memory; free(NULL) is a no-op, so partial allocation is fine */
  free( A );
  free( B );
  free( R );
  free( Ap );
  free( Bp );
  free( Rp );

  return rtn;
}
/* EOF */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment