Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if(APPLE)
endif()


add_executable(matmul main_ans.cpp)
add_executable(matmul main.cpp)


if(OpenMP_CXX_FOUND)
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,32 @@
**Points**: 100

---
### Results table
| Test Case | Dimensions (m × n × p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup |
|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------|
| 0 | 64 × 64 × 64 | 0.000999928 | 0.00199986 | 0.00200009 | 0.5× | 0.49994× |
| 1 | 128 × 64 × 128 | 0.00300002 | 0.00500011 | 0.000999928 | 0.59999× | 3.00024× |
| 2 | 100 × 128 × 56 | 0.00200009 | 0.00300002 | 0.000999928 | 0.666693× | 2.00024× |
| 3 | 128 × 64 × 128 | 0.00300002 | 0.00500011 | 0.00100017 | 0.59999× | 2.99952× |
| 4 | 32 × 128 × 32 | 0.000999928 | 0.000999928 | 0 | 1× | inf× |
| 5 | 200 × 100 × 256 | 0.0190001 | 0.0249999 | 0.00500011 | 0.760006× | 3.79992× |
| 6 | 256 × 256 × 256 | 0.0580001 | 0.0799999 | 0.013 | 0.725002× | 4.46154× |
| 7 | 256 × 300 × 256 | 0.0669999 | 0.095 | 0.0170002 | 0.705262× | 3.94113× |
| 8 | 64 × 128 × 64 | 0.00200009 | 0.00299978 | 0.000999928 | 0.666746× | 2.00024× |
| 9 | 256 × 256 × 257 | 0.0580001 | 0.0819998 | 0.0140002 | 0.70732× | 4.14281× |

- **Block size**: 64
- **OMP_NUM_THREADS**: 8
---
### Assignment Overview

Welcome to the second homework assignment of the Parallel Programming course! In Assignment 1, you implemented a naive
Expand Down
220 changes: 201 additions & 19 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,110 @@
#include <string>
#include <omp.h>
#include <cmath>
#include <iomanip>

void naive_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) {
//TODO : Implement naive matrix multiplication
// Computes C = A x B with the textbook triple loop.
// A is m x n, B is n x p and C is m x p; all three are indexed row-major.
void naive_matmul(float *C, float *A, float *B, int m, int n, int p) {
    // Clear the output first so the products can be accumulated in place.
    for (int row = 0; row < m; ++row) {
        const int c_base = row * p;
        for (int col = 0; col < p; ++col) {
            C[c_base + col] = 0;
        }
    }

    // For every output cell, add up A[row][inner] * B[inner][col] over inner.
    for (int row = 0; row < m; ++row) {
        const int a_base = row * n;
        const int c_base = row * p;
        for (int col = 0; col < p; ++col) {
            float &cell = C[c_base + col];
            for (int inner = 0; inner < n; ++inner) {
                cell += A[a_base + inner] * B[inner * p + col];
            }
        }
    }
}

void blocked_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p, uint32_t block_size) {
// TODO: Implement blocked matrix multiplication
// A is m x n, B is n x p, C is m x p
// Computes C = A x B tile by tile (blocked / tiled matrix multiplication).
// A is m x n, B is n x p and C is m x p; all three are indexed row-major.
// block_size is the edge length of the square tiles. A non-positive value
// is treated as "one tile covering the whole problem", which produces the
// same result as the unblocked product.
void blocked_matmul(float *C, float *A, float *B, int m, int n, int p, int block_size) {
    // With block_size == 0 the tile loops would never advance, and a negative
    // value would step the indices backwards out of bounds, so replace it
    // with a tile large enough to span every dimension.
    int tile = block_size;
    if (tile <= 0) {
        tile = 1;
        if (m > tile) tile = m;
        if (n > tile) tile = n;
        if (p > tile) tile = p;
    }

    // Clear the output so the partial products of each tile can accumulate.
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < p; ++j) {
            C[i * p + j] = 0;
        }
    }

    for (int ii = 0; ii < m; ii += tile) {
        // Tile bounds are computed once per tile instead of in every loop
        // condition; "m - ii > tile" also avoids overflowing ii + tile.
        const int i_end = (m - ii > tile) ? ii + tile : m;
        for (int jj = 0; jj < p; jj += tile) {
            const int j_end = (p - jj > tile) ? jj + tile : p;
            for (int kk = 0; kk < n; kk += tile) {
                const int k_end = (n - kk > tile) ? kk + tile : n;
                // Inside a tile, iterate i-k-j so the innermost loop walks B
                // and C contiguously. Each C element still receives its
                // products in ascending k order, so the sums are unchanged.
                for (int i = ii; i < i_end; ++i) {
                    for (int k = kk; k < k_end; ++k) {
                        const float a_ik = A[i * n + k];
                        for (int j = jj; j < j_end; ++j) {
                            C[i * p + j] += a_ik * B[k * p + j];
                        }
                    }
                }
            }
        }
    }
}

void parallel_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) {
// TODO: Implement parallel matrix multiplication using OpenMP
// Computes C = A x B with the row loop annotated for OpenMP.
// A is m x n, B is n x p and C is m x p; all three are indexed row-major.
void parallel_matmul(float *C, float *A, float *B, int m, int n, int p) {
    // Zero the output serially before the annotated loop starts accumulating.
    for (int row = 0; row < m; ++row) {
        const int c_base = row * p;
        for (int col = 0; col < p; ++col) {
            C[c_base + col] = 0;
        }
    }

    // Only the outer loop carries the pragma. Every iteration writes just
    // its own row of C, and all loop-local variables are declared inside
    // the loop body, so iterations do not share any written state.
    #pragma omp parallel for
    for (int row = 0; row < m; ++row) {
        const int a_base = row * n;
        const int c_base = row * p;
        for (int col = 0; col < p; ++col) {
            float &cell = C[c_base + col];
            for (int inner = 0; inner < n; ++inner) {
                cell += A[a_base + inner] * B[inner * p + col];
            }
        }
    }
}

bool validate_result(const std::string &result_file, const std::string &reference_file) {
//TODO : Implement result validation
// Checks that the matrix stored in result_file matches the one stored in
// reference_file.
//
// Both files are read as whitespace-separated text: two dimension values
// first, then m * p matrix elements. That is the layout main writes to the
// result file; the reference file is assumed to use the same layout
// (NOTE(review): confirm against the provided output.raw files).
//
// Returns true only if both headers equal (m, p) and every element pair
// compares equal. Any failure (file cannot be opened, header missing or
// different, data missing, value mismatch) is reported on std::cerr and
// returns false so the caller can decide how to react.
bool validate_result(const std::string &result_file, const std::string &reference_file, int m, int p) {
    std::ifstream comparison(reference_file);
    if (!comparison.is_open()) {
        std::cerr << "Unable to open file: " << reference_file << '\n';
        return false;
    }

    std::ifstream res(result_file);
    if (!res.is_open()) {
        std::cerr << "Unable to open file: " << result_file << '\n';
        return false;
    }

    // Consume the dimension header of each file before comparing elements.
    // Without this the two header tokens are counted as matrix data and the
    // last two elements are never compared. The header is read as float so
    // that any numeric spelling of the dimensions is accepted.
    float ref_rows = 0, ref_cols = 0, res_rows = 0, res_cols = 0;
    if (!(comparison >> ref_rows >> ref_cols) || !(res >> res_rows >> res_cols)) {
        std::cerr << "Missing dimension header\n";
        return false;
    }
    if (ref_rows != static_cast<float>(m) || ref_cols != static_cast<float>(p) ||
        res_rows != static_cast<float>(m) || res_cols != static_cast<float>(p)) {
        std::cerr << "Dimension mismatch\n";
        return false;
    }

    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < p; ++j) {
            float expected = 0, actual = 0;
            // A failed extraction stores 0 in the target, so a truncated
            // file would silently compare equal unless the stream state is
            // checked.
            if (!(comparison >> expected) || !(res >> actual)) {
                std::cerr << "Unexpected end of data at element (" << i << ", " << j << ")\n";
                return false;
            }
            if (actual != expected) {
                std::cerr << "Value mismatch at element (" << i << ", " << j << ")\n";
                return false;
            }
        }
    }
    // Both streams are closed by their destructors.
    return true;
}

int main(int argc, char *argv[]) {
int m, n, p;

if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " <case_number>" << std::endl;
return 1;
Expand All @@ -42,41 +125,122 @@ int main(int argc, char *argv[]) {
std::string result_file = folder + "result.raw";
std::string reference_file = folder + "output.raw";

// TODO Read input0.raw (matrix A)

// Read input0.raw (matrix A)
std::ifstream FileA(input0_file);
// Validate that file is opened correctly
if (!FileA.is_open()) {
std::cerr << "Error opening file";
return 1;
}

// TODO Read input1.raw (matrix B)
// Read input1.raw (matrix B)
std::ifstream FileB(input1_file);
// Validate that file is opened correctly
if (!FileB.is_open()) {
std::cerr << "Error opening file";
return 1;
}

// Get matrix dimensions
FileA >> m >> n;
FileB >> n >> p;

// Allocate memory for result matrices
float *C_naive = new float[m * p];
float *C_blocked = new float[m * p];
float *C_parallel = new float[m * p];

// Allocate memory for matrices A and B
float* A = (float*)malloc(m * n * sizeof(float));
// Validate that memory is allocated correctly
if (A == NULL) {
std::cerr << "Memory allocation failed";
return 1;
}

float* B = (float*)malloc(n * p * sizeof(float));
if (B == NULL) {
std::cerr << "Memory allocation failed";
return 1;
}

//Read matrix elements into A and B (row-major order), close file after reading
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
FileA >> A[i * n + j];
}
}
FileA.close();

for (int i = 0; i < n; ++i) {
for (int j = 0; j < p; ++j) {
FileB >> B[i * p + j];
}
}
FileB.close();

// Measure performance of naive_matmul
double start_time = omp_get_wtime();
naive_matmul(C_naive, A, B, m, n, p);
double naive_time = omp_get_wtime() - start_time;

// TODO Write naive result to file
// Write naive result to file
// Write dimensions and elements to result.raw
std::ofstream result(result_file);
// Validate that file is created correctly
if (!result) {
std::cerr << "Unable to open file";
exit(1);
}

// Write the dimensions of C on the first line
result << m << " " << p << std::endl;
result << std::fixed << std::setprecision(2);
// Iterate C and write each element to result.raw
for (int i = 0; i < m; ++i) {
for (int j = 0; j < p; ++j) {
result << C_naive[i * p + j] << " ";
}
result << std::endl;
}
// Close file after writing
result.close();

// Validate naive result
bool naive_correct = validate_result(result_file, reference_file);
bool naive_correct = validate_result(result_file, reference_file, m, p);
if (!naive_correct) {
std::cerr << "Naive result validation failed for case " << case_number << std::endl;
}


// Measure performance of blocked_matmul (use block_size = 32 as default)
start_time = omp_get_wtime();
blocked_matmul(C_blocked, A, B, m, n, p, 32);
blocked_matmul(C_blocked, A, B, m, n, p, 128);
double blocked_time = omp_get_wtime() - start_time;

// TODO Write blocked result to file
// Write blocked result to file
std::ofstream result_block(result_file);
// Validate that file is created correctly
if (!result_block) {
std::cerr << "Unable to open file";
exit(1);
}

// Write the dimensions of C on the first line
result_block << m << " " << p << std::endl;
result_block << std::fixed << std::setprecision(2);
// Iterate C and write each element to result.raw
for (int i = 0; i < m; ++i) {
for (int j = 0; j < p; ++j) {
result_block << C_blocked[i * p + j] << " ";
}
result_block << std::endl;
}
// Close file after writing
result_block.close();

// Validate blocked result
bool blocked_correct = validate_result(result_file, reference_file);
bool blocked_correct = validate_result(result_file, reference_file, m, p);
if (!blocked_correct) {
std::cerr << "Blocked result validation failed for case " << case_number << std::endl;
}
Expand All @@ -86,11 +250,29 @@ int main(int argc, char *argv[]) {
parallel_matmul(C_parallel, A, B, m, n, p);
double parallel_time = omp_get_wtime() - start_time;

// TODO Write parallel result to file
// Write parallel result to file
std::ofstream result_para(result_file);
// Validate that file is created correctly
if (!result_para) {
std::cerr << "Unable to open file";
exit(1);
}

// Write the dimensions of C on the first line
result_para << m << " " << p << std::endl;
result_para << std::fixed << std::setprecision(2);
// Iterate C and write each element to result.raw
for (int i = 0; i < m; ++i) {
for (int j = 0; j < p; ++j) {
result_para << C_parallel[i * p + j] << " ";
}
result_para << std::endl;
}
// Close file after writing
result_para.close();

// Validate parallel result
bool parallel_correct = validate_result(result_file, reference_file);
bool parallel_correct = validate_result(result_file, reference_file, m, p);
if (!parallel_correct) {
std::cerr << "Parallel result validation failed for case " << case_number << std::endl;
}
Expand Down