diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b04fd0..abcfc6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ if(APPLE) endif() -add_executable(matmul main_ans.cpp) +add_executable(matmul main.cpp) if(OpenMP_CXX_FOUND) diff --git a/README.md b/README.md index 0f91d63..45d2fd7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,32 @@ **Points**: 100 --- +### Results table +| Test Case | Dimensions (m × n × p) | Naive Time (s) | Blocked Time (s) | Parallel Time (s) | Blocked Speedup | Parallel Speedup | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 0 | 64 × 64 × 64 | 0.000999928 | 0.00199986 | 0.00200009 | 0.5× | 0.49994× | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 1 | 128 x 64 x 128 | 0.00300002 | 0.00500011 | 0.000999928 | 0.59999x | 3.00024x | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 2 | 100 x 128 x 56 | 0.00200009 | 0.00300002 | 0.000999928 | 0.666693x | 2.00024x | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 3 | 128 x 64 x 128 | 0.00300002 | 0.00500011 | 0.00100017 | 0.59999x | 2.99952x | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 4 | 32 x 128 x 32 | 0.000999928 | 0.000999928 | 0 | 1x | infx | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 5 | 200 x 100 x 256 | 0.0190001 | 0.0249999 | 0.00500011 | 0.760006x | 3.79992x | +|-----------|------------------------|----------------|------------------|-------------------|-----------------|------------------| +| 6 | 256 X 256 X 256 | 0.0580001 | 0.0799999 
// Headers required by the four functions below.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>

/// @brief Reference (triple-loop) matrix product C = A x B.
///
/// All matrices are dense, row-major float arrays:
/// A is m x n, B is n x p and C is m x p. C is fully overwritten.
///
/// @param C Output buffer with room for m * p floats.
/// @param A Left operand, m * n floats.
/// @param B Right operand, n * p floats.
/// @param m Number of rows of A and C.
/// @param n Number of columns of A / rows of B. If n <= 0, C is set to zero.
/// @param p Number of columns of B and C.
///
/// Nothing is written when m <= 0 or p <= 0.
void naive_matmul(float *C, float *A, float *B, int m, int n, int p) {
    if (m <= 0 || p <= 0) {
        return;
    }
    // Index arithmetic is done in size_t so that i * p + j cannot overflow int.
    const std::size_t rows = static_cast<std::size_t>(m);
    const std::size_t cols = static_cast<std::size_t>(p);
    const std::size_t inner = n > 0 ? static_cast<std::size_t>(n) : 0;

    for (std::size_t i = 0; i < rows; ++i) {
        for (std::size_t j = 0; j < cols; ++j) {
            // Accumulate in a local so C is written once per element; this also
            // makes a separate zero-initialisation pass over C unnecessary.
            float acc = 0.0f;
            for (std::size_t k = 0; k < inner; ++k) {
                acc += A[i * inner + k] * B[k * cols + j];
            }
            C[i * cols + j] = acc;
        }
    }
}

/// @brief Cache-blocked matrix product C = A x B.
///
/// Same layout and contract as naive_matmul. The iteration space is tiled into
/// block_size-sized tiles; tiles on the right/bottom edge are clipped, so the
/// dimensions do not have to be multiples of block_size.
///
/// @param C Output buffer with room for m * p floats; fully overwritten.
/// @param A Left operand, m * n floats.
/// @param B Right operand, n * p floats.
/// @param m Number of rows of A and C.
/// @param n Number of columns of A / rows of B.
/// @param p Number of columns of B and C.
/// @param block_size Tile edge length. A non-positive value means "one tile
///        covering the whole matrix" (the original code never terminated for
///        block_size == 0).
void blocked_matmul(float *C, float *A, float *B, int m, int n, int p, int block_size) {
    if (m <= 0 || p <= 0) {
        return;
    }
    const std::size_t rows = static_cast<std::size_t>(m);
    const std::size_t cols = static_cast<std::size_t>(p);
    const std::size_t inner = n > 0 ? static_cast<std::size_t>(n) : 0;

    std::fill_n(C, rows * cols, 0.0f);

    // A tile larger than the largest dimension is equivalent to that dimension;
    // clamping keeps the tile loops' increments small.
    const std::size_t largest = std::max({rows, cols, inner});
    const std::size_t tile = block_size > 0
                                 ? std::min(static_cast<std::size_t>(block_size), largest)
                                 : largest;

    for (std::size_t ii = 0; ii < rows; ii += tile) {
        const std::size_t i_end = std::min(ii + tile, rows);
        for (std::size_t jj = 0; jj < cols; jj += tile) {
            const std::size_t j_end = std::min(jj + tile, cols);
            for (std::size_t kk = 0; kk < inner; kk += tile) {
                const std::size_t k_end = std::min(kk + tile, inner);
                // Inside a tile iterate i-k-j: the innermost loop then walks one
                // row of B and one row of C contiguously. For any fixed (i, j)
                // the k contributions are still added in ascending order, so the
                // result is bit-identical to the i-j-k formulation.
                for (std::size_t i = ii; i < i_end; ++i) {
                    float *c_row = C + i * cols;
                    for (std::size_t k = kk; k < k_end; ++k) {
                        const float a_ik = A[i * inner + k];
                        const float *b_row = B + k * cols;
                        for (std::size_t j = jj; j < j_end; ++j) {
                            c_row[j] += a_ik * b_row[j];
                        }
                    }
                }
            }
        }
    }
}

/// @brief OpenMP-parallel matrix product C = A x B.
///
/// Same layout and contract as naive_matmul. Rows of C are distributed across
/// threads; each row of C is written by exactly one loop iteration, so no
/// synchronisation is needed. When compiled without OpenMP the pragma is
/// ignored and the function runs serially.
///
/// @param C Output buffer with room for m * p floats; fully overwritten.
/// @param A Left operand, m * n floats.
/// @param B Right operand, n * p floats.
/// @param m Number of rows of A and C.
/// @param n Number of columns of A / rows of B.
/// @param p Number of columns of B and C.
void parallel_matmul(float *C, float *A, float *B, int m, int n, int p) {
    if (m <= 0 || p <= 0) {
        return;
    }
    const std::size_t cols = static_cast<std::size_t>(p);
    const std::size_t inner = n > 0 ? static_cast<std::size_t>(n) : 0;

    // The loop variable stays a signed int for compatibility with older OpenMP
    // implementations that reject unsigned loop counters.
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < m; ++i) {
        float *c_row = C + static_cast<std::size_t>(i) * cols;
        const float *a_row = A + static_cast<std::size_t>(i) * inner;
        // Zeroing happens inside the parallel region so it is shared by the
        // threads instead of being a serial prologue.
        std::fill_n(c_row, cols, 0.0f);
        // i-k-j order: contiguous access to B and C, ascending k per element.
        for (std::size_t k = 0; k < inner; ++k) {
            const float a_ik = a_row[k];
            const float *b_row = B + k * cols;
            for (std::size_t j = 0; j < cols; ++j) {
                c_row[j] += a_ik * b_row[j];
            }
        }
    }
}

// Returns true when `actual` matches `expected` up to the precision of the
// text format: values are stored with two decimals, so a difference of one
// printed digit (0.01) caused by a different summation order is accepted.
// NaN never matches.
static bool matmul_values_match(float actual, float expected) {
    if (actual == expected) {
        return true;  // also covers equal infinities
    }
    const float kAbsTolerance = 0.011f;
    const float kRelTolerance = 1e-4f;
    const float diff = std::fabs(actual - expected);
    return diff <= kAbsTolerance + kRelTolerance * std::fabs(expected);
}

// Reads the "rows cols" header of a matrix file and checks it against the
// expected shape. Reports the problem on stderr and returns false on failure.
static bool matmul_read_header(std::ifstream &in, const std::string &path, int m, int p) {
    int rows = 0;
    int cols = 0;
    if (!(in >> rows >> cols)) {
        std::cerr << "Unable to read matrix dimensions from " << path << '\n';
        return false;
    }
    if (rows != m || cols != p) {
        std::cerr << "Dimension mismatch in " << path << ": found " << rows << " x " << cols
                  << ", expected " << m << " x " << p << '\n';
        return false;
    }
    return true;
}

/// @brief Compares a result matrix file against a reference matrix file.
///
/// Both files are expected to be whitespace-separated text: two integers
/// (rows, columns) followed by rows * columns values in row-major order.
/// The header is parsed and checked first; previously it was consumed as if it
/// were matrix data, which left the last two elements unchecked.
///
/// @param result_file    Path of the file produced by this program.
/// @param reference_file Path of the expected output.
/// @param m Expected number of rows.
/// @param p Expected number of columns.
/// @return true when both files can be read, both headers equal m x p and all
///         m * p values match within the file format's precision; false
///         otherwise. A diagnostic is written to stderr on failure. The function
///         no longer terminates the process, so callers can act on the result.
bool validate_result(const std::string &result_file, const std::string &reference_file, int m, int p) {
    std::ifstream reference_in(reference_file);
    if (!reference_in.is_open()) {
        std::cerr << "Unable to open file " << reference_file << '\n';
        return false;
    }
    std::ifstream result_in(result_file);
    if (!result_in.is_open()) {
        std::cerr << "Unable to open file " << result_file << '\n';
        return false;
    }

    if (!matmul_read_header(result_in, result_file, m, p) ||
        !matmul_read_header(reference_in, reference_file, m, p)) {
        return false;
    }

    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < p; ++j) {
            float actual = 0.0f;
            float expected = 0.0f;
            // A failed extraction means a truncated or malformed file.
            if (!(result_in >> actual) || !(reference_in >> expected)) {
                std::cerr << "Unable to read element (" << i << ", " << j << ")\n";
                return false;
            }
            if (!matmul_values_match(actual, expected)) {
                std::cerr << "Value mismatch at (" << i << ", " << j << "): got " << actual
                          << ", expected " << expected << '\n';
                return false;
            }
        }
    }
    // Streams are closed by their destructors.
    return true;
}
p + j]; + } + } + FileB.close(); + // Measure performance of naive_matmul double start_time = omp_get_wtime(); naive_matmul(C_naive, A, B, m, n, p); double naive_time = omp_get_wtime() - start_time; - // TODO Write naive result to file + // Write naive result to file + // Write dimensions and elements to result.raw + std::ofstream result(result_file); + // Validate that file is created correctly + if (!result) { + std::cerr << "Unable to open file"; + exit(1); + } + // Write the dimensions of C on the first line + result << m << " " << p << std::endl; + result << std::fixed << std::setprecision(2); + // Iterate C and write each element to result.raw + for (int i = 0; i < m; ++i) { + for (int j = 0; j < p; ++j) { + result << C_naive[i * p + j] << " "; + } + result << std::endl; + } + // Close file after writing + result.close(); // Validate naive result - bool naive_correct = validate_result(result_file, reference_file); + bool naive_correct = validate_result(result_file, reference_file, m, p); if (!naive_correct) { std::cerr << "Naive result validation failed for case " << case_number << std::endl; } + // Measure performance of blocked_matmul (use block_size = 32 as default) start_time = omp_get_wtime(); - blocked_matmul(C_blocked, A, B, m, n, p, 32); + blocked_matmul(C_blocked, A, B, m, n, p, 128); double blocked_time = omp_get_wtime() - start_time; - // TODO Write blocked result to file + // Write blocked result to file + std::ofstream result_block(result_file); + // Validate that file is created correctly + if (!result_block) { + std::cerr << "Unable to open file"; + exit(1); + } + // Write the dimensions of C on the first line + result_block << m << " " << p << std::endl; + result_block << std::fixed << std::setprecision(2); + // Iterate C and write each element to result.raw + for (int i = 0; i < m; ++i) { + for (int j = 0; j < p; ++j) { + result_block << C_blocked[i * p + j] << " "; + } + result_block << std::endl; + } + // Close file after writing + 
result_block.close(); // Validate blocked result - bool blocked_correct = validate_result(result_file, reference_file); + bool blocked_correct = validate_result(result_file, reference_file, m, p); if (!blocked_correct) { std::cerr << "Blocked result validation failed for case " << case_number << std::endl; } @@ -86,11 +250,29 @@ int main(int argc, char *argv[]) { parallel_matmul(C_parallel, A, B, m, n, p); double parallel_time = omp_get_wtime() - start_time; - // TODO Write parallel result to file + // Write parallel result to file + std::ofstream result_para(result_file); + // Validate that file is created correctly + if (!result_para) { + std::cerr << "Unable to open file"; + exit(1); + } + // Write the dimensions of C on the first line + result_para << m << " " << p << std::endl; + result_para << std::fixed << std::setprecision(2); + // Iterate C and write each element to result.raw + for (int i = 0; i < m; ++i) { + for (int j = 0; j < p; ++j) { + result_para << C_parallel[i * p + j] << " "; + } + result_para << std::endl; + } + // Close file after writing + result_para.close(); // Validate parallel result - bool parallel_correct = validate_result(result_file, reference_file); + bool parallel_correct = validate_result(result_file, reference_file, m, p); if (!parallel_correct) { std::cerr << "Parallel result validation failed for case " << case_number << std::endl; }