Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if(APPLE)
endif()


add_executable(matmul main_ans.cpp)
add_executable(matmul main.cpp)


if(OpenMP_CXX_FOUND)
Expand Down
123 changes: 110 additions & 13 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,86 @@
#include <cmath>

// Reference (unoptimized) matrix product C = A * B.
//
// A is m x n, B is n x p and C is m x p; all three are flat arrays indexed
// row-major (element (r, c) of an R x K matrix lives at index r * K + c).
// Every element of C is overwritten with the dot product of the matching
// row of A and column of B, accumulated in ascending order of the inner index.
void naive_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) {
    for (uint32_t row = 0; row != m; ++row) {
        for (uint32_t col = 0; col != p; ++col) {
            // Accumulate in a local so C is written exactly once per element.
            float acc = 0.0f;
            uint32_t inner = 0;
            while (inner < n) {
                acc += A[row * n + inner] * B[inner * p + col];
                ++inner;
            }
            C[row * p + col] = acc;
        }
    }
}

// Returns the exclusive end of the tile that starts at `start`, clipped to
// `limit`. Requires start < limit. Written as a subtraction so that
// `start + tile` can never wrap around in 32 bits.
static uint32_t blocked_tile_end(uint32_t start, uint32_t limit, uint32_t tile) {
    return (limit - start > tile) ? start + tile : limit;
}

// Blocked (tiled) matrix product C = A * B.
//
// A is m x n, B is n x p and C is m x p; all three are flat arrays indexed
// row-major. The iteration space is cut into tiles of at most
// block_size x block_size x block_size, and each tile adds its partial dot
// products into C.
//
// block_size is the tile edge length. A value of 0 would never advance the
// tile loops, so it falls back to 32. Tiles at the matrix edges are clipped,
// so the dimensions do not have to be multiples of block_size.
//
// For every element of C the products are still accumulated in ascending
// order of the inner index, whatever the tile size.
void blocked_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p, uint32_t block_size) {
    const uint32_t tile = (block_size == 0) ? 32u : block_size;

    // C is used as the accumulator across k-tiles, so it has to start at zero.
    // The element count is computed in 64 bits so that m * p cannot wrap.
    const uint64_t c_count = static_cast<uint64_t>(m) * p;
    for (uint64_t idx = 0; idx < c_count; ++idx) {
        C[idx] = 0.0f;
    }

    uint32_t i_end = 0;
    for (uint32_t ii = 0; ii < m; ii = i_end) {
        i_end = blocked_tile_end(ii, m, tile);

        uint32_t j_end = 0;
        for (uint32_t jj = 0; jj < p; jj = j_end) {
            j_end = blocked_tile_end(jj, p, tile);

            uint32_t k_end = 0;
            for (uint32_t kk = 0; kk < n; kk = k_end) {
                k_end = blocked_tile_end(kk, n, tile);

                for (uint32_t i = ii; i < i_end; ++i) {
                    // 64-bit row offsets keep the flat indices from wrapping.
                    const uint64_t a_row = static_cast<uint64_t>(i) * n;
                    const uint64_t c_row = static_cast<uint64_t>(i) * p;
                    for (uint32_t j = jj; j < j_end; ++j) {
                        // Resume from the partial sum left by earlier k-tiles.
                        float sum = C[c_row + j];
                        for (uint32_t k = kk; k < k_end; ++k) {
                            sum += A[a_row + k] * B[static_cast<uint64_t>(k) * p + j];
                        }
                        C[c_row + j] = sum;
                    }
                }
            }
        }
    }
}

// OpenMP-parallel matrix product C = A * B.
//
// A is m x n, B is n x p and C is m x p; all three are flat arrays indexed
// row-major. The rows of C are distributed across threads. Each row of C is
// written by exactly one loop iteration and A and B are only read, so the
// loop needs no synchronization.
//
// Each element is a dot product accumulated in ascending order of the inner
// index, the same order a plain triple loop would use. When the file is
// compiled without OpenMP the pragma is ignored and the loop runs serially.
void parallel_matmul(float *C, float *A, float *B, uint32_t m, uint32_t n, uint32_t p) {
    // Signed loop index: accepted by every OpenMP version, including those
    // that reject unsigned loop variables.
    const int64_t rows = static_cast<int64_t>(m);

#pragma omp parallel for schedule(static)
    for (int64_t i = 0; i < rows; ++i) {
        // 64-bit row offsets keep the flat indices from wrapping.
        const float *a_row = A + static_cast<uint64_t>(i) * n;
        float *c_row = C + static_cast<uint64_t>(i) * p;
        for (uint32_t j = 0; j < p; ++j) {
            float sum = 0.0f;
            for (uint32_t k = 0; k < n; ++k) {
                sum += a_row[k] * B[static_cast<uint64_t>(k) * p + j];
            }
            c_row[j] = sum;
        }
    }
}

// Compares a result matrix file against a reference matrix file.
//
// Both files are read as whitespace-separated text: two unsigned integers
// (rows, columns) followed by rows * columns floating-point values.
//
// Returns true only if both files open, both headers parse and agree, every
// expected value can be read from both files, and each pair of values differs
// by at most 1e-3 in absolute terms. Any open failure, parse failure,
// truncated file, dimension mismatch or NaN difference yields false.
bool validate_result(const std::string &result_file, const std::string &reference_file) {
    std::ifstream result(result_file);
    std::ifstream ref(reference_file);

    if (!result.is_open() || !ref.is_open()) {
        return false;
    }

    uint32_t rm = 0;
    uint32_t rp = 0;
    uint32_t fm = 0;
    uint32_t fp = 0;
    // A failed extraction stores 0, so without this check two empty or
    // garbage files would look like matching 0 x 0 matrices.
    if (!(result >> rm >> rp) || !(ref >> fm >> fp)) {
        return false;
    }

    if (rm != fm || rp != fp) {
        return false;
    }

    const float tolerance = 1e-3f;
    // 64-bit element count so that rows * columns cannot wrap.
    const uint64_t count = static_cast<uint64_t>(rm) * rp;
    for (uint64_t idx = 0; idx < count; ++idx) {
        float a = 0.0f;
        float b = 0.0f;
        // A truncated or malformed file must fail validation rather than be
        // compared as zeros.
        if (!(result >> a) || !(ref >> b)) {
            return false;
        }
        // Negated "<=" so that a NaN difference is rejected too.
        if (!(std::fabs(a - b) <= tolerance)) {
            return false;
        }
    }

    return true;
}

// Writes the m x p matrix C (flat array, row-major) to result_file as text.
//
// The first line is "m p". Each following line holds one row with values
// separated by single spaces and no trailing space. Values are written with
// 9 significant digits, enough for a float to be read back exactly; the
// default of 6 digits can lose more than the 1e-3 that validate_result
// tolerates for larger magnitudes.
//
// If the file cannot be opened, an error is printed to std::cerr and nothing
// is written.
void write_result(const std::string &result_file, float *C, uint32_t m, uint32_t p) {
    std::ofstream out(result_file);
    if (!out.is_open()) {
        std::cerr << "Failed to open " << result_file << '\n';
        return;
    }

    // 9 == std::numeric_limits<float>::max_digits10 (round-trip precision).
    const int float_round_trip_digits = 9;
    out.precision(float_round_trip_digits);

    out << m << " " << p << "\n";
    for (uint32_t i = 0; i < m; ++i) {
        // 64-bit row offset keeps the flat index from wrapping.
        const uint64_t row = static_cast<uint64_t>(i) * p;
        for (uint32_t j = 0; j < p; ++j) {
            out << C[row + j];
            if (j + 1 < p) {
                out << " ";
            }
        }
        out << "\n";
    }
    // The ofstream destructor flushes and closes the file.
}

int main(int argc, char *argv[]) {
Expand All @@ -42,10 +106,43 @@ int main(int argc, char *argv[]) {
std::string result_file = folder + "result.raw";
std::string reference_file = folder + "output.raw";

// TODO Read input0.raw (matrix A)
std::ifstream input0(input0_file);
if (!input0.is_open()) {
std::cerr << "Failed to open " << input0_file << std::endl;
return 1;
}

uint32_t m, n;
input0 >> m >> n;

float *A = new float[m * n];
for (uint32_t i = 0; i < m * n; i++) {
input0 >> A[i];
}
input0.close();

// TODO Read input1.raw (matrix B)

std::ifstream input1(input1_file);
if (!input1.is_open()) {
std::cerr << "Failed to open " << input1_file << std::endl;
delete[] A;
return 1;
}

uint32_t n2, p;
input1 >> n2 >> p;

if (n != n2) {
std::cerr << "Matrix dimension mismatch: input0 columns != input1 rows" << std::endl;
delete[] A;
return 1;
}

float *B = new float[n * p];
for (uint32_t i = 0; i < n * p; i++) {
input1 >> B[i];
}
input1.close();


// Allocate memory for result matrices
Expand All @@ -58,7 +155,7 @@ int main(int argc, char *argv[]) {
naive_matmul(C_naive, A, B, m, n, p);
double naive_time = omp_get_wtime() - start_time;

// TODO Write naive result to file
write_result(result_file, C_naive, m, p);


// Validate naive result
Expand All @@ -69,10 +166,10 @@ int main(int argc, char *argv[]) {

// Measure performance of blocked_matmul (use block_size = 32 as default)
start_time = omp_get_wtime();
blocked_matmul(C_blocked, A, B, m, n, p, 32);
blocked_matmul(C_blocked, A, B, m, n, p, 64);
double blocked_time = omp_get_wtime() - start_time;

// TODO Write blocked result to file
write_result(result_file, C_blocked, m, p);


// Validate blocked result
Expand All @@ -86,7 +183,7 @@ int main(int argc, char *argv[]) {
parallel_matmul(C_parallel, A, B, m, n, p);
double parallel_time = omp_get_wtime() - start_time;

// TODO Write parallel result to file
write_result(result_file, C_parallel, m, p);


// Validate parallel result
Expand Down