Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions 08_bilateral_filter/Jason-Young123/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/obj/
/useful/
/useless/
/analysis/
/runTester
/tmp_env/
/result/
init.mk
145 changes: 145 additions & 0 deletions 08_bilateral_filter/Jason-Young123/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Project layout. Every path is built from WDIR, which is the current directory.
WDIR = .
# Sources to compile (see SRCS below).
SRC_DIR = $(WDIR)/src
# Object files produced by the compile rule.
OBJ_DIR = $(WDIR)/obj
# Passed to the compiler with -I.
INCLUDES_DIR = $(WDIR)/include
# Output directory of the nsys / ncu-gui profiling targets.
ANALYSIS_DIR = $(WDIR)/analysis
# Prefix of the conda environment that `make init` may create for OpenCV.
ENV_DIR = $(WDIR)/tmp_env
# Holds the optional per-platform makefile $(PLATFORM).mk.
SCRIPTS_DIR = $(WDIR)/scripts
# Written by `make init`; records HAS_CV_ENV and the OpenCV flags.
MK = $(WDIR)/init.mk
# Created by the `run` target.
RESULT_DIR = ./result
# The `run` target tees its output into this file.
LOG = $(RESULT_DIR)/runtime.log

-include $(MK)

# PLATFORM selects the toolchain; it defaults to nvidia when not given on the command line.
PLATFORM ?= nvidia
# Resolve platform: each branch sets the preprocessor define, compiler, flags and source suffix.
ifeq ($(PLATFORM),nvidia)
PLATFORM_DEFINE := -DPLATFORM_NVIDIA
# All NVIDIA tools are taken from this fixed CUDA 12.8 install path.
PREFIX := /usr/local/cuda-12.8/bin/
CXX := $(PREFIX)nvcc
# GDB, NCU, NCU-GUI and NSYS are only defined in this branch, so the gdb / nsys / ncu /
# ncu-gui targets have no tool to run on the other platforms.
GDB := $(PREFIX)cuda-gdb
NCU := $(PREFIX)ncu
NCU-GUI := $(PREFIX)ncu-ui
NSYS := $(PREFIX)nsys
CXXFLAGS := -Xcompiler -std=c++17 -O3 -use_fast_math -Xcudafe --diag_suppress=611 -Wno-deprecated-gpu-targets -I$(INCLUDES_DIR)
SUFFIX := cu
else ifeq ($(PLATFORM),iluvatar)
PLATFORM_DEFINE := -DPLATFORM_ILUVATAR
CXX := clang++
CXXFLAGS := -std=c++17 -O3 -I$(INCLUDES_DIR) -Wno-implicit-const-int-float-conversion -Wno-literal-range
# NOTE(review): LIBS is only used by the link rule, so the -I path here is not seen when compiling.
LIBS := -lcudart -I/usr/local/corex/include -L/usr/local/corex/lib64 -fPIC
SUFFIX := cu
else ifeq ($(PLATFORM),moore)
PLATFORM_DEFINE := -DPLATFORM_MOORE
CXX := mcc
CXXFLAGS := -std=c++17 -O3 -I$(INCLUDES_DIR) -I/usr/local/musa/include
LIBS := -L$(ENV_DIR)/lib -L/usr/lib/gcc/x86_64-linux-gnu/11/ -L/usr/local/musa/lib -lmusart
# NOTE(review): RUNTIME_LIBS is not referenced anywhere else in this Makefile; presumably
# the per-platform script consumes it -- confirm.
RUNTIME_LIBS += /usr/local/musa/lib
SUFFIX := mu
else ifeq ($(PLATFORM),metax)
PLATFORM_DEFINE := -DPLATFORM_METAX
CXX := mxcc
CXXFLAGS := -std=c++17 -O3 -I$(INCLUDES_DIR)
SUFFIX := maca
else
# Any other value stops make immediately with an explanatory message.
$(error Unsupported PLATFORM '$(PLATFORM)' (expected: nvidia, iluvatar, moore, metax))
endif
# Optional per-platform makefile; the leading '-' keeps make quiet when the file is missing.
# The `run` target invokes the `exec` target of this same file.
-include $(SCRIPTS_DIR)/$(PLATFORM).mk


# HAS_CV_ENV is written to init.mk by `make init`; when it is "yes" the sources are
# compiled with -DHAS_CV.
ifeq ($(HAS_CV_ENV), yes)
CXXFLAGS += -DHAS_CV
endif



# Every file in SRC_DIR carrying the platform's suffix is a source; each one maps to an
# object of the same base name in OBJ_DIR.
SRCS = $(wildcard $(SRC_DIR)/*.$(SUFFIX))
OBJS = $(patsubst $(SRC_DIR)/%.$(SUFFIX), $(OBJ_DIR)/%.o, $(SRCS))
# The single executable produced by the link rule.
EXEC = $(WDIR)/runTester


# A plain `make` builds the executable.
.DEFAULT_GOAL := all

all: $(EXEC)


# One-time environment setup. Deletes any previous conda environment (ENV_DIR) and init.mk (MK),
# then probes for OpenCV and records the result in MK:
#   1. `pkg-config --exists opencv4` succeeds: HAS_CV_ENV = yes, with CVFLAGS / CVLIBS taken
#      from pkg-config and CVRUNTIME_LIBS set to the current LD_LIBRARY_PATH.
#   2. otherwise, if `conda` is on PATH: creates an environment at ENV_DIR containing libopencv
#      and points CVFLAGS / CVLIBS / CVRUNTIME_LIBS into it.
#   3. otherwise: HAS_CV_ENV = no.
# PLATFORM_INIT_ENV is not defined in this Makefile; presumably it comes from
# $(SCRIPTS_DIR)/$(PLATFORM).mk -- confirm. If it is empty the shell just runs an empty command.
# NOTE(review): the result of `conda create` is not checked before HAS_CV_ENV = yes is written.
init:
@rm -rf $(ENV_DIR) $(MK)
@echo "[INIT] Checking OpenCV Environment..."
@$(PLATFORM_INIT_ENV); \
if pkg-config --exists opencv4; then \
echo "[INFO] Built-in OpenCV Found."; \
echo "HAS_CV_ENV = yes" > $(MK); \
echo "CVFLAGS = $$(pkg-config --cflags opencv4)" >> $(MK); \
echo "CVLIBS = $$(pkg-config --libs opencv4)" >> $(MK); \
echo "CVRUNTIME_LIBS = $${LD_LIBRARY_PATH}" >> $(MK); \
elif command -v conda > /dev/null 2>&1; then \
echo "[INFO] Using Conda to Create $(ENV_DIR)..."; \
conda create --prefix $(ENV_DIR) -y libopencv; \
echo "HAS_CV_ENV = yes" > $(MK); \
echo "CVFLAGS = -I$(ENV_DIR)/include/opencv4" >> $(MK); \
echo "CVLIBS = -L$(ENV_DIR)/lib -lopencv_core -lopencv_imgproc -lopencv_imgcodecs" >> $(MK); \
echo "CVRUNTIME_LIBS = $(ENV_DIR)/lib:$${LD_LIBRARY_PATH}" >> $(MK); \
else \
echo "[WARN] No OpenCV is Available on this Server."; \
echo "HAS_CV_ENV = no" > $(MK); \
fi
@echo "[INFO] Project Init Done."
@echo "[INFO] Enter \`make (run) PLATFORM=[nvidia, moore, metax, iluvatar]\` to Compile & Run."




# Link: object files -> executable.
# The libraries come AFTER the objects on purpose: a linker that resolves symbols left to
# right (static archives, or shared libraries under --as-needed) drops a library that is
# named before any object referencing it, which ends in undefined-reference errors.
$(EXEC): $(OBJS)
	@echo "[INFO] $^ -> $@"
	@$(CXX) $(CXXFLAGS) $(CVFLAGS) -o $@ $^ $(LIBS) $(CVLIBS)


# Compile: one source file -> one object file. The source suffix is $(SUFFIX), which the
# platform selection sets to cu, mu or maca. OBJ_DIR is created on demand, and the
# platform define (-DPLATFORM_...) is passed here only, not at link time.
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.$(SUFFIX)
@mkdir -p $(OBJ_DIR)
@echo "[INFO] $< -> $@"
@$(CXX) $(CXXFLAGS) $(CVFLAGS) $(PLATFORM_DEFINE) -c -o $@ $<


# Build if necessary, then run the program through the `exec` target of the per-platform
# makefile, with CVRUNTIME_LIBS prepended to LD_LIBRARY_PATH. stdout and stderr of that run
# are shown on the terminal and copied into LOG.
# NOTE(review): the exit status of the pipeline is tee's, so a failing run presumably does
# not fail this target -- confirm whether that is intended.
# NOTE(review): when CVRUNTIME_LIBS is empty the exported value starts with ':', i.e. an
# empty search-path entry -- confirm this is acceptable.
run: $(EXEC)
@mkdir -p $(RESULT_DIR)
@export LD_LIBRARY_PATH=$(CVRUNTIME_LIBS):$$LD_LIBRARY_PATH; \
$(MAKE) -s -f $(SCRIPTS_DIR)/$(PLATFORM).mk exec LD_LIBRARY_PATH="$$LD_LIBRARY_PATH" 2>&1 | tee $(LOG)
@echo "[INFO] Runtime log has been written to $(LOG)"



# Start the executable under the debugger named by GDB (cuda-gdb; only set for PLATFORM=nvidia).
gdb: $(EXEC)
@$(GDB) $<




# Nsight Systems, the system-level profiler: records CUDA, NVTX and OS-runtime traces into
# ANALYSIS_DIR (overwriting an existing report), then prints statistics from that report.
nsys: $(EXEC)
@mkdir -p $(ANALYSIS_DIR)
@$(NSYS) profile -t cuda,nvtx,osrt -o $(ANALYSIS_DIR)/$(EXEC) -f true $<
@$(NSYS) stats $(ANALYSIS_DIR)/$(EXEC).nsys-rep --force-export=true

# Nsight Compute, the kernel-level profiler: collects the full metric set and prints the
# details to the terminal. No report file is written by this target.
ncu: $(EXEC)
@mkdir -p $(ANALYSIS_DIR)
@$(NCU) --print-details all --nvtx --call-stack --set full $<

# Same collection as `ncu`, but exported to a report in ANALYSIS_DIR and opened in the
# Nsight Compute GUI.
ncu-gui: $(EXEC)
@mkdir -p $(ANALYSIS_DIR)
@$(NCU) --nvtx --call-stack --set full -f --export $(ANALYSIS_DIR)/$(EXEC).ncu-rep $<
@$(NCU-GUI) $(ANALYSIS_DIR)/$(EXEC).ncu-rep


# Remove build and run artifacts: objects, profiler reports, the executable and the results.
clean:
	rm -rf $(OBJ_DIR) $(ANALYSIS_DIR) $(EXEC) $(RESULT_DIR)

# Remove everything `clean` removes, plus what `make init` generated (init.mk and the
# conda environment).
cleanup: clean
	rm -rf $(MK) $(ENV_DIR)


# None of these targets creates a file named after itself. Declaring them phony keeps them
# working even if a file or directory called e.g. `clean` or `run` appears in the project.
.PHONY: all init run gdb nsys ncu ncu-gui clean cleanup
38 changes: 38 additions & 0 deletions 08_bilateral_filter/Jason-Young123/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Bilateral-Filter


## 介绍
InfiniTensor 2025冬季训练营项目阶段, CUDA方向题目八: 基于CUDA的实时图像双边滤波


## 使用说明
### step1: 环境初始化
在开始编译前需配置本地openCV环境(优先使用系统自带openCV, 若缺失则尝试基于conda建立虚拟环境tmp_env并安装openCV; 若conda也不可用则项目将在缺少openCV对照的情况下直接运行)
```bash
make init PLATFORM=[nvidia, moore, metax, iluvatar]
```

### step2: 配置参数文件
修改`tester/config.txt`以配置radius, sigma_spatial和sigma_color(当radius设置为 <= 0时,代码将自适应选取滤波半径);
在`tester/gray/4K`和`tester/rgb/4K`路径下存放待测试的4K(3840 * 2160)图片的bin文件,分别包含10张灰度壁纸和15张RGB壁纸(具体jpeg图片参见resource下的相应路径);
你也可以在`tester/gray/4K`和`tester/rgb/4K`中自行添加待测试.bin文件

### step3: 编译运行
输出.bin文件和性能日志(runtime.log)将自动保存至`result`路径下
```bash
make run PLATFORM=[nvidia, moore, metax, iluvatar]
```


## 其他
清除编译和运行结果
```bash
make clean
```

清除所有生成文件(包括虚拟环境和辅助脚本)
```bash
make cleanup
```

基于各平台的结果和性能分析参见`report`路径
83 changes: 83 additions & 0 deletions 08_bilateral_filter/Jason-Young123/include/auxiliary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//文件读写等辅助函数
#pragma once

#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <type_traits>
#include <vector>

#include <myPixel.h>


// The three functions below are only declared here; their definitions are not part of this header.

// Presumably reads radius, sigma_spatial and sigma_color from the configuration file at cfgPath
// into the reference parameters and returns whether that succeeded -- TODO confirm against the
// definition.
bool getCfg(const std::string& cfgPath, int& radius, float& sigma_spatial, float& sigma_color);

// Presumably loads the image stored at binPath, reports its size through width/height and hands
// the pixel data back through src1 (single-channel) or src2 (myPixel) -- TODO confirm which
// pointer is set in which case and who is responsible for freeing it.
bool getBin(const std::string& binPath, int& width, int& height, uint8_t*& src1, myPixel*& src2);

// Presumably returns a difference metric between a reference .bin file and a test .bin file --
// TODO confirm the metric and the value returned when a file cannot be read.
float binDiff(const std::string& binPath_ref, const std::string& binPath_test);


// Writes an image to a binary file.
//
// File layout: three ints stored as raw bytes in host byte order (width, height, channels),
// followed by the pixel data.
//   - T == myPixel: channels is 3 and each pixel is stored as the bytes R(), G(), B().
//   - any other T : channels is 1 and width * height bytes are copied straight from img.
//     NOTE(review): this branch looks like it expects a 1-byte element type such as uint8_t.
//
// outBinPath : file to create or overwrite.
// img        : width * height pixels in row-major order; may be null only for an empty image.
// width, height : image size in pixels, must not be negative.
//
// Returns true only if the file was opened and every byte was written successfully;
// otherwise an error is printed to std::cerr and false is returned.
template <typename T>
bool genBin(const std::string& outBinPath, const T* img, int width, int height){
    if(width < 0 || height < 0){
        std::cerr << "Error: invalid image size for file: " << outBinPath << std::endl;
        return false;
    }

    // Computed in size_t so that large images cannot overflow int.
    const std::size_t pixel_count = static_cast<std::size_t>(width) * static_cast<std::size_t>(height);
    if(img == nullptr && pixel_count != 0){
        std::cerr << "Error: null image for file: " << outBinPath << std::endl;
        return false;
    }

    std::ofstream file(outBinPath, std::ios::binary);
    if(!file){
        std::cerr << "Error: Could not open file: " << outBinPath << std::endl;
        return false;
    }

    const int channels = std::is_same_v<T, myPixel> ? 3 : 1;

    file.write(reinterpret_cast<const char*>(&width), sizeof(int));
    file.write(reinterpret_cast<const char*>(&height), sizeof(int));
    file.write(reinterpret_cast<const char*>(&channels), sizeof(int));
    if constexpr(std::is_same_v<T, myPixel>){
        // Pixels are serialized one by one so the on-disk layout does not depend on how
        // myPixel stores its channels in memory.
        for(std::size_t i = 0; i < pixel_count; ++i){
            const uint8_t rgb[3] = {img[i].R(), img[i].G(), img[i].B() };
            file.write(reinterpret_cast<const char*>(rgb), sizeof(rgb));
        }
    }
    else{
        file.write(reinterpret_cast<const char*>(img), static_cast<std::streamsize>(pixel_count));
    }

    // close() flushes the buffer; a failed write or flush (e.g. disk full) leaves the stream
    // in a failed state, which must not be reported as success.
    file.close();
    if(!file){
        std::cerr << "Error: Could not write file: " << outBinPath << std::endl;
        return false;
    }
    return true;
}


// Maps a coordinate of the padded image, expressed relative to the source image, back to a
// coordinate inside the source image. The expected input range is -radius ~ len + radius - 1.
// The edge sample itself is not repeated: -1 maps to 1, and len maps to len - 2.
inline int mapReflect101(int p, int len){
    const bool inside = (p >= 0) && (p < len);
    if(inside){
        return p;
    }
    // Left of the image: mirror around index 0. Right of the image: mirror around index len - 1.
    return (p < 0) ? -p : 2 * (len - 1) - p;
}

// Extends an image by `radius` pixels on every side using reflect-101 mirroring
// (the border is mirrored around the edge pixels; the edge pixels themselves are not repeated).
//
// src    : source image, width * height elements in row-major order.
// width, height : source size in pixels.
// radius : border width; must satisfy 0 <= radius < width and radius < height.
//
// Returns a buffer allocated with new[] holding (width + 2 * radius) * (height + 2 * radius)
// elements in row-major order; the caller owns it and must release it with delete[].
// Returns nullptr, after printing an error to std::cerr, when radius is unsupported or src is null.
template <typename T>
T* Reflect101(const T* src, int width, int height, int radius){
    if(radius < 0 || radius >= width || radius >= height){
        std::cerr << "Error: unsupported radius" << std::endl;
        return nullptr;
    }
    if(src == nullptr){
        std::cerr << "Error: null source image" << std::endl;
        return nullptr;
    }

    const int padded_w = width + 2 * radius;
    const int padded_h = height + 2 * radius;

    // Element counts and offsets are computed in size_t: for large images the products
    // padded_w * padded_h and row * padded_w do not fit in int.
    const std::size_t row_stride = static_cast<std::size_t>(padded_w);
    T* src_r101 = new T[row_stride * static_cast<std::size_t>(padded_h)];

    for(int i = 0; i < padded_h; ++i){
        // The source row depends only on i, so it is resolved once per output row.
        const int src_y = mapReflect101(i - radius, height);//row in the source image
        const T* src_row = src + static_cast<std::size_t>(src_y) * static_cast<std::size_t>(width);
        T* dst_row = src_r101 + static_cast<std::size_t>(i) * row_stride;

        for(int j = 0; j < padded_w; ++j){
            const int src_x = mapReflect101(j - radius, width);//column in the source image
            dst_row[j] = src_row[src_x];
        }
    }
    return src_r101;
}
Loading