Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ build-profiling/
*.model
*.etdump
tokenizer.json
*.pte
*.ptd
!test_bpe_tokenizer.bin
!test_tiktoken_tokenizer.model
Expand Down Expand Up @@ -69,6 +68,11 @@ xcuserdata/
/src/executorch/share/
/src/executorch/version.py
*_etdump
/runtime/core/portable_type/c10/CMakeFiles/
/runtime/core/portable_type/c10/bin/
/runtime/core/portable_type/c10/Makefile
/runtime/core/portable_type/c10/cmake_install.cmake
/runtime/core/portable_type/c10/*.a

# Android
*.aar
Expand Down
18 changes: 13 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,21 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif()

if(EXECUTORCH_OPTIMIZE_SIZE)
# -Os: Optimize for size.
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Os")
if(NOT EXECUTORCH_BUILD_CADENCE)
if(OPTIMIZE_SIZE)
# -Os: Optimize for size
set(CMAKE_CXX_FLAGS_RELEASE "-Os ${CMAKE_CXX_FLAGS_RELEASE}")
else()
# -O2: Moderate opt.
set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
endif()
else()
# -O2: Moderate opt.
set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_RELEASE
"-O3 -mcoproc -mlongcalls -LNO:simd -ffunction-sections -fsigned-char -fno-exceptions -INLINE:requested -fno-zero-initialized-in-bss -mtext-section-literals -fmessage-length=0")
set(CMAKE_C_FLAGS_RELEASE
"-O3 -mcoproc -mlongcalls -LNO:simd -ffunction-sections -fsigned-char -fno-exceptions -INLINE:requested -fno-zero-initialized-in-bss -mtext-section-literals -fmessage-length=0")
endif()
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")

if(EXECUTORCH_BUILD_TESTS)
include(CTest)
Expand Down
4 changes: 4 additions & 0 deletions backends/cadence/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ elseif(EXECUTORCH_FUSION_G3_OPT)
)
elseif(EXECUTORCH_VISION_OPT)
set(TARGET_DIR vision)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party
${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
else()
set(TARGET_DIR generic)
Expand Down
18 changes: 12 additions & 6 deletions backends/cadence/aot/functions_vision.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@
- op: max_pool2d_with_indices.out
kernels:
- arg_meta: null
kernel_name: torch::executor::max_pool2d_with_indices_out
kernel_name: impl::vision::max_pool2d_with_indices_out

- op: mean.out
kernels:
- arg_meta: null
kernel_name: torch::executor::mean_dim_out
kernel_name: impl::vision::mean_dim_out

- op: mul.out
kernels:
Expand Down Expand Up @@ -205,6 +205,16 @@
- arg_meta: null
kernel_name: impl::vision::quantized_conv2d_nhwc_out

- func: cadence::quantized_conv2d_nchw.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::vision::quantized_conv2d_nchw_per_tensor_out

- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::vision::quantized_conv2d_nhwc_per_tensor_out

- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
Expand Down Expand Up @@ -249,10 +259,6 @@
- arg_meta: null
kernel_name: impl::vision::im2row_per_tensor_out

- func: cadence::quantized_conv.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
kernels:
- arg_meta: null
kernel_name: impl::vision::quantized_conv_per_tensor_out

- func: cadence::quantized_fully_connected.out(Tensor src, Tensor weight, Tensor bias, int src_zero_point, Tensor weight_zero_point, Tensor out_multiplier, Tensor out_shift, int out_zero_point, Tensor? offset, *, Tensor(a!) out) -> Tensor(a!)
kernels:
Expand Down
2 changes: 1 addition & 1 deletion backends/cadence/utils/FACTO
Submodule FACTO updated from 3b8c77 to 1db37f
File renamed without changes.
107 changes: 107 additions & 0 deletions backends/cadence/vision/config_generator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Config Generator Python

Python tools for extracting convolution layer parameters from neural network models and generating optimized C header configurations for DMA-tiled execution on the Xtensa XRC Vision DSP (XAI CNN runtime).

## Prerequisites

The script requires the Python venv in the executorch tree. Either call the venv's Python interpreter directly (works from any shell) or activate the venv from a **bash** terminal (not csh):

```bash
# The venv is at <executorch>/.venv/
# All paths below are relative to the executorch root.

# Option 1: call the venv python directly (works from any shell)
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py ...

# Option 2: activate the venv in a bash shell
bash
source .venv/bin/activate
python3 backends/cadence/vision/config_generator/generate_layer_configs.py ...
```

> **Note:** If your default shell is `csh`, inline Python commands
> and `source ... && ...` chains will fail. Always use `bash` or invoke
> the venv python by its full path.

## Quick Start

```bash
# Run from the executorch root directory: cd <executorch>

# From a single ExecuTorch .pte binary
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py \
--pte operator_and_model_testing/resnet18/pte/resnet18_quantized.pte \
--output backends/cadence/vision/config_generator/conv_layer_configs.h \
--dram0 62976 --dram1 62976

# From multiple .pte files (layers are deduplicated automatically)
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py \
--pte operator_and_model_testing/resnet18/pte/resnet18_quantized.pte \
operator_and_model_testing/resnet50/pte/resnet50_quantized.pte \
--output backends/cadence/vision/config_generator/conv_layer_configs_combined.h \
--dram0 62976 --dram1 62976

# From a torchvision model (requires torchvision installed in venv)
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py \
--model resnet18 --input-size 1,3,64,64 \
--output backends/cadence/vision/config_generator/conv_layer_configs.h \
--dram0 32768 --dram1 32768
```

### Full working commands

```bash
# cd to the executorch root first
cd <path-to-executorch>

# ResNet18 with 62976 bytes per DRAM bank
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py \
--pte operator_and_model_testing/resnet18/pte/resnet18_quantized.pte \
--output backends/cadence/vision/config_generator/conv_layer_configs_62k_pte.h \
--dram0 62976 --dram1 62976

# ResNet18 + ResNet50 combined
.venv/bin/python3 backends/cadence/vision/config_generator/generate_layer_configs.py \
--pte operator_and_model_testing/resnet18/pte/resnet18_quantized.pte \
operator_and_model_testing/resnet50/pte/resnet50_quantized.pte \
--output backends/cadence/vision/config_generator/conv_layer_configs_62k_combined.h \
--dram0 62976 --dram1 62976
```

---

## `generate_layer_configs.py` — Arguments

| Flag | Default | Description |
|------|---------|-------------|
| `--model`, `-m` | — | Comma- or `+`-separated torchvision model names (e.g. `resnet18+resnet50`) |
| `--pte` | — | Path to one or more ExecuTorch `.pte` binaries (layers from multiple files are deduplicated); bootstraps `exir._serialize` from the local source tree — no pip install needed |
| `--flatc` | cmake-out default | Path to `flatc` binary (auto-detected; only relevant with `--pte`) |
| `--input-size` | `1,3,64,64` | Input tensor shape `N,C,H,W` (only used with `--model`) |
| `--output`, `-o` | `conv_layer_configs.h` | Output C header file |
| `--dram0` | `32768` | DRAM0 size in bytes |
| `--dram1` | `32768` | DRAM1 size in bytes |
| `--cache-mode` | off | Append `_cache` to every kernel name |

---

## Output

The generated header contains:

- `conv_layer_config_t` struct with ~60 fields (buffer sizes, tile dimensions, DRAM0/1 placement, kernel name, quantization params)
- `CONV_LAYER_CONFIGS[]` static array — one entry per unique layer
- `get_layer_config()`, `get_layer_config_by_params()`, `get_layer_config_by_key()` inline accessors

---

## Directory Structure

```
config_generator/
├── generate_layer_configs.py # Main entry point
├── generate_idma_buffers.py # Core tiling / buffer sizing engine
├── extract_layers_from_pte.py # .pte/.onnx → JSON (intermediate step)
├── config/ # Pre-generated headers
└── bin/ # Compare / test utilities
```
Loading
Loading