From a3587e5238891f89e529dd04380440a32ee7f3d7 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 1 Dec 2025 17:36:10 +0100 Subject: [PATCH 01/17] Migrating to `cmake` --- .github/workflows/compilation.yml | 65 +- .gitignore | 3 + CMAKE_BUILD.md | 195 ++++++ CMakeLists.txt | 339 +++++++++++ Makefile | 98 --- Makefile.org | 212 ------- cmake/preprocess_vu1.cmake | 49 ++ examples/CMakeLists.txt | 107 ++++ examples/box/Makefile | 36 -- examples/logo/Makefile | 35 -- examples/nehe/lesson02/Makefile | 40 -- examples/nehe/lesson03/Makefile | 40 -- examples/nehe/lesson04/Makefile | 40 -- examples/nehe/lesson05/Makefile | 40 -- examples/performance/Makefile | 36 -- examples/shared_code/Makefile | 6 - examples/shared_code/Makefile.examples | 250 -------- examples/tricked_out/Makefile | 39 -- glut/CMakeLists.txt | 86 +++ glut/Makefile | 38 -- glut/Makefile.org | 215 ------- tests/CMakeLists.txt | 4 + vu1/fast_nolights_vcl.vsm | 179 ------ vu1/fast_vcl.vsm | 257 -------- vu1/general_nospec_quad_vcl.vsm | 593 ------------------ vu1/general_nospec_tri_vcl.vsm | 490 --------------- vu1/general_nospec_vcl.vsm | 570 ------------------ vu1/general_pv_diff_quad_vcl.vsm | 714 ---------------------- vu1/general_pv_diff_tri_vcl.vsm | 688 --------------------- vu1/general_pv_diff_vcl.vsm | 769 ------------------------ vu1/general_quad_vcl.vsm | 794 ------------------------- vu1/general_tri_vcl.vsm | 689 --------------------- vu1/general_vcl.vsm | 771 ------------------------ vu1/indexed_vcl.vsm | 718 ---------------------- 34 files changed, 816 insertions(+), 8389 deletions(-) create mode 100644 CMAKE_BUILD.md create mode 100644 CMakeLists.txt delete mode 100644 Makefile delete mode 100644 Makefile.org create mode 100644 cmake/preprocess_vu1.cmake create mode 100644 examples/CMakeLists.txt delete mode 100644 examples/box/Makefile delete mode 100644 examples/logo/Makefile delete mode 100644 examples/nehe/lesson02/Makefile delete mode 100644 
examples/nehe/lesson03/Makefile delete mode 100644 examples/nehe/lesson04/Makefile delete mode 100644 examples/nehe/lesson05/Makefile delete mode 100644 examples/performance/Makefile delete mode 100644 examples/shared_code/Makefile delete mode 100644 examples/shared_code/Makefile.examples delete mode 100644 examples/tricked_out/Makefile create mode 100644 glut/CMakeLists.txt delete mode 100644 glut/Makefile delete mode 100644 glut/Makefile.org create mode 100644 tests/CMakeLists.txt delete mode 100644 vu1/fast_nolights_vcl.vsm delete mode 100644 vu1/fast_vcl.vsm delete mode 100644 vu1/general_nospec_quad_vcl.vsm delete mode 100644 vu1/general_nospec_tri_vcl.vsm delete mode 100644 vu1/general_nospec_vcl.vsm delete mode 100644 vu1/general_pv_diff_quad_vcl.vsm delete mode 100644 vu1/general_pv_diff_tri_vcl.vsm delete mode 100644 vu1/general_pv_diff_vcl.vsm delete mode 100644 vu1/general_quad_vcl.vsm delete mode 100644 vu1/general_tri_vcl.vsm delete mode 100644 vu1/general_vcl.vsm delete mode 100644 vu1/indexed_vcl.vsm diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index bbbe344d..d4bc2fcc 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -5,6 +5,7 @@ on: pull_request: repository_dispatch: types: [run_build] + workflow_dispatch: {} jobs: build: @@ -12,51 +13,51 @@ jobs: container: ps2dev/ps2sdk:latest steps: - uses: actions/checkout@v4 - - - name: Install dependencies + + - name: Setup dependencies run: | - apk add build-base git + apk update + apk add cmake build-base git make - name: Install ps2stuff run: | git clone https://github.com/ps2dev/ps2stuff.git - cd ps2stuff - make -j $(getconf _NPROCESSORS_ONLN) clean - make -j $(getconf _NPROCESSORS_ONLN) all - make -j $(getconf _NPROCESSORS_ONLN) install + cd ps2stuff + git checkout cmake + mkdir build + cd build + cmake .. 
+ make -j $(getconf _NPROCESSORS_ONLN) + make install - - name: Compile project + - name: Configure with CMake run: | - make -j $(getconf _NPROCESSORS_ONLN) clean - make -j $(getconf _NPROCESSORS_ONLN) all - make -j $(getconf _NPROCESSORS_ONLN) install + mkdir build + cd build + cmake -DBUILD_GLUT=ON -DBUILD_EXAMPLES=ON .. - - name: Compile GLUT + - name: Build project with CMake run: | - cd glut - make -j $(getconf _NPROCESSORS_ONLN) clean - make -j $(getconf _NPROCESSORS_ONLN) all - make -j $(getconf _NPROCESSORS_ONLN) install - - - name: Compile examples + cd build + make -j $(getconf _NPROCESSORS_ONLN) + + - name: Install libraries run: | - cd examples - cd box && make clean all && cd .. - cd logo && make clean all && cd .. - cd performance && make clean all && cd .. - cd tricked_out && make clean all && cd .. - cd nehe/lesson02 && make clean all && cd ../.. - cd nehe/lesson03 && make clean all && cd ../.. - cd nehe/lesson04 && make clean all && cd ../.. - cd nehe/lesson05 && make clean all && cd ../.. + cd build + make -j $(getconf _NPROCESSORS_ONLN) install + + - name: Get short SHA + id: slug + run: echo "sha8=$(echo ${GITHUB_SHA} | cut -c1-8)" >> $GITHUB_OUTPUT - name: Upload artifacts + if: ${{ success() }} uses: actions/upload-artifact@v4 with: - name: examples + name: ps2gl-examples-${{ steps.slug.outputs.sha8 }} path: | - examples/**/*.elf - examples/**/*.gl - examples/**/*.rtx - examples/**/*.bin + build/examples/*.elf + build/examples/*.gl + build/examples/*.rtx + build/examples/*.bin diff --git a/.gitignore b/.gitignore index 8b1ecbd9..8d06bec3 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ prebuilddone *.a *.elf *.orig + +# CMake +build/ \ No newline at end of file diff --git a/CMAKE_BUILD.md b/CMAKE_BUILD.md new file mode 100644 index 00000000..42fa30da --- /dev/null +++ b/CMAKE_BUILD.md @@ -0,0 +1,195 @@ +# Building ps2gl with CMake + +This document describes how to build ps2gl using CMake instead of the traditional Makefile. 
+ +## Prerequisites + +- PS2DEV environment installed and configured +- `PS2DEV` environment variable set +- CMake 3.13 or later +- VU1 development tools (optional - see note below): + - **Open-source tools (recommended):** `openvcl`, `masp`, `dvp-as` + - **Proprietary tools (legacy):** `vcl`, `gasp`, `dvp-as` + +**Note:** +- VU1 tools are only required for building VU1 renderers from source +- The build automatically detects and prefers open-source tools (`openvcl` + `masp`) over proprietary ones (`vcl` + `gasp`) +- If VU1 tools are not available (e.g., on macOS), the build will automatically use pre-built `.vo` object files from the `vu1/` directory +- Pre-built objects can be generated by running the build once with VU1 tools available +- **OpenVCL compatibility**: 11 out of 13 renderers compile successfully with openvcl. The `indexed` and `scei` renderers require proprietary `vcl` or use pre-built objects as they are incompatible with openvcl + +## Building + +### Basic Build + +```bash +mkdir build +cd build +cmake .. +make +``` + +### Debug Build + +To enable debug symbols and `_DEBUG` definition: + +```bash +cmake -DDEBUG=ON .. +make +``` + +### Building with Tests + +To build the test executables (when available): + +```bash +cmake -DBUILD_TESTS=ON .. 
+make +``` + +## Installing + +To install the library and headers to `$PS2SDK/ports`: + +```bash +make install +``` + +This will: +- Install `libps2gl.a` to `$PS2SDK/ports/lib/` +- Install GL headers to `$PS2SDK/ports/include/GL/` +- Install ps2gl headers to `$PS2SDK/ports/include/ps2gl/` + +## Configuration Options + +The following CMake options are available: + +| Option | Default | Description | +|--------|---------|-------------| +| `DEBUG` | OFF | Enable debug build with `_DEBUG` definition | +| `BUILD_TESTS` | OFF | Build test executables | + +## Build Flags + +The CMake build automatically applies the following flags: + +- `-DNO_VU0_VECTORS` - Disables VU0 vector code (currently broken) +- `-DNO_ASM` - Disables assembly optimizations +- `-Wno-strict-aliasing` - Suppresses strict aliasing warnings +- `-Wno-conversion-null` - Suppresses conversion null warnings + +## VU1 Renderer Pipeline + +ps2gl includes VU1 assembly renderers that go through a complex preprocessing pipeline: + +1. **Step 1**: Remove C preprocessor directives and fix include paths +2. **Step 2**: `gasp`/`masp` assembler preprocessing +3. **Step 3**: Array notation conversion +4. **Step 4**: C preprocessor with memory layout headers +5. **Step 5**: `vcl`/`openvcl` compiler generates `.vsm` files +6. **Step 6**: `dvp-as` assembler generates `.vo` object files + +The CMake build handles all these steps automatically for the following renderers: + +### OpenVCL Compatible (11 renderers): +- fast_nolights, fast +- general, general_quad, general_tri +- general_nospec, general_nospec_quad, general_nospec_tri +- general_pv_diff, general_pv_diff_quad, general_pv_diff_tri + +### Require proprietary VCL or use pre-built (2 renderers): +- indexed (uses variable naming incompatible with openvcl) +- scei + +## CMake Toolchain + +The build uses the PS2DEV CMake toolchain file located at: +``` +$PS2DEV/share/ps2dev.cmake +``` + +This toolchain file is automatically detected when `PS2DEV` is set. 
+ +## Clean Build + +To perform a clean build: + +```bash +rm -rf build +mkdir build +cd build +cmake .. +make +``` + +## Comparison with Makefile Build + +The CMake build produces the same output as the traditional Makefile: +- Same compiler flags +- Same source files +- Same VU1 preprocessing pipeline +- Same install locations +- Compatible library format + +## Migration Notes + +The CMake build system was designed to be compatible with the previous Makefile build. The legacy Makefiles are removed as part of this migration, so CMake is now the only supported build system. + +### Key Differences: + +1. **Out-of-source builds**: CMake uses a separate `build/` directory +2. **Dependency tracking**: CMake automatically handles dependencies +3. **Parallel VU1 processing**: CMake can process multiple VU1 renderers in parallel +4. **Cross-platform**: CMake can generate build files for different build systems + +## Troubleshooting + +### PS2DEV not found + +If you get an error about PS2DEV not being set: + +```bash +export PS2DEV=/path/to/ps2dev +export PS2SDK=$PS2DEV/ps2sdk +``` + +### Toolchain file not found + +Make sure the toolchain file exists at `$PS2DEV/share/ps2dev.cmake`. + +### VU1 tools not found + +Make sure the VU1 tools are in your PATH. For open-source tools: + +```bash +which openvcl masp dvp-as +``` + +Or for proprietary tools: + +```bash +which vcl gasp dvp-as +``` + +The `dvp-as` tool should be installed as part of PS2DEV. For `openvcl` and `masp`: +- openvcl: https://github.com/ps2dev/openvcl +- masp: https://github.com/AzagraMac/masp + +### Build errors + +Try a clean build: + +```bash +rm -rf build +mkdir build +cd build +cmake .. +make +``` + +### VU1 preprocessing errors + +If VU1 preprocessing fails, check that: +1. All `.vcl` source files exist in `vu1/` +2. Memory layout headers exist: `vu1/vu1_mem_linear.h` and `vu1/vu1_mem_indexed.h` +3.
VU1 tools are properly installed diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..ade200f4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,339 @@ +cmake_minimum_required(VERSION 3.13) + +# Set the toolchain file before project() +if(NOT CMAKE_TOOLCHAIN_FILE) + if(DEFINED ENV{PS2DEV}) + set(CMAKE_TOOLCHAIN_FILE "$ENV{PS2DEV}/share/ps2dev.cmake" CACHE FILEPATH "Toolchain file") + else() + message(FATAL_ERROR "PS2DEV environment variable is not set. Please set it to your PS2DEV installation path.") + endif() +endif() + +project(ps2gl VERSION 1.0.0 LANGUAGES CXX C) + +# Options +option(BUILD_TESTS "Build test projects" OFF) +option(BUILD_GLUT "Build GLUT library" ON) +option(BUILD_EXAMPLES "Build example programs" OFF) +option(DEBUG "Enable debug build" OFF) + +# Check if PS2SDK is set (should be done by toolchain file) +if(NOT DEFINED PS2SDK) + message(FATAL_ERROR "PS2SDK is not defined. Make sure the toolchain file is loaded correctly.") +endif() + +# Set output library name +set(EE_LIB "libps2gl.a") + +# Include directories +include_directories( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/vu1 + ${PS2SDK}/ports/include +) + +# Link directories +link_directories( + ${PS2SDK}/ports/lib +) + +# Compiler flags +if(DEBUG) + add_compile_definitions(_DEBUG) +endif() + +# Warning flags (matching Makefile) +set(WARNING_FLAGS + -Wno-strict-aliasing + -Wno-conversion-null +) + +# VU0 code is broken so disable for now +add_compile_definitions( + NO_VU0_VECTORS + NO_ASM +) + +add_compile_options(${WARNING_FLAGS}) + +# ============================================================================ +# Source files +# ============================================================================ +set(PS2GL_SOURCES + src/base_renderer.cpp + src/clear.cpp + src/displaycontext.cpp + src/dlgmanager.cpp + src/dlist.cpp + src/drawcontext.cpp + src/gblock.cpp + src/glcontext.cpp + src/gmanager.cpp + src/gsmemory.cpp + src/immgmanager.cpp + 
src/indexed_renderer.cpp + src/inverse.cpp + src/lighting.cpp + src/linear_renderer.cpp + src/material.cpp + src/matrix.cpp + src/metrics.cpp + src/renderermanager.cpp + src/texture.cpp +) + +# ============================================================================ +# VU1 Renderers +# ============================================================================ +set(RENDERERS + fast_nolights + fast + general_nospec_quad + general_nospec_tri + general_nospec + general_pv_diff_quad + general_pv_diff_tri + general_pv_diff + general_quad + general_tri + general + indexed + scei +) + +# Check for VU1 tools availability +# Prefer open-source tools (openvcl + masp) over proprietary ones (vcl + gasp) +find_program(OPENVCL_FOUND openvcl) +find_program(MASP_FOUND masp) +find_program(VCL_FOUND vcl) +find_program(GASP_FOUND gasp) +find_program(DVP_AS_FOUND dvp-as) + +# Determine which tools to use +if(OPENVCL_FOUND AND MASP_FOUND AND DVP_AS_FOUND) + set(VU1_TOOLS_AVAILABLE TRUE) + set(VCL_TOOL ${OPENVCL_FOUND}) + set(GASP_TOOL ${MASP_FOUND}) + message(STATUS "VU1 tools found - using open-source tools (openvcl + masp)") +elseif(VCL_FOUND AND GASP_FOUND AND DVP_AS_FOUND) + set(VU1_TOOLS_AVAILABLE TRUE) + set(VCL_TOOL ${VCL_FOUND}) + set(GASP_TOOL ${GASP_FOUND}) + message(STATUS "VU1 tools found - using proprietary tools (vcl + gasp)") +else() + set(VU1_TOOLS_AVAILABLE FALSE) + message(STATUS "VU1 tools not found - will use pre-built VU1 objects from source tree") +endif() + +# VU1 preprocessing rules +# These implement the complex preprocessing pipeline from the Makefile: +# .vcl -> _pp1.vcl -> _pp2.vcl -> _pp3.vcl -> _pp4.vcl -> _vcl.vsm -> .vo + +set(VU1_OBJECTS) +foreach(RENDERER ${RENDERERS}) + set(VCL_SOURCE "${CMAKE_SOURCE_DIR}/vu1/${RENDERER}.vcl") + set(VO_SOURCE "${CMAKE_SOURCE_DIR}/vu1/${RENDERER}.vo") + set(PP1_FILE "${CMAKE_BINARY_DIR}/vu1/${RENDERER}_pp1.vcl") + set(PP2_FILE "${CMAKE_BINARY_DIR}/vu1/${RENDERER}_pp2.vcl") + set(PP3_FILE 
"${CMAKE_BINARY_DIR}/vu1/${RENDERER}_pp3.vcl") + set(PP4_FILE "${CMAKE_BINARY_DIR}/vu1/${RENDERER}_pp4.vcl") + set(VSM_FILE "${CMAKE_BINARY_DIR}/vu1/${RENDERER}_vcl.vsm") + set(VO_FILE "${CMAKE_BINARY_DIR}/vu1/${RENDERER}.vo") + + if(NOT VU1_TOOLS_AVAILABLE) + # Use pre-built .vo file from source tree + if(EXISTS ${VO_SOURCE}) + message(STATUS "Using pre-built VU1 object: ${RENDERER}.vo") + add_custom_command( + OUTPUT ${VO_FILE} + COMMAND ${CMAKE_COMMAND} -E copy ${VO_SOURCE} ${VO_FILE} + DEPENDS ${VO_SOURCE} + COMMENT "Copying pre-built VU1 object: ${RENDERER}.vo" + ) + list(APPEND VU1_OBJECTS ${VO_FILE}) + else() + message(WARNING "Pre-built VU1 object not found: ${VO_SOURCE}") + endif() + continue() + endif() + + # Check if this renderer is known to be incompatible with openvcl + # indexed renderer uses variable names that openvcl doesn't recognize + # Temporarily disabled to try compiling with openvcl + # if(OPENVCL_FOUND AND (RENDERER STREQUAL "indexed" OR RENDERER STREQUAL "scei")) + # if(EXISTS ${VO_SOURCE}) + # message(STATUS "Using pre-built VU1 object for ${RENDERER}.vo (openvcl incompatible)") + # add_custom_command( + # OUTPUT ${VO_FILE} + # COMMAND ${CMAKE_COMMAND} -E copy ${VO_SOURCE} ${VO_FILE} + # DEPENDS ${VO_SOURCE} + # COMMENT "Copying pre-built VU1 object: ${RENDERER}.vo" + # ) + # list(APPEND VU1_OBJECTS ${VO_FILE}) + # else() + # message(WARNING "Pre-built VU1 object not found: ${VO_SOURCE}") + # endif() + # continue() + # endif() + + # Step 1: .vcl -> _pp1.vcl (remove #include, #define, fix .include paths) + add_custom_command( + OUTPUT ${PP1_FILE} + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/vu1 + COMMAND ${CMAKE_COMMAND} -D INPUT=${VCL_SOURCE} -D OUTPUT=${PP1_FILE} -D STEP=pp1 -D SOURCE_DIR=${CMAKE_SOURCE_DIR} -P ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + DEPENDS ${VCL_SOURCE} ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + COMMENT "Preprocessing step 1: ${RENDERER}.vcl -> ${RENDERER}_pp1.vcl" + ) + + # Step 2: 
_pp1.vcl -> _pp2.vcl (gasp/masp preprocessor) + add_custom_command( + OUTPUT ${PP2_FILE} + COMMAND ${CMAKE_COMMAND} -D INPUT=${PP1_FILE} -D OUTPUT=${PP2_FILE} -D STEP=pp2 -D SOURCE_DIR=${CMAKE_SOURCE_DIR} -D GASP_TOOL=${GASP_TOOL} -P ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + DEPENDS ${PP1_FILE} ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + COMMENT "Preprocessing step 2: ${RENDERER}_pp1.vcl -> ${RENDERER}_pp2.vcl (${GASP_TOOL})" + ) + + # Step 3: _pp2.vcl -> _pp3.vcl (array notation conversion) + add_custom_command( + OUTPUT ${PP3_FILE} + COMMAND ${CMAKE_COMMAND} -D INPUT=${PP2_FILE} -D OUTPUT=${PP3_FILE} -D STEP=pp3 -P ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + DEPENDS ${PP2_FILE} ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + COMMENT "Preprocessing step 3: ${RENDERER}_pp2.vcl -> ${RENDERER}_pp3.vcl (array notation)" + ) + + # Step 4: _pp3.vcl -> _pp4.vcl (C preprocessor with memory layout) + # Different preprocessing for indexed renderer + if(RENDERER STREQUAL "indexed") + add_custom_command( + OUTPUT ${PP4_FILE} + COMMAND ${CMAKE_COMMAND} -D INPUT=${PP3_FILE} -D OUTPUT=${PP4_FILE} -D STEP=pp4 -D SOURCE_DIR=${CMAKE_SOURCE_DIR} -D COMPILER=${CMAKE_C_COMPILER} -D MEM_HEADER=${CMAKE_SOURCE_DIR}/vu1/vu1_mem_indexed.h -P ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + DEPENDS ${PP3_FILE} ${CMAKE_SOURCE_DIR}/vu1/vu1_mem_indexed.h ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + COMMENT "Preprocessing step 4: ${RENDERER}_pp3.vcl -> ${RENDERER}_pp4.vcl (indexed memory layout)" + ) + else() + add_custom_command( + OUTPUT ${PP4_FILE} + COMMAND ${CMAKE_COMMAND} -D INPUT=${PP3_FILE} -D OUTPUT=${PP4_FILE} -D STEP=pp4 -D SOURCE_DIR=${CMAKE_SOURCE_DIR} -D COMPILER=${CMAKE_C_COMPILER} -D MEM_HEADER=${CMAKE_SOURCE_DIR}/vu1/vu1_mem_linear.h -P ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + DEPENDS ${PP3_FILE} ${CMAKE_SOURCE_DIR}/vu1/vu1_mem_linear.h ${CMAKE_SOURCE_DIR}/cmake/preprocess_vu1.cmake + COMMENT "Preprocessing step 4: ${RENDERER}_pp3.vcl -> 
${RENDERER}_pp4.vcl (linear memory layout)" + ) + endif() + + # Step 5: _pp4.vcl -> _vcl.vsm (vcl/openvcl compiler) + add_custom_command( + OUTPUT ${VSM_FILE} + COMMAND ${VCL_TOOL} -o${VSM_FILE} ${PP4_FILE} + DEPENDS ${PP4_FILE} + COMMENT "VCL compilation: ${RENDERER}_pp4.vcl -> ${RENDERER}_vcl.vsm (${VCL_TOOL})" + ) + + # Step 6: _vcl.vsm -> .vo (dvp assembler; invoked via the path located by find_program, like VCL_TOOL) + add_custom_command( + OUTPUT ${VO_FILE} + COMMAND ${DVP_AS_FOUND} -o ${VO_FILE} ${VSM_FILE} + DEPENDS ${VSM_FILE} + COMMENT "DVP assembly: ${RENDERER}_vcl.vsm -> ${RENDERER}.vo" + ) + + # Mark the .vo file as a generated external object (it is archived by the POST_BUILD step on ps2gl) + set_source_files_properties(${VO_FILE} PROPERTIES + EXTERNAL_OBJECT TRUE + GENERATED TRUE + ) + + list(APPEND VU1_OBJECTS ${VO_FILE}) +endforeach() + +# Create a custom target that depends on all VU1 objects +# This ensures all .vo files are built before they're added to the library +if(VU1_OBJECTS) + add_custom_target(vu1_objects ALL DEPENDS ${VU1_OBJECTS}) +endif() + +# ============================================================================ +# Build the library +# ============================================================================ +add_library(ps2gl STATIC ${PS2GL_SOURCES}) + +set_target_properties(ps2gl PROPERTIES + OUTPUT_NAME "ps2gl" + ARCHIVE_OUTPUT_NAME "ps2gl" +) + +target_include_directories(ps2gl PUBLIC + $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:include> +) + +# Add VU1 objects to the library after it's created +# CMake doesn't handle .vo files well, so we manually add them with ar +if(VU1_OBJECTS) + # Make ps2gl depend on the vu1_objects target + add_dependencies(ps2gl vu1_objects) + + add_custom_command(TARGET ps2gl POST_BUILD + COMMAND ${CMAKE_AR} qc $<TARGET_FILE:ps2gl> ${VU1_OBJECTS} + COMMAND ${CMAKE_RANLIB} $<TARGET_FILE:ps2gl> + COMMENT "Adding VU1 objects to library" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + VERBATIM + ) +endif() + +# ============================================================================ +# Install targets +#
============================================================================ +install( + TARGETS ps2gl + ARCHIVE DESTINATION "${PS2SDK}/ports/lib" +) + +install( + DIRECTORY ${CMAKE_SOURCE_DIR}/include/GL + DESTINATION "${PS2SDK}/ports/include" + FILES_MATCHING PATTERN "*.h" +) + +install( + DIRECTORY ${CMAKE_SOURCE_DIR}/include/ps2gl + DESTINATION "${PS2SDK}/ports/include" + FILES_MATCHING PATTERN "*.h" +) + +# ============================================================================ +# Include GLUT if enabled +# ============================================================================ +if(BUILD_GLUT) + add_subdirectory(glut) +endif() + +# ============================================================================ +# Include examples if enabled +# ============================================================================ +if(BUILD_EXAMPLES) + add_subdirectory(examples) +endif() + +# ============================================================================ +# Include tests if enabled +# ============================================================================ +if(BUILD_TESTS) + add_subdirectory(tests) +endif() + +# ============================================================================ +# Print configuration summary +# ============================================================================ +message(STATUS "") +message(STATUS "ps2gl configuration:") +message(STATUS " Version: ${PROJECT_VERSION}") +message(STATUS " Debug build: ${DEBUG}") +message(STATUS " Build GLUT: ${BUILD_GLUT}") +message(STATUS " Build examples: ${BUILD_EXAMPLES}") +message(STATUS " Build tests: ${BUILD_TESTS}") +message(STATUS " Output library: ${EE_LIB}") +message(STATUS " Install prefix: ${PS2SDK}/ports") +message(STATUS " VU0 vectors: DISABLED") +message(STATUS " ASM optimizations: DISABLED") +message(STATUS " VU1 renderers: ${RENDERERS}") +message(STATUS "") diff --git a/Makefile b/Makefile deleted file mode 100644 index e18b72c0..00000000 ---
a/Makefile +++ /dev/null @@ -1,98 +0,0 @@ -EE_LIB = libps2gl.a - -EE_LDFLAGS += -L. -L$(PS2SDK)/ports/lib -EE_INCS += -I./include -I./vu1 -I$(PS2SDK)/ports/include - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -EE_OBJS = \ - src/base_renderer.o \ - src/clear.o \ - src/displaycontext.o \ - src/dlgmanager.o \ - src/dlist.o \ - src/drawcontext.o \ - src/gblock.o \ - src/glcontext.o \ - src/gmanager.o \ - src/gsmemory.o \ - src/immgmanager.o \ - src/indexed_renderer.o \ - src/inverse.o \ - src/lighting.o \ - src/linear_renderer.o \ - src/material.o \ - src/matrix.o \ - src/metrics.o \ - src/renderermanager.o \ - src/texture.o - -RENDERERS = \ - fast_nolights \ - fast \ - general_nospec_quad \ - general_nospec_tri \ - general_nospec \ - general_pv_diff_quad \ - general_pv_diff_tri \ - general_pv_diff \ - general_quad \ - general_tri \ - general \ - indexed \ - scei - -EE_OBJS += $(addsuffix .vo, $(addprefix vu1/, $(RENDERERS))) - -VSM_SOURCES = $(addsuffix _vcl.vsm, $(addprefix vu1/, $(RENDERERS))) - -all: $(VSM_SOURCES) $(EE_LIB) - -install: all - mkdir -p $(PS2SDK)/ports/include - mkdir -p $(PS2SDK)/ports/lib - cp -rf include/GL $(PS2SDK)/ports/include - cp -rf include/ps2gl $(PS2SDK)/ports/include - cp -f $(EE_LIB) $(PS2SDK)/ports/lib - -clean: - rm -f $(EE_OBJS_LIB) $(EE_OBJS) $(EE_BIN) $(EE_LIB) - -realclean: clean - rm -rf $(PS2SDK)/ports/include/ps2gl - rm -f $(PS2SDK)/ports/lib/$(EE_LIB) - rm -f $(VSM_SOURCES) - -include $(PS2SDK)/Defs.make -include $(PS2SDK)/samples/Makefile.eeglobal - -%.vo: %_vcl.vsm - dvp-as -o $@ $< - -%_vcl.vsm: %_pp4.vcl - vcl -o$@ $< - -%indexed_pp4.vcl: %indexed_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - - -%_pp4.vcl: %_pp3.vcl - cat $< | cc 
-E -P -imacros vu1/vu1_mem_linear.h -o $@ - - -%_pp3.vcl: %_pp2.vcl - cat $< | sed 's/\[\([0-9]\)\]/_\1/g ; s/\[\([w-zW-Z]\)\]/\1/g' - > $@ - -%_pp2.vcl: %_pp1.vcl - gasp -c ';' -Ivu1 -o $@ $< - -%_pp1.vcl: %.vcl - cat $< | sed 's/#include[ ]\+.\+// ; s/#define[ ]\+.\+// ; s|\(\.include[ ]\+\)"\([^/].\+\)"|\1"$( $@ diff --git a/Makefile.org b/Makefile.org deleted file mode 100644 index 681d9895..00000000 --- a/Makefile.org +++ /dev/null @@ -1,212 +0,0 @@ -########################################################################## -### Copyright (c) 1999, 2000, 2001 Sony Computer Entertainment America Inc. -### All rights reserved. -### -### Boilerplate Makefile by Bret Mogilefsky (mogul@playstation.sony.com) -### and Tyler Daniel (tyler_daniel@playstation.sony.com) -### -### Use this makefile as a template for new projects! -### -### General Features: -### -### Just specify SRCS and go! -### Automatic and minimal (fast!) dependency generation (for vu microcode as well) -### Allows keeping source and headers from src and include dirs, or elsewhere. -### Builds in a subdirectory. -### Allows additional defines, include dirs, and lib dirs without -### specifying -D, -I, and -L -### Easy to specify parallel builds (debug, optimized, release, etc) -### Easy to add flags on a per-file, per-build, or per-file-build basis -### Can specify parent projects to make first (libraries) -### Builds libraries -### Slices, dices, feeds your cat, calls your mum. -### -### VU microcode features: -### -### Generates depencies for microcode (for .include and #include) -### Uses a preprocessing script to manage registers (configurable) -### Runs the c preprocessor over microcode - you can use #define and #include -### freely (and share #defines with c/c++) -### Support for vcl -### -### Useful targets: -### -### run Run the executable. -### xrun Run the executable under a new xterminal. -### clean Remove everything we can rebuild. -### tags Generate source-browsing tags for Emacs. 
-### documentation Use doxygen to generate docs in docs/html/doxygen_generated -### -### Using builds: -### -### To specify a particular build include the name of the build anywhere on -### the command line: -### make xrun optimized, -### make clean optimized, etc. -### -### Included builds (add your own!): -### debug -### optimized (default) -### release -### -### CodeWarrior builds: -### cw_debug -### cw_optimized -### cw_release -### -### For more info see the "Build Options" section below -########################################################################## - - -########################################################################## -### Target -########################################################################## - - -# The name of the binary file we want to generate. Also handles libraries! (.a) -TARGET = libps2gl.a - - -########################################################################## -### Files and Paths - this is probably the only section you'll need to change -########################################################################## - - -# The source files for the project. -# get all cpp source files -SRCS += $(wildcard *.cpp) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.cpp))) -# get all c source files -SRCS += $(wildcard *.c) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.c))) -# get all _vcl.vsm source files -SRCS += $(wildcard *_vcl.vsm) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*_vcl.vsm))) - -# Additional objects to link. Only add things that aren't built from SRCS! -OBJS = - -# Additional libs to link with. 
(sce libs are listed in the section below) -LIBS = - -# Additional locations for header files -INCDIRS = include ../ps2stuff/include vu1 - -# Additional locations for library files -LIBDIRS = - -# Additional locations for source files -SRCDIRS = src vu1 - -# Object files and the target will be placed in this directory with an -# underscore and the buildname appended (e.g., for the "debug" build: objs_debug/) -OBJDIRBASE = objs - -# Dependency files will be placed in this directory with an underscore and -# the buildname appended (e.g., for the "debug" build: deps_debug/) -DEPDIRBASE = deps - -# If this project depends other projects (a ps2 rendering library for example) that should -# be built with make before making this one, list the directories here. -MAKEPARENTS = ../ps2stuff - -# Where to find PSX2 development stuff. -SCEDIR = $(PS2SDK) -PS2DEVDIR = $(PS2SDK) - -# Where to find the ps2stuff project -PS2STUFF = ../ps2stuff - -########################################################################## -### Common Options (shared across builds) -########################################################################## - -# Additional preprocessor definitions -DEFINES = - -# Compiler optimization options -OPTFLAGS = -fno-rtti -G 0 - -# Compiler debug options - -# enable all warnings -DEBUGFLAGS = -Wall # -Winline -# output assembly listings with c/c++ code -DEBUGFLAGS += -Wa,-alh -# This is not recommended as it generates slower code, but let's leave it -# as the default so that "*(u_long128*)&someVar" behaves as you expect. -# It would be better to remove this and not do the above (try templates). -DEBUGFLAGS += -fno-strict-aliasing -# for multithreading to work properly? 
-DEBUGFLAGS += -fno-common - -# Command-line arguments to be passed to the target when we run it -RUNARGS = - - -########################################################################## -### Build Options - applied per-build -########################################################################## - - -# use ps2stuff's build configuration -include $(PS2STUFF)/Makefile.builds - - -########################################################################## -### Per-file Options -########################################################################## - - -# Additional defines and include dirs can be specified on a per-file basis -# by prefixing with the stem of the filename. For example, if I wanted special flags -# for building mucilage.cpp, I could add any of the following -# mucilage_INCDIRS = someincdirs -# mucilage_LIBDIRS = somelibdirs -# mucilage_DEFINES = somedefs -# mucilage_OPTFLAGS = someoptflags -# mucilage_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Per-file, per-build Options -########################################################################## - - -# Similar to above.. 
To apply special flags for building mucilage.cpp for -# the debug build, I could add any of the following -# mucilage_debug_INCDIRS = someincdirs -# mucilage_debug_LIBDIRS = somelibdirs -# mucilage_debug_DEFINES = somedefs -# mucilage_debug_OPTFLAGS = someoptflags -# mucilage_debug_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Makefile operation -########################################################################## - - -# Set this to 1 to print status messages (like 'Compiling somefile.cpp...') -PRINT_MSGS = 1 - -# Set this to 1 to print the exact command lines used to build files -PRINT_CMDS = 0 - - -########################################################################## -### include the makefile that does all the work -########################################################################## - -include $(PS2STUFF)/Makefile.work - -ifeq ($(GCC_MAJOR),3) -DEBUGFLAGS += -Wno-deprecated -endif - -########################################################################## -### Rules for this project -########################################################################## - -documentation: - $(SILENCE)doxygen docs/doxygen.config diff --git a/cmake/preprocess_vu1.cmake b/cmake/preprocess_vu1.cmake new file mode 100644 index 00000000..d77b0245 --- /dev/null +++ b/cmake/preprocess_vu1.cmake @@ -0,0 +1,49 @@ +# VU1 preprocessing script +# Usage: cmake -D INPUT= -D OUTPUT= -D STEP= -D SOURCE_DIR= -D COMPILER= -D MEM_HEADER=
-P preprocess_vu1.cmake
+#
+# Variables (all passed with -D):
+#   STEP        Stage to run: pp1, pp2, pp3 or pp4
+#   INPUT       File read by the stage
+#   OUTPUT      File written by the stage
+#   SOURCE_DIR  Source root; ${SOURCE_DIR}/vu1 is the include directory (pp1, pp2, pp4)
+#   GASP_TOOL   gasp/masp executable (pp2 only)
+#   COMPILER    Compiler driver run with -E -P (pp4 only)
+#   MEM_HEADER  Header handed to -imacros (pp4 only)
+#
+# The sed and compiler stages are started directly and CMake performs the
+# redirection (OUTPUT_FILE / INPUT_FILE) instead of "/bin/bash -c 'cat | ...'":
+# no bash is required, paths containing spaces survive, and an unreadable
+# input is reported as a failure instead of being hidden by the pipeline
+# (a pipeline only reports the exit status of its last command).
+
+# Abort with "<name> not defined" for every listed variable that is unset or empty.
+function(preprocess_vu1_require)
+    foreach(required_var IN LISTS ARGN)
+        if("${${required_var}}" STREQUAL "")
+            message(FATAL_ERROR "${required_var} not defined")
+        endif()
+    endforeach()
+endfunction()
+
+# Every stage reads INPUT and writes OUTPUT.
+preprocess_vu1_require(STEP INPUT OUTPUT)
+
+if(STEP STREQUAL "pp1")
+    # Step 1: Remove #include, #define, fix .include paths
+    # (relative .include "file" becomes .include "${SOURCE_DIR}/vu1/file")
+    preprocess_vu1_require(SOURCE_DIR)
+    execute_process(
+        COMMAND sed -E
+            -e "s/#include[[:space:]]+.+//"
+            -e "s/#define[[:space:]]+.+//"
+            -e "s|(\\.include[[:space:]]+)\"([^/].+)\"|\\1\"${SOURCE_DIR}/vu1/\\2\"|"
+            "${INPUT}"
+        OUTPUT_FILE "${OUTPUT}"
+        RESULT_VARIABLE result
+    )
+    if(NOT result EQUAL 0)
+        message(FATAL_ERROR "Step 1 preprocessing failed")
+    endif()
+
+elseif(STEP STREQUAL "pp2")
+    # Step 2: gasp/masp preprocessor
+    preprocess_vu1_require(GASP_TOOL SOURCE_DIR)
+    execute_process(
+        COMMAND ${GASP_TOOL} -c ";" "-I${SOURCE_DIR}/vu1" -o "${OUTPUT}" "${INPUT}"
+        RESULT_VARIABLE result
+    )
+    if(NOT result EQUAL 0)
+        message(FATAL_ERROR "Step 2 preprocessing (${GASP_TOOL}) failed")
+    endif()
+
+elseif(STEP STREQUAL "pp3")
+    # Step 3: Array notation conversion
+    # ("[<digit>]" becomes "_<digit>", "[<w-z>]" becomes the bare letter)
+    execute_process(
+        COMMAND sed -E
+            -e "s/\\[([0-9])\\]/_\\1/g"
+            -e "s/\\[([w-zW-Z])\\]/\\1/g"
+            "${INPUT}"
+        OUTPUT_FILE "${OUTPUT}"
+        RESULT_VARIABLE result
+    )
+    if(NOT result EQUAL 0)
+        message(FATAL_ERROR "Step 3 preprocessing failed")
+    endif()
+
+elseif(STEP STREQUAL "pp4")
+    # Step 4: C preprocessor with memory layout
+    # The source is fed on stdin (trailing "-"), exactly as the former
+    # "cat ${INPUT} | ${COMPILER} ... -" pipeline did.
+    preprocess_vu1_require(SOURCE_DIR COMPILER MEM_HEADER)
+    execute_process(
+        COMMAND ${COMPILER} -E -P "-I${SOURCE_DIR}/vu1" -imacros "${MEM_HEADER}" -o "${OUTPUT}" -
+        INPUT_FILE "${INPUT}"
+        RESULT_VARIABLE result
+    )
+    if(NOT result EQUAL 0)
+        message(FATAL_ERROR "Step 4 preprocessing failed")
+    endif()
+
+else()
+    message(FATAL_ERROR "Unknown step: ${STEP}")
+endif()
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 00000000..ded2ac9e
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,107 @@
+# Examples for ps2gl
+
+# Shared code library used by multiple examples
+add_library(shared_code STATIC
+    shared_code/text_stuff.cpp
+)
+
+target_include_directories(shared_code PUBLIC
+    ${CMAKE_CURRENT_SOURCE_DIR}/shared_code
+    ${CMAKE_SOURCE_DIR}/glut/include
+    ${CMAKE_SOURCE_DIR}/include
+
${PS2SDK}/ports/include
+)
+
+target_compile_options(shared_code PRIVATE
+    -Wno-strict-aliasing
+    -Wno-conversion-null
+)
+
+target_compile_definitions(shared_code PRIVATE
+    NO_VU0_VECTORS
+    NO_ASM
+)
+
+# add_ps2gl_example(<EXAMPLE_NAME> <SOURCE_FILE>)
+#
+# Builds <SOURCE_FILE> into an executable named "<EXAMPLE_NAME>.elf", links it
+# against shared_code, ps2glut, ps2gl, ps2stuff, pad and dma, strips it after
+# linking, and copies the data files (*.gl, *.rtx, *.bin) that sit next to
+# <SOURCE_FILE> into the current build directory.
+#
+# Example: add_ps2gl_example(box box/box.cpp)
+function(add_ps2gl_example EXAMPLE_NAME SOURCE_FILE)
+    add_executable(${EXAMPLE_NAME} ${SOURCE_FILE})
+
+    target_include_directories(${EXAMPLE_NAME} PRIVATE
+        ${CMAKE_SOURCE_DIR}/glut/include
+        ${CMAKE_SOURCE_DIR}/include
+        ${CMAKE_CURRENT_SOURCE_DIR}/shared_code
+        ${PS2SDK}/ports/include
+    )
+
+    target_compile_options(${EXAMPLE_NAME} PRIVATE
+        -Wno-strict-aliasing
+        -Wno-conversion-null
+    )
+
+    target_compile_definitions(${EXAMPLE_NAME} PRIVATE
+        NO_VU0_VECTORS
+        NO_ASM
+    )
+
+    target_link_directories(${EXAMPLE_NAME} PRIVATE
+        ${PS2SDK}/ports/lib
+    )
+
+    # Keyword signature: nothing links against an example executable.
+    target_link_libraries(${EXAMPLE_NAME} PRIVATE
+        shared_code
+        ps2glut
+        ps2gl
+        ps2stuff
+        pad
+        dma
+    )
+
+    set_target_properties(${EXAMPLE_NAME} PROPERTIES
+        OUTPUT_NAME "${EXAMPLE_NAME}.elf"
+        SUFFIX ""
+    )
+
+    # Strip the executable. TARGET_FILE resolves to the linked binary at
+    # build time, whatever output directory is in effect.
+    add_custom_command(TARGET ${EXAMPLE_NAME} POST_BUILD
+        COMMAND ${CMAKE_STRIP} --strip-all $<TARGET_FILE:${EXAMPLE_NAME}>
+        COMMENT "Stripping ${EXAMPLE_NAME}.elf"
+        VERBATIM
+    )
+
+    # Copy data files to build directory. They are looked up next to the
+    # source file rather than in a directory named after the target, because
+    # the two differ for the NeHe lessons (target nehe_lesson05, sources in
+    # nehe/lesson05). A directory without data files yields an empty list.
+    get_filename_component(example_source "${SOURCE_FILE}"
+        ABSOLUTE BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+    get_filename_component(example_dir "${example_source}" DIRECTORY)
+    file(GLOB data_files CONFIGURE_DEPENDS
+        "${example_dir}/*.gl"
+        "${example_dir}/*.rtx"
+        "${example_dir}/*.bin"
+    )
+    foreach(data_file IN LISTS data_files)
+        get_filename_component(data_name "${data_file}" NAME)
+        add_custom_command(TARGET ${EXAMPLE_NAME} POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+                "${data_file}"
+                "${CMAKE_CURRENT_BINARY_DIR}/${data_name}"
+            COMMENT "Copying ${data_name}"
+            VERBATIM
+        )
+    endforeach()
+endfunction()
+
+# Add examples
+add_ps2gl_example(box box/box.cpp)
+add_ps2gl_example(logo logo/logo.cpp)
+add_ps2gl_example(performance performance/performance.cpp)
+# Note: tricked_out has linking issues (missing
CBillboardRenderer symbols) +# add_ps2gl_example(tricked_out tricked_out/tricked_out.cpp) + +# NeHe tutorials +add_ps2gl_example(nehe_lesson02 nehe/lesson02/lesson2.cpp) +add_ps2gl_example(nehe_lesson03 nehe/lesson03/lesson3.cpp) +add_ps2gl_example(nehe_lesson04 nehe/lesson04/lesson4.cpp) +add_ps2gl_example(nehe_lesson05 nehe/lesson05/lesson5.cpp) + +message(STATUS "") +message(STATUS "ps2gl examples configured:") +message(STATUS " box, logo, performance") +message(STATUS " nehe: lesson02, lesson03, lesson04, lesson05") +message(STATUS " Note: tricked_out disabled due to linking issues") +message(STATUS "") diff --git a/examples/box/Makefile b/examples/box/Makefile deleted file mode 100644 index 0d17e465..00000000 --- a/examples/box/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -EE_BIN = box.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) -EE_OBJS = box.o ../shared_code/text_stuff.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/logo/Makefile b/examples/logo/Makefile deleted file mode 100644 index f6380bb0..00000000 --- a/examples/logo/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -EE_BIN = logo.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ 
$(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) -EE_OBJS = logo.o ../shared_code/text_stuff.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/nehe/lesson02/Makefile b/examples/nehe/lesson02/Makefile deleted file mode 100644 index b8f62e33..00000000 --- a/examples/nehe/lesson02/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -EE_BIN = lesson02.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include $(EE_CXXFLAGS) -EE_OBJS = lesson2.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -testpc: - g++ lesson2.cpp -lglut -lGL -o lesson02 - ./lesson02 - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include 
$(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/nehe/lesson03/Makefile b/examples/nehe/lesson03/Makefile deleted file mode 100644 index 0e2ebbd4..00000000 --- a/examples/nehe/lesson03/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -EE_BIN = lesson03.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include $(EE_CXXFLAGS) -EE_OBJS = lesson3.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -testpc: - g++ lesson3.cpp -lglut -lGL -o lesson03 - ./lesson03 - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/nehe/lesson04/Makefile b/examples/nehe/lesson04/Makefile deleted file mode 100644 index bdf1982f..00000000 --- a/examples/nehe/lesson04/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -EE_BIN = lesson04.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include $(EE_CXXFLAGS) -EE_OBJS = lesson4.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - 
$(EE_STRIP) --strip-all $(EE_BIN) - -testpc: - g++ lesson4.cpp -lglut -lGL -o lesson04 - ./lesson04 - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/nehe/lesson05/Makefile b/examples/nehe/lesson05/Makefile deleted file mode 100644 index 9ae34e23..00000000 --- a/examples/nehe/lesson05/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -EE_BIN = lesson05.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include $(EE_CXXFLAGS) -EE_OBJS = lesson5.o -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -testpc: - g++ lesson5.cpp -lglut -lGL -o lesson05 - ./lesson05 - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/performance/Makefile b/examples/performance/Makefile deleted file mode 100644 index 31f35ecc..00000000 --- a/examples/performance/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -EE_BIN = performance.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) -EE_OBJS = performance.o ../shared_code/text_stuff.o -EE_LDFLAGS += 
-L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma -lgs -lpacket -lgraph - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client -h 192.168.1.10 execee host:$(EE_BIN) - -reset: - ps2client -h 192.168.1.10 reset - -sim: $(EE_BIN) - PCSX2 --elf=$(PWD)/$(EE_BIN) - -include $(PS2SDK)/samples/Makefile.pref -include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/shared_code/Makefile b/examples/shared_code/Makefile deleted file mode 100644 index dc870298..00000000 --- a/examples/shared_code/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -TARGET = libps2gl_sample.a -MAKEPARENTS = $(PS2GL)/glut - -# all the examples share the same makefile, just with a different -# executable name.. -include ../shared_code/Makefile.examples \ No newline at end of file diff --git a/examples/shared_code/Makefile.examples b/examples/shared_code/Makefile.examples deleted file mode 100644 index db772a99..00000000 --- a/examples/shared_code/Makefile.examples +++ /dev/null @@ -1,250 +0,0 @@ -########################################################################## -### Copyright (c) 1999, 2000, 2001, 2002 Sony Computer Entertainment America Inc. -### All rights reserved. -### -### Boilerplate Makefile by Bret Mogilefsky (mogul@playstation.sony.com) -### and Tyler Daniel (tyler_daniel@playstation.sony.com) -### -### Use this makefile as a template for new projects! -### -### General Features: -### -### Just specify SRCS and go! -### Automatic and minimal (fast!) 
dependency generation (for vu microcode as well) -### Allows keeping source and headers from src and include dirs, or elsewhere. -### Builds in a subdirectory. -### Allows additional defines, include dirs, and lib dirs without -### specifying -D, -I, and -L -### Easy to specify parallel builds (debug, optimized, release, etc) -### Easy to add flags on a per-file, per-build, or per-file-build basis -### Can specify parent projects to make first (libraries) -### Builds libraries -### Slices, dices, feeds your cat, calls your mum. -### -### VU microcode features: -### -### Generates depencies for microcode (for .include and #include) -### Uses a preprocessing script to manage registers (configurable) -### Runs the c preprocessor over microcode - you can use #define and #include -### freely (and share #defines with c/c++) -### Support for vcl -### -### Useful targets: -### -### run Run the executable. -### xrun Run the executable under a new xterminal. -### clean Remove everything we can rebuild. -### tags Generate source-browsing tags for Emacs. -### -### Using builds: -### -### To specify a particular build include the name of the build anywhere on -### the command line: -### make xrun optimized, -### make clean optimized, etc. -### -### Included builds (add your own!): -### debug -### optimized (default) -### release -### -### For more info see the "Build Options" section below -########################################################################## - - -########################################################################## -### Target -########################################################################## - - -# this is specified in the Makefiles that include this one - -# The name of the binary file we want to generate. Also handles libraries! 
(.a) -# TARGET = - - -########################################################################## -### Files and Paths - this is probably the only section you'll need to change -########################################################################## - - -# The source files for the project. -# get all cpp source files -SRCS += $(wildcard *.cpp) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.cpp))) -# get all c source files -SRCS += $(wildcard *.c) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.c))) - -# Additional objects to link. Only add things that aren't built from SRCS! -OBJS += - -# Additional libs to link with. (sce libs are listed in the section below) -LIBS += ps2glut ps2gl ps2stuff - -# Where to find the ps2stuff project -PS2STUFF = ../../../ps2stuff - -# ... and ps2gl -PS2GL = ../../ - -# Additional locations for header files -INCDIRS += $(PS2GL)/include/ -INCDIRS += $(PS2GL)/glut/include -INCDIRS += $(PS2STUFF)/include -INCDIRS += ../shared_code - -# Additional locations for library files -LIBDIRS = $(PS2GL)/objs_$(BUILDNAME) -LIBDIRS += $(PS2GL)/glut/objs_$(BUILDNAME) -LIBDIRS += $(PS2STUFF)/objs_$(BUILDNAME) -LIBDIRS += ../shared_code/objs_$(BUILDNAME) # shared sample code - -# Additional locations for source files -SRCDIRS = - -# Object files and the target will be placed in this directory with an -# underscore and the buildname appended (e.g., for the "debug" build: objs_debug/) -OBJDIRBASE = objs - -# Dependency files will be placed in this directory with an underscore and -# the buildname appended (e.g., for the "debug" build: deps_debug/) -DEPDIRBASE = deps - -# If this project depends other projects (a rendering library for example) that should -# be built with make before making this one, list the directories here. -ifndef MAKEPARENTS -MAKEPARENTS = ../shared_code -endif - -# Where to find PSX2 development stuff. 
-SCEDIR = /usr/local/sce -PS2DEVDIR = /usr/local/ps2 - -########################################################################## -### Common Options (shared across builds) -########################################################################## - -# not many options are shared across builds as this makefile is shared -# between native/cross-compiled, linux/cross-compiled, and linux - -# Additional preprocessor definitions -DEFINES = - -# Compiler optimization options -OPTFLAGS = -fno-rtti -G 0 - -# Compiler debug options - -# enable all warnings -DEBUGFLAGS = -Wall # -Winline -# output assembly listings with c/c++ code -DEBUGFLAGS += -Wa,-alh -# This is not recommended as it generates slower code, but let's leave it -# as the default so that "*(u_long128*)&someVar" behaves as you expect. -# It would be better to remove this and not do the above (try templates). -DEBUGFLAGS += -fno-strict-aliasing -# for multithreading to work properly? -DEBUGFLAGS += -fno-common - -# Command-line arguments to be passed to the target when we run it -RUNARGS = - - -########################################################################## -### Build Options - applied per-build -########################################################################## - - -# use ps2stuff's build configuration -include $(PS2STUFF)/Makefile.builds - -# since ps2stuff is a library, it's builds do not link to other libs... 
- -# link against sce libraries for the native builds - -SCE_LIBS += graph dma dev pkt vu0 pad cdvd ipu kernl lout mc mpeg msin mtap pc sdr ssyn -SCE_LIBDIRS += $(SCEDIR)/ee/lib - -debug_LIBS += $(SCE_LIBS) -debug_LIBDIRS += $(SCE_LIBDIRS) -optimized_LIBS += $(SCE_LIBS) -optimized_LIBDIRS += $(SCE_LIBDIRS) -release_LIBS += $(SCE_LIBS) -release_LIBDIRS += $(SCE_LIBDIRS) -cdrom_LIBS += $(SCE_LIBS) -cdrom_LIBDIRS += $(SCE_LIBDIRS) -perf_LIBS += $(SCE_LIBS) -perf_LIBDIRS += $(SCE_LIBDIRS) -debug_no_vu0_LIBS += $(SCE_LIBS) -debug_no_vu0_LIBDIRS += $(SCE_LIBDIRS) -optimized_no_vu0_LIBS += $(SCE_LIBS) -optimized_no_vu0_LIBDIRS += $(SCE_LIBDIRS) -release_no_vu0_LIBS += $(SCE_LIBS) -release_no_vu0_LIBDIRS += $(SCE_LIBDIRS) - -# link against ps2dev for linux builds - -LIN_LIBS = ps2dev - -linux_LIBS += $(LIN_LIBS) -linux_debug_LIBS += $(LIN_LIBS) -linux_release_LIBS += $(LIN_LIBS) - -# cross-compiled linux builds - -cross_linux_LIBS += $(LIN_LIBS) -# libs from linux -cross_linux_LIBDIRS += /usr/local/ps2/mipsEEel-linux/lib - - -########################################################################## -### Per-file Options -########################################################################## - - -# Additional defines and include dirs can be specified on a per-file basis -# by prefixing with the stem of the filename. For example, if I wanted special flags -# for building mucilage.cpp, I could add any of the following -# mucilage_INCDIRS = someincdirs -# mucilage_LIBDIRS = somelibdirs -# mucilage_DEFINES = somedefs -# mucilage_OPTFLAGS = someoptflags -# mucilage_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Per-file, per-build Options -########################################################################## - - -# Similar to above.. 
To apply special flags for building mucilage.cpp for -# the debug build, I could add any of the following -# mucilage_debug_INCDIRS = someincdirs -# mucilage_debug_LIBDIRS = somelibdirs -# mucilage_debug_DEFINES = somedefs -# mucilage_debug_OPTFLAGS = someoptflags -# mucilage_debug_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Makefile operation -########################################################################## - - -# Set this to 1 to print status messages (like 'Compiling somefile.cpp...') -PRINT_MSGS = 1 - -# Set this to 1 to print the exact command lines used to build files -PRINT_CMDS = 0 - - -########################################################################## -### include the makefile that does all the work -########################################################################## - -include $(PS2STUFF)/Makefile.work - -ifeq ($(GCC_MAJOR),3) -DEBUGFLAGS += -Wno-deprecated -endif diff --git a/examples/tricked_out/Makefile b/examples/tricked_out/Makefile deleted file mode 100644 index b08cfbd2..00000000 --- a/examples/tricked_out/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -EE_BIN = tricked_out.elf -EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ -I../../vu1 $(EE_CFLAGS) -EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ -I../../vu1 $(EE_CXXFLAGS) -EE_OBJS = tricked_out.o billboard_renderer.o ../shared_code/text_stuff.o billboard_renderer.vo -EE_LDFLAGS += -L$(PS2SDK)/ports/lib -EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma - -ifeq ($(DEBUG), 1) - EE_CFLAGS += -D_DEBUG - EE_CXXFLAGS += -D_DEBUG -endif - -# Disabling warnings -WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null - -# VU0 code is broken so disable for now -EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM -EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM - -all: $(EE_BIN) - $(EE_STRIP) --strip-all $(EE_BIN) - -clean: - rm -f $(EE_BIN) $(EE_OBJS) - -run: $(EE_BIN) - ps2client 
-h 192.168.1.10 execee host:$(EE_BIN)
-
-reset:
-	ps2client -h 192.168.1.10 reset
-
-sim: $(EE_BIN)
-	PCSX2 --elf=$(PWD)/$(EE_BIN)
-
-include $(PS2SDK)/samples/Makefile.pref
-include $(PS2SDK)/samples/Makefile.eeglobal_cpp
-
-%.vo: %_vcl.vsm
-	dvp-as -o $@ $<
diff --git a/glut/CMakeLists.txt b/glut/CMakeLists.txt
new file mode 100644
index 00000000..a58440f2
--- /dev/null
+++ b/glut/CMakeLists.txt
@@ -0,0 +1,85 @@
+cmake_minimum_required(VERSION 3.13)
+
+# GLUT library for ps2gl
+project(ps2glut VERSION 1.0.0 LANGUAGES CXX C)
+
+# Archive file name, reported in the configuration summary below
+set(EE_LIB "libps2glut.a")
+
+# ============================================================================
+# Source files
+# ============================================================================
+set(PS2GLUT_SOURCES
+    src/glut_font_image.cpp
+    src/pads.cpp
+    src/ps2glut.cpp
+)
+
+# ============================================================================
+# Build the library
+# ============================================================================
+add_library(ps2glut STATIC ${PS2GLUT_SOURCES})
+
+set_target_properties(ps2glut PROPERTIES
+    OUTPUT_NAME "ps2glut"
+    ARCHIVE_OUTPUT_NAME "ps2glut"
+)
+
+# Consumers inside the build tree use this directory's headers; the
+# install-time path is the header install destination used below.
+# The SDK ports headers are only needed to compile ps2glut itself.
+target_include_directories(ps2glut
+    PUBLIC
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+        $<INSTALL_INTERFACE:${PS2SDK}/ports/include>
+    PRIVATE
+        ${PS2SDK}/ports/include
+)
+
+target_link_directories(ps2glut PRIVATE
+    ${PS2SDK}/ports/lib
+)
+
+# Compiler flags
+if(DEBUG)
+    target_compile_definitions(ps2glut PRIVATE _DEBUG)
+endif()
+
+# VU0 code is broken so disable for now
+target_compile_definitions(ps2glut PRIVATE
+    NO_VU0_VECTORS
+    NO_ASM
+)
+
+# Warning flags (matching Makefile)
+target_compile_options(ps2glut PRIVATE
+    -Wno-strict-aliasing
+    -Wno-conversion-null
+)
+
+# ============================================================================
+# Install targets
+# ============================================================================
+# The archive is installed through its target, so the rule keeps working
+# if the archive output directory or file name ever changes.
+install(
+    TARGETS ps2glut
+    ARCHIVE DESTINATION "${PS2SDK}/ports/lib"
+)
+
+install(
+    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/GL
+    DESTINATION "${PS2SDK}/ports/include"
+    FILES_MATCHING PATTERN "*.h"
+)
+
+# ============================================================================
+# Print configuration summary
+# ============================================================================
+message(STATUS "")
+message(STATUS "ps2glut configuration:")
+message(STATUS " Version: ${PROJECT_VERSION}")
+message(STATUS " Debug build: ${DEBUG}")
+message(STATUS " Output library: ${EE_LIB}")
+message(STATUS " Install prefix: ${PS2SDK}/ports")
+message(STATUS "")
diff --git a/glut/Makefile b/glut/Makefile
deleted file mode 100644
index 7587e6a7..00000000
--- a/glut/Makefile
+++ /dev/null
@@ -1,38 +0,0 @@
-EE_LIB = libps2glut.a
-
-EE_LDFLAGS += -L. -L$(PS2SDK)/ports/lib
-EE_INCS += -I./include -I$(PS2SDK)/ports/include
-
-ifeq ($(DEBUG), 1)
-    EE_CFLAGS += -D_DEBUG
-    EE_CXXFLAGS += -D_DEBUG
-endif
-
-# Disabling warnings
-WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null
-
-# VU0 code is broken so disable for now
-EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM
-EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM
-
-EE_OBJS = \
-	src/glut_font_image.o \
-	src/pads.o \
-	src/ps2glut.o
-
-all: $(EE_LIB)
-
-install: all
-	mkdir -p $(PS2SDK)/ports/include
-	mkdir -p $(PS2SDK)/ports/lib
-	cp -rf include/GL $(PS2SDK)/ports/include
-	cp -f $(EE_LIB) $(PS2SDK)/ports/lib
-
-clean:
-	rm -f $(EE_OBJS_LIB) $(EE_OBJS) $(EE_BIN) $(EE_LIB)
-
-realclean: clean
-	rm -f $(PS2SDK)/ports/lib/$(EE_LIB)
-
-include $(PS2SDK)/Defs.make
-include $(PS2SDK)/samples/Makefile.eeglobal
diff --git a/glut/Makefile.org b/glut/Makefile.org
deleted file mode 100644
index 84e88f58..00000000
--- a/glut/Makefile.org
+++ /dev/null
@@ -1,215 +0,0 @@
-##########################################################################
-### Copyright (c) 1999, 2000 Sony Computer Entertainment America Inc.
-### All rights reserved.
-### -### Boilerplate Makefile by Bret Mogilefsky (mogul@playstation.sony.com) -### and Tyler Daniel (tyler_daniel@playstation.sony.com) -### -### Use this makefile as a template for new projects! -### -### General Features: -### -### Just specify SRCS and go! -### Automatic and minimal (fast!) dependency generation (for vu microcode as well) -### Allows keeping source and headers from src and include dirs, or elsewhere. -### Builds in a subdirectory. -### Allows additional defines, include dirs, and lib dirs without -### specifying -D, -I, and -L -### Easy to specify parallel builds (debug, optimized, release, etc) -### Easy to add flags on a per-file, per-build, or per-file-build basis -### Can specify parent projects to make first (libraries) -### Builds libraries -### Slices, dices, feeds your cat, calls your mum. -### -### VU microcode features: -### -### Generates depencies for microcode (for .include and #include) -### Uses a preprocessing script to manage registers (configurable) -### Runs the c preprocessor over microcode - you can use #define and #include -### freely (and share #defines with c/c++) -### Support for vcl -### -### Useful targets: -### -### run Run the executable. -### xrun Run the executable under a new xterminal. -### clean Remove everything we can rebuild. -### tags Generate source-browsing tags for Emacs. -### -### Using builds: -### -### To specify a particular build include the name of the build anywhere on -### the command line: -### make xrun optimized, -### make clean optimized, etc. -### -### Included builds (add your own!): -### debug -### optimized (default) -### release -### -### For more info see the "Build Options" section below -########################################################################## - - -########################################################################## -### Target -########################################################################## - - -# The name of the binary file we want to generate. 
Also handles libraries! (.a) -TARGET = libps2glut.a - - -########################################################################## -### Files and Paths - this is probably the only section you'll need to change -########################################################################## - - -# The source files for the project. -# get all cpp source files -SRCS += $(wildcard *.cpp) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.cpp))) -# get all c source files -SRCS += $(wildcard *.c) -SRCS += $(foreach DIR,$(SRCDIRS),$(subst $(DIR)/,,$(wildcard $(DIR)/*.c))) - -# Additional objects to link. Only add things that aren't built from SRCS! -OBJS = - -# Additional libs to link with. (sce libs are listed in the section below) -LIBS = - -# Additional locations for header files -INCDIRS = include ../include/ ../../ps2stuff/include - -# Additional locations for library files -LIBDIRS = - -# Additional locations for source files -SRCDIRS = - -# Object files and the target will be placed in this directory with an -# underscore and the buildname appended (e.g., for the "debug" build: objs_debug/) -OBJDIRBASE = objs - -# Dependency files will be placed in this directory with an underscore and -# the buildname appended (e.g., for the "debug" build: deps_debug/) -DEPDIRBASE = deps - -# If this project depends other projects (a ps2 rendering library for example) that should -# be built with make before making this one, list the directories here. -MAKEPARENTS = ../ - -# Where to find PSX2 development stuff. 
-SCEDIR = $(PS2SDK) -PS2DEVDIR = $(PS2SDK) - -# Where to find the ps2stuff project -PS2STUFF = ../../ps2stuff - -########################################################################## -### Common Options (shared across builds) -########################################################################## - -# Additional preprocessor definitions -DEFINES = - -# Compiler optimization options -OPTFLAGS = -fno-rtti -fno-exceptions -G 0 - -# Compiler debug options - -# enable all warnings -DEBUGFLAGS = -Wall -# output assembly listings with c/c++ code -DEBUGFLAGS += -Wa,-alh -# This is not recommended as it generates slower code, but let's leave it -# as the default so that "*(u_long128*)&someVar" behaves as you expect. -# It would be better to remove this and not do the above (try templates). -DEBUGFLAGS += -fno-strict-aliasing -# for multithreading to work properly? -DEBUGFLAGS += -fno-common - -# Command-line arguments to be passed to the target when we run it -RUNARGS = - - -########################################################################## -### Build Options - applied per-build -########################################################################## - - -# use ps2stuff's build configuration -include $(PS2STUFF)/Makefile.builds - -# we want to use source from 'ps2/' for ps2 (native) builds and source -# from 'linux/' for linux builds.. 
- -debug_SRCDIRS += ps2 -optimized_SRCDIRS += ps2 -release_SRCDIRS += ps2 -cdrom_SRCDIRS += ps2 -perf_SRCDIRS += ps2 -debug_no_vu0_SRCDIRS += ps2 -optimized_no_vu0_SRCDIRS += ps2 -release_no_vu0_SRCDIRS += ps2 - -# linux - -linux_SRCDIRS += linux -linux_debug_SRCDIRS += linux -linux_release_SRCDIRS += linux -cross_linux_SRCDIRS += linux - - -########################################################################## -### Per-file Options -########################################################################## - - -# Additional defines and include dirs can be specified on a per-file basis -# by prefixing with the stem of the filename. For example, if I wanted special flags -# for building mucilage.cpp, I could add any of the following -# mucilage_INCDIRS = someincdirs -# mucilage_LIBDIRS = somelibdirs -# mucilage_DEFINES = somedefs -# mucilage_OPTFLAGS = someoptflags -# mucilage_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Per-file, per-build Options -########################################################################## - - -# Similar to above.. 
To apply special flags for building mucilage.cpp for -# the debug build, I could add any of the following -# mucilage_debug_INCDIRS = someincdirs -# mucilage_debug_LIBDIRS = somelibdirs -# mucilage_debug_DEFINES = somedefs -# mucilage_debug_OPTFLAGS = someoptflags -# mucilage_debug_DEBUGFLAGS = somedebugflags - - -########################################################################## -### Makefile operation -########################################################################## - - -# Set this to 1 to print status messages (like 'Compiling somefile.cpp...') -PRINT_MSGS = 1 - -# Set this to 1 to print the exact command lines used to build files -PRINT_CMDS = 0 - - -########################################################################## -### include the makefile that does all the work -########################################################################## - -include $(PS2STUFF)/Makefile.work - -ifeq ($(GCC_MAJOR),3) -DEBUGFLAGS += -Wno-deprecated -endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..f0a211e4 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +# Tests for ps2gl +# TODO: Add test executables here when test sources are available + +message(STATUS "Tests directory configured (no tests defined yet)") diff --git a/vu1/fast_nolights_vcl.vsm b/vu1/fast_nolights_vcl.vsm deleted file mode 100644 index 80532a0a..00000000 --- a/vu1/fast_nolights_vcl.vsm +++ /dev/null @@ -1,179 +0,0 @@ -; === __LP__ EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: -; === ldumb : optimal=12 clid=0 mlid=2 size=(12) -; === normal1 : optimal=12 clid=0 mlid=3 size=(12) -; === vuta : optimal=12 clid=0 mlid=2 size=(12) -; === dUp : optimal=12 clid=0 mlid=3 size=(12) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmFastNoLights_CodeStart - .global vsmFastNoLights_CodeEnd -vsmFastNoLights_CodeStart: 
-__v_vu1_fast_nolights_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 27 [27 0] 28 [__v_vu1_fast_nolights_pp4_vcl_4] - sub VF07,VF00,VF00 lq.w VF08,60(VI00) - sub VF06,VF00,VF00 lq.w VF01,57(VI00) - sub VF05,VF00,VF00 lq VF02,62(VI00) - maxw.x VF07,VF07,VF00w loi 0x44fff000 ; STALL_LATENCY ?1 - addi.xy VF08,VF00,I loi 0x43000000 - muli.w VF01,VF08,I mr32.z VF08,VF01 - maxw.y VF06,VF06,VF00w NOP - maxw.z VF05,VF05,VF00w lq.xyz VF01,58(VI00) - mulax ACC,VF07,VF02x loi 0x437f0000 - minii.w VF01,VF01,I move.xyz VF08,VF08 - max.w VF08,VF00,VF00 NOP - madday ACC,VF06,VF02y lq VF03,63(VI00) - maddaz ACC,VF05,VF02z NOP - ftoi0 VF01,VF01 NOP - maddw VF02,VF08,VF02w NOP - mulax ACC,VF07,VF03x lq VF04,64(VI00) - madday ACC,VF06,VF03y NOP - maddaz ACC,VF05,VF03z NOP - maddw VF03,VF08,VF03w NOP - mulax ACC,VF07,VF04x lq VF09,65(VI00) - madday ACC,VF06,VF04y NOP - maddaz ACC,VF05,VF04z NOP - maddw VF04,VF08,VF04w NOP - mulax ACC,VF07,VF09x NOP - madday ACC,VF06,VF09y NOP - maddaz[E] ACC,VF05,VF09z NOP - maddw VF05,VF08,VF09w NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 19 [19 0] 19 [main_loop_lid] - NOP xtop VI01 - NOP lq VF06,75(VI00) - NOP ilw.x VI04,0(VI01) - NOP iaddiu VI06,VI01,0x00000001 - NOP iaddiu VI02,VI01,0x00000005 - NOP mtir VI03,VF06x - NOP iadd VI05,VI02,VI04 - NOP ior VI03,VI03,VI04 - NOP iadd VI05,VI05,VI04 - NOP mfir.x VF06,VI03 - NOP iaddiu VI03,VI01,0 - NOP iadd VI04,VI05,VI04 - NOP iaddiu VI05,VI01,0x00000005 - NOP sq VF06,236(VI03) - NOP iaddiu VI07,VI06,0x00000004 - NOP iaddiu VI08,VI00,0x000003ff - NOP iaddiu VI09,VI00,0x00000800 - NOP iaddiu VI10,VI00,0x00000400 - NOP iaddiu VI11,VI00,0x00000020 -adcLoop_lid: -; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] - NOP lq VF06,0(VI06) - ftoi0 VF06,VF06 NOP ; STALL_LATENCY ?3 - NOP mtir VI12,VF06x ; STALL_LATENCY ?3 - NOP iand VI13,VI12,VI10 - NOP NOP - NOP ibeq VI13,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP NOP -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_8] - NOP iand 
VI14,VI12,VI08 - NOP mtir VI13,VF06y - NOP iand VI12,VI12,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI13,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI12,3(VI14) -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_9] - NOP iand VI14,VI13,VI08 - NOP mtir VI12,VF06z - NOP iand VI13,VI13,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI12,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI13,3(VI14) -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_10] - NOP iand VI14,VI12,VI08 - NOP mtir VI13,VF06w - NOP iand VI12,VI12,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI13,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI12,3(VI14) -; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_fast_nolights_pp4_vcl_11] - NOP iand VI12,VI13,VI08 - NOP iaddiu VI06,VI06,0x00000001 - NOP iand VI13,VI13,VI09 - NOP iadd VI12,VI12,VI05 - NOP isw.w VI11,0(VI12) - NOP ibne VI06,VI07,adcLoop_lid - NOP isw.w VI13,3(VI12) -EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: -; _LNOPT_w=[ ] 8 [12 0] 12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] - NOP lq.xyz VF06,0(VI02) - mulax ACC,VF02,VF06x ilw.w VI05,0(VI02) ; STALL_LATENCY ?3 - madday ACC,VF03,VF06y NOP - maddaz ACC,VF04,VF06z sq VF01,238(VI03) - maddw VF06,VF05,VF00w iaddiu VI02,VI02,0x00000003 - NOP iaddiu VI03,VI03,0 - NOP ibeq VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1 - NOP div Q,VF00w,VF06w ; STALL_LATENCY ?1 -; _LNOPT_w=[ ] 9 [12 0] 12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__PRO1] - max.xyz VF08,VF06,VF06 lq.xyz VF10,0(VI02) - mulax ACC,VF02,VF10x sq VF01,241(VI03) ; STALL_LATENCY ?3 - madday ACC,VF03,VF10y lq.xyz VF07,-1(VI02) - maddaz ACC,VF04,VF10z iaddiu VI02,VI02,0x00000003 - maddw VF06,VF05,VF00w iaddiu 
VI06,VI05,0x00007fff - mulq.xyz VF10,VF08,Q iaddiu VI03,VI03,0x000000f3 - NOP ilw.w VI05,-3(VI02) - NOP ibeq VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0 - mulq.xyz VF07,VF07,Q div Q,VF00w,VF06w -EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 12 [12 12] 12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] - ftoi4.xyz VF10,VF10 lq.xyz VF08,0(VI02) - NOP mfir.w VF10,VI06 - NOP sq VF01,1(VI03) - max.xyz VF09,VF06,VF06 sq.xyz VF07,-6(VI03) - mulax ACC,VF02,VF08x lq.xyz VF07,-1(VI02) - madday ACC,VF03,VF08y sq VF10,-4(VI03) - maddaz ACC,VF04,VF08z iaddiu VI02,VI02,0x00000003 - maddw VF06,VF05,VF00w iaddiu VI06,VI05,0x00007fff - mulq.xyz VF10,VF09,Q iaddiu VI03,VI03,0x00000003 - NOP ilw.w VI05,-3(VI02) - NOP ibne VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP - mulq.xyz VF07,VF07,Q div Q,VF00w,VF06w -EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0: -; _LNOPT_w=[ ] 12 [13 0] 15 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0] - NOP NOP - ftoi4.xyz VF10,VF10 NOP - max.xyz VF06,VF06,VF06 mfir.w VF10,VI06 - NOP sq.xyz VF07,-6(VI03) - NOP lq.xyz VF07,-1(VI02) - mulq.xyz VF10,VF06,Q sq VF10,-4(VI03) ; STALL_LATENCY ?1 - mulq.xyz VF07,VF07,Q iaddiu VI06,VI05,0x00007fff ; STALL_LATENCY ?1 - NOP mfir.w VF10,VI06 - ftoi4.xyz VF10,VF10 NOP - NOP sq.xyz VF07,-3(VI03) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT - NOP sq VF10,-1(VI03) -EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1: -; _LNOPT_w=[ ] 8 [13 0] 15 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1] - NOP NOP - NOP NOP - max.xyz VF07,VF06,VF06 lq.xyz VF06,-1(VI02) - mulq.xyz VF08,VF07,Q iaddiu VI05,VI05,0x00007fff ; STALL_LATENCY ?3 - mulq.xyz VF07,VF06,Q mfir.w VF06,VI05 - ftoi4.xyz VF06,VF08 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF07,237(VI03) - NOP sq VF06,239(VI03) ; STALL_LATENCY ?2 -EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 
[EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_nolights_pp4_vcl_15] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmFastNoLights_CodeEnd: -; iCount=139 -; register stats: -; 16 VU User integer -; 11 VU User floating point diff --git a/vu1/fast_vcl.vsm b/vu1/fast_vcl.vsm deleted file mode 100644 index da5e9417..00000000 --- a/vu1/fast_vcl.vsm +++ /dev/null @@ -1,257 +0,0 @@ -; === __LP__ EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: -; === ldumb : optimal=16 clid=0 mlid=3 size=(16) -; === normal1 : optimal=16 clid=0 mlid=2 size=(16) -; === vuta : optimal=16 clid=0 mlid=3 size=(16) -; === dUp : optimal=16 clid=0 mlid=2 size=(16) -; === normal : optimal=16 clid=0 mlid=3 size=(16) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmFast_CodeStart - .global vsmFast_CodeEnd -vsmFast_CodeStart: -__v_vu1_fast_pp4_vcl_4: -; _LNOPT_w=[ normal ] 10 [10 0] 10 [__v_vu1_fast_pp4_vcl_4] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0 - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - max.xyz VF06,VF00,VF00 lq VF03,64(VI00) - max.xyz VF07,VF00,VF00 lq VF04,65(VI00) - max.xyz VF08,VF00,VF00 lq.xyz VF05,58(VI00) - max.xyz VF09,VF00,VF00 ilw.x VI03,1(VI02) - max.xyz VF10,VF00,VF00 ibeq VI01,VI00,finish_init_lid - max.xyz VF11,VF00,VF00 lq.xyz VF12,59(VI00) -; _LNOPT_w=[ vuta1 ] 12 [9 0] 12 [__v_vu1_fast_pp4_vcl_5] - NOP lq.xyz VF14,67(VI00) - NOP lq.xyz VF06,3(VI03) - NOP lq.xyz VF15,68(VI00) - NOP lq.xyz VF17,0(VI03) - NOP lq.xyz VF09,1(VI03) - NOP lq.xyz VF16,69(VI00) - mulax.xyz ACC,VF14,VF06x lq.xyz VF13,60(VI00) - mul.xyz VF17,VF12,VF17 NOP - madday.xyz ACC,VF15,VF06y isubiu VI04,VI01,0x00000001 - maddz.xyz VF06,VF16,VF06z NOP - mul.xyz VF09,VF09,VF13 ibeq VI04,VI00,finish_init_lid - add.xyz VF05,VF05,VF17 NOP -; _LNOPT_w=[ another ] 10 [14 0] 15 
[__v_vu1_fast_pp4_vcl_6] - NOP iaddiu VI01,VI02,0 - NOP ilw.x VI02,2(VI01) - NOP lq.xyz VF07,3(VI02) ; STALL_LATENCY ?3 - NOP lq.xyz VF17,0(VI02) - mulax.xyz ACC,VF14,VF07x lq.xyz VF10,1(VI02) ; STALL_LATENCY ?2 - mul.xyz VF17,VF12,VF17 NOP - madday.xyz ACC,VF15,VF07y isubiu VI04,VI04,0x00000001 - maddz.xyz VF07,VF16,VF07z NOP - mul.xyz VF10,VF10,VF13 ibeq VI04,VI00,finish_init_lid - add.xyz VF05,VF05,VF17 NOP -; _LNOPT_w=[ normal2 ] 10 [14 0] 16 [__v_vu1_fast_pp4_vcl_7] - NOP iaddiu VI01,VI01,0 - NOP ilw.x VI01,3(VI01) - NOP lq.xyz VF08,3(VI01) ; STALL_LATENCY ?3 - NOP lq.xyz VF11,0(VI01) - mulax.xyz ACC,VF14,VF08x NOP ; STALL_LATENCY ?2 - madday.xyz ACC,VF15,VF08y lq.xyz VF14,1(VI01) - mul.xyz VF15,VF12,VF11 NOP - maddz.xyz VF08,VF16,VF08z NOP - mul.xyz VF11,VF14,VF13 NOP ; STALL_LATENCY ?1 - add.xyz VF05,VF05,VF15 NOP -finish_init_lid: -; _LNOPT_w=[ normal2 ] 35 [35 0] 35 [finish_init_lid] - sub VF17,VF00,VF00 lq.xyz VF15,57(VI00) - addy.x VF13,VF00,VF06y NOP - addz.x VF14,VF00,VF06z lq.w VF09,60(VI00) - sub VF18,VF00,VF00 mr32.x VF19,VF08 - mul.xyz VF15,VF12,VF15 lq.w VF11,57(VI00) - sub VF12,VF00,VF00 loi 0x43000000 - muli.w VF10,VF09,I loi 0x44fff000 - addi.xy VF19,VF00,I mr32.w VF09,VF19 - add.xyz VF05,VF05,VF15 mr32.z VF19,VF11 - maxw.x VF17,VF17,VF00w NOP - maxw.y VF18,VF18,VF00w NOP - maxw.z VF12,VF12,VF00w NOP - max.xyz VF19,VF19,VF19 NOP - mulax ACC,VF17,VF01x NOP - madday ACC,VF18,VF01y mr32.y VF08,VF07 - maddaz ACC,VF12,VF01z move.w VF19,VF00 - addx.y VF06,VF00,VF07x loi 0x4b4000ff - addx.z VF06,VF00,VF08x mr32.z VF07,VF09 - maxi.w VF09,VF00,I loi 0x437f0000 - minii.w VF10,VF10,I loi 0x4b400000 - addi.xyz VF13,VF05,I move.x VF07,VF13 - maddw VF14,VF19,VF01w move.x VF08,VF14 - mulax ACC,VF17,VF02x NOP - addi.w VF05,VF10,I NOP - madday ACC,VF18,VF02y NOP - maddaz ACC,VF12,VF02z NOP - maddw VF15,VF19,VF02w NOP - mulax ACC,VF17,VF03x NOP - madday ACC,VF18,VF03y NOP - maddaz ACC,VF12,VF03z NOP - maddw VF16,VF19,VF03w NOP - mulax ACC,VF17,VF04x NOP - madday 
ACC,VF18,VF04y NOP - maddaz[E] ACC,VF12,VF04z NOP - maddw VF17,VF19,VF04w NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 19 [19 0] 19 [main_loop_lid] - NOP xtop VI01 - NOP lq VF01,75(VI00) - NOP ilw.x VI04,0(VI01) - NOP iaddiu VI06,VI01,0x00000001 - NOP iaddiu VI02,VI01,0x00000005 - NOP mtir VI03,VF01x - NOP iadd VI05,VI02,VI04 - NOP ior VI03,VI03,VI04 - NOP iadd VI05,VI05,VI04 - NOP mfir.x VF01,VI03 - NOP iaddiu VI03,VI01,0 - NOP iadd VI04,VI05,VI04 - NOP iaddiu VI05,VI01,0x00000005 - NOP sq VF01,236(VI03) - NOP iaddiu VI07,VI06,0x00000004 - NOP iaddiu VI08,VI00,0x000003ff - NOP iaddiu VI09,VI00,0x00000800 - NOP iaddiu VI10,VI00,0x00000400 - NOP iaddiu VI11,VI00,0x00000020 -adcLoop_lid: -; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] - NOP lq VF01,0(VI06) - ftoi0 VF01,VF01 NOP ; STALL_LATENCY ?3 - NOP mtir VI12,VF01x ; STALL_LATENCY ?3 - NOP iand VI13,VI12,VI10 - NOP NOP - NOP ibeq VI13,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP NOP -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_12] - NOP iand VI14,VI12,VI08 - NOP mtir VI13,VF01y - NOP iand VI12,VI12,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI13,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI12,3(VI14) -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_13] - NOP iand VI14,VI13,VI08 - NOP mtir VI12,VF01z - NOP iand VI13,VI13,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI12,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI13,3(VI14) -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_14] - NOP iand VI14,VI12,VI08 - NOP mtir VI13,VF01w - NOP iand VI12,VI12,VI09 - NOP iadd VI14,VI14,VI05 - NOP iand VI15,VI13,VI10 - NOP isw.w VI11,0(VI14) - NOP ibeq VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT - NOP isw.w VI12,3(VI14) -; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_fast_pp4_vcl_15] - NOP iand VI12,VI13,VI08 - NOP iaddiu 
VI06,VI06,0x00000001 - NOP iand VI13,VI13,VI09 - NOP iadd VI12,VI12,VI05 - NOP isw.w VI11,0(VI12) - NOP ibne VI06,VI07,adcLoop_lid - NOP isw.w VI13,3(VI12) -EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: -; _LNOPT_w=[ ] 11 [16 0] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] - NOP lq.xyz VF05,1(VI02) - mulax.xyz ACC,VF06,VF05x NOP ; STALL_LATENCY ?3 - madday.xyz ACC,VF07,VF05y lq.xyz VF01,0(VI02) - maddz.xyz VF05,VF08,VF05z NOP - mulax ACC,VF14,VF01x NOP ; STALL_LATENCY ?2 - max.xyz VF05,VF05,VF00 NOP - madday ACC,VF15,VF01y NOP - maddaz ACC,VF16,VF01z iaddiu VI02,VI02,0x00000003 - maddw VF01,VF17,VF00w NOP - mulax.xyz ACC,VF09,VF05x ibeq VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1 - madday.xyz ACC,VF10,VF05y iaddiu VI03,VI03,0 -; _LNOPT_w=[ ] 15 [16 0] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__PRO1] - maddz.xyz VF03,VF11,VF05z lq.xyz VF05,1(VI02) - NOP div Q,VF00w,VF01w - NOP lq.xyz VF12,0(VI02) - mulax.xyz ACC,VF06,VF05x NOP ; STALL_LATENCY ?1 - madday.xyz ACC,VF07,VF05y NOP - maddz.xyz VF05,VF08,VF05z NOP - mulax ACC,VF14,VF12x NOP - madday ACC,VF15,VF12y ilw.w VI05,-3(VI02) - maddaz ACC,VF16,VF12z move.xyz VF12,VF01 - max.xyz VF05,VF05,VF00 iaddiu VI03,VI03,0x000000f3 - maddw VF01,VF17,VF00w iaddiu VI02,VI02,0x00000003 - add.xyz VF03,VF03,VF13 iaddiu VI05,VI05,0x00007fff - mulq.xyz VF04,VF12,Q mfir.w VF03,VI05 - mulax.xyz ACC,VF09,VF05x ibeq VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0 - madday.xyz ACC,VF10,VF05y lq.xyz VF12,-4(VI02) -EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: -; _LPOPT_w=[ normal1 ] 16 [16 16] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] - maddz.xyz VF02,VF11,VF05z lq.xyz VF18,1(VI02) - miniw.xyz VF05,VF03,VF09w iaddiu VI03,VI03,0x00000003 - ftoi4.xyz VF03,VF04 NOP - mulq.xyz VF04,VF12,Q lq.xyz VF12,0(VI02) - mulax.xyz ACC,VF06,VF18x div Q,VF00w,VF01w - madday.xyz ACC,VF07,VF18y sq VF05,-8(VI03) - maddz.xyz VF05,VF08,VF18z NOP - mulax ACC,VF14,VF12x ilw.w VI05,-3(VI02) - madday 
ACC,VF15,VF12y iaddiu VI02,VI02,0x00000003 - maddaz ACC,VF16,VF12z move.xyz VF12,VF01 - max.xyz VF05,VF05,VF00 sq.xyz VF04,-9(VI03) - maddw VF01,VF17,VF00w iaddiu VI05,VI05,0x00007fff - add.xyz VF03,VF02,VF13 sq VF03,-7(VI03) - mulq.xyz VF04,VF12,Q mfir.w VF03,VI05 - mulax.xyz ACC,VF09,VF05x ibne VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP - madday.xyz ACC,VF10,VF05y lq.xyz VF12,-4(VI02) -EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0: -; _LNOPT_w=[ ] 16 [16 0] 18 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0] - maddz.xyz VF02,VF11,VF05z div Q,VF00w,VF01w - miniw.xyz VF05,VF03,VF09w ilw.w VI04,-3(VI02) - ftoi4.xyz VF03,VF04 NOP - mulq.xyz VF04,VF12,Q move.xyz VF12,VF01 - NOP sq VF05,-5(VI03) ; STALL_LATENCY ?1 - add.xyz VF03,VF02,VF13 sq VF03,-4(VI03) - mulq.xyz VF04,VF12,Q sq.xyz VF04,-6(VI03) - NOP iaddiu VI04,VI04,0x00007fff - NOP lq.xyz VF12,-1(VI02) - miniw.xyz VF05,VF03,VF09w mfir.w VF03,VI04 - ftoi4.xyz VF03,VF04 NOP - mulq.xyz VF04,VF12,Q NOP ; STALL_LATENCY ?1 - NOP sq VF05,-2(VI03) - NOP sq VF03,-1(VI03) - NOP b EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT - NOP sq.xyz VF04,-3(VI03) -EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1: -; _LNOPT_w=[ ] 13 [16 0] 18 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1] - NOP NOP - maddz.xyz VF05,VF11,VF05z div Q,VF00w,VF01w - NOP move.xyz VF01,VF01 - add.xyz VF05,VF05,VF13 lq.xyz VF03,-1(VI02) ; STALL_LATENCY ?2 - NOP ilw.w VI04,-3(VI02) - mulq.xyz VF02,VF01,Q waitq ; STALL_LATENCY ?1 - NOP iaddiu VI04,VI04,0x00007fff ; STALL_LATENCY ?1 - miniw.xyz VF05,VF05,VF09w mfir.w VF01,VI04 - ftoi4.xyz VF01,VF02 NOP - mulq.xyz VF02,VF03,Q NOP - NOP sq VF05,238(VI03) ; STALL_LATENCY ?1 - NOP sq VF01,239(VI03) - NOP sq.xyz VF02,237(VI03) -EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_pp4_vcl_19] - NOP b 
main_loop_lid - NOP NOP - .align 4 -vsmFast_CodeEnd: -; iCount=211 -; register stats: -; 16 VU User integer -; 20 VU User floating point diff --git a/vu1/general_nospec_quad_vcl.vsm b/vu1/general_nospec_quad_vcl.vsm deleted file mode 100644 index 8519718f..00000000 --- a/vu1/general_nospec_quad_vcl.vsm +++ /dev/null @@ -1,593 +0,0 @@ -; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === normal1 : optimal=46 clid=1 mlid=3 size=(47) -; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=8 clid=5 mlid=5 size=(8) -; === ldumb : optimal=8 clid=0 mlid=4 size=(8) -; === normal1 : optimal=8 clid=0 mlid=4 size=(8) -; === hDown : optimal=8 clid=0 mlid=4 size=(8) -; === vuta : optimal=8 clid=0 mlid=4 size=(8) -; === dUp : optimal=8 clid=0 mlid=4 size=(8) -; === normal : optimal=8 clid=0 mlid=4 size=(8) -; === another : optimal=8 clid=0 mlid=4 size=(8) -; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) -; === normal2 : optimal=8 clid=0 mlid=6 size=(8) -; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) -; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=21 clid=3 mlid=3 size=(26) -; === ldumb : optimal=21 clid=0 mlid=3 size=(26) -; === normal1 : optimal=21 clid=0 mlid=3 size=(26) -; === hDown : optimal=21 clid=0 mlid=3 size=(26) -; === vuta : optimal=21 clid=0 mlid=2 size=(26) -; === dUp : optimal=21 clid=0 mlid=3 size=(26) -; === normal : optimal=21 clid=0 mlid=3 size=(26) -; === another : optimal=21 clid=0 mlid=3 size=(26) -; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) -; === normal2 : optimal=21 clid=0 mlid=3 size=(26) -; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) -; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; 
=== vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralNoSpecQuad_CodeStart - .global vsmGeneralNoSpecQuad_CodeEnd -vsmGeneralNoSpecQuad_CodeStart: -__v_vu1_general_nospec_quad_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_quad_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 45 [45 0] 45 [main_loop_lid] - NOP xtop VI05 - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - NOP iaddiu VI03,VI05,0x00000005 - NOP lq.xyz VF20,0(VI03) - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - mul.xyz VF09,VF09,VF08 iaddiu VI04,VI05,0x000000ed - mulax ACC,VF01,VF20x ilw.x VI05,0(VI05) - madday ACC,VF02,VF20y lq.xyz VF08,6(VI03) - maddaz ACC,VF03,VF20z mr32.z VF05,VF05 - maddw VF20,VF04,VF00w lq VF06,75(VI00) - addi.xy VF05,VF00,I lq.xyz VF14,9(VI03) - mulax ACC,VF01,VF08x loi 0x45000000 - madday ACC,VF02,VF08y ilw.w VI07,0(VI00) - maddaz ACC,VF03,VF08z div Q,VF00w,VF20w - maddw VF16,VF04,VF00w iadd VI06,VI03,VI05 - mulax ACC,VF01,VF14x lq.xyz VF13,3(VI03) - madday ACC,VF02,VF14y mtir VI08,VF06x - maddaz ACC,VF03,VF14z ior VI08,VI08,VI05 - maddw VF14,VF04,VF00w mfir.x VF06,VI08 - mulax ACC,VF01,VF13x iadd VI06,VI06,VI05 - madday ACC,VF02,VF13y iadd VI06,VI06,VI05 - maddaz ACC,VF03,VF13z lq.xyz VF07,58(VI00) - maddw VF13,VF04,VF00w sq VF06,-1(VI04) - NOP iaddiu VI08,VI00,0x00007fff - NOP iaddiu VI08,VI08,0x00000001 - NOP ilw.w VI02,76(VI00) - NOP div Q,VF00w,VF13w - add.xyz VF09,VF07,VF09 lq.xyz VF07,7(VI03) - NOP lq.xyz VF15,2(VI03) - 
mulq.xyz VF08,VF20,Q lq.xyz VF06,76(VI00) - NOP fcset 0 - maxi.w VF07,VF00,I lq.xyz VF17,10(VI03) - mulq.xyz VF15,VF15,Q sq.xyz VF07,10(VI03) - mul.xyz VF10,VF08,VF06 div Q,VF00w,VF16w - NOP lq.xyz VF07,5(VI03) - mulq.xyz VF13,VF13,Q iaddiu VI01,VI03,0 - add.xyz VF11,VF08,VF05 sq.xyz VF17,7(VI03) - clipw.xyz VF10xyz,VF07w lq.xyz VF17,11(VI03) - mulq.xyz VF10,VF07,Q lq.xyz VF20,8(VI03) - sub.xyz VF12,VF08,VF13 iaddiu VI03,VI01,0x0000000c - mul.xyz VF21,VF13,VF06 div Q,VF00w,VF14w - add.xyz VF08,VF13,VF05 ibeq VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1 - mulq.xyz VF16,VF16,Q lq.w VF08,0(VI00) -; _LNOPT_w=[ ] 47 [45 0] 47 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF19,VF20,Q NOP - NOP NOP - NOP NOP - clipw.xyz VF21xyz,VF07w NOP - mulq.xyz VF17,VF17,Q lq.xyz VF20,0(VI03) - mulq.xyz VF18,VF14,Q NOP - sub.xyz VF13,VF16,VF13 NOP - add.xyz VF07,VF16,VF05 NOP - mulax ACC,VF01,VF20x NOP - madday ACC,VF02,VF20y lq.xyz VF22,6(VI03) - maddaz ACC,VF03,VF20z NOP - maddw VF20,VF04,VF00w iaddiu VI01,VI03,0 - mul.xyz VF21,VF16,VF06 iaddiu VI09,VI04,0 - mulax ACC,VF01,VF22x lq.xyz VF14,9(VI03) - madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) - maddaz ACC,VF03,VF22z div Q,VF00w,VF20w - maddw VF16,VF04,VF00w lq.xyz VF23,7(VI03) - mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) - madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) - mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) - maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) - maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) - mulq.xyz VF22,VF20,Q lq.xyz VF17,11(VI03) - mulax ACC,VF01,VF24x lq.xyz VF23,5(VI03) - madday ACC,VF02,VF24y lq.xyz VF25,2(VI03) - maddaz ACC,VF03,VF24z sq.xyz VF19,9(VI04) - maddw VF24,VF04,VF00w sq.xyz VF10,3(VI04) - ftoi4.xyz VF11,VF11 sq.xyz VF09,4(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,7(VI04) - mul.xyz VF19,VF22,VF06 sq.xyz VF09,1(VI04) - clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w - mulw.xyz VF10,VF12,VF08w mfir.w VF11,VI08 - add.xyz VF12,VF18,VF05 lq.xyz VF20,8(VI03) - clipw.xyz 
VF19xyz,VF07w iaddiu VI03,VI01,0x0000000c - mulq.xyz VF15,VF25,Q fcand VI01,16777215 - opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,10(VI04) - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq VF11,2(VI04) - ftoi4.xyz VF18,VF08 div Q,VF00w,VF16w - mulq.xyz VF13,VF24,Q mfir.w VF18,VI08 - ftoi4.xyz VF19,VF12 iand VI01,VI01,VI02 - mulq.xyz VF10,VF23,Q fmand VI10,VI07 - add.xyz VF11,VF22,VF05 ior VI01,VI01,VI10 - sub.xyz VF12,VF22,VF13 iaddiu VI01,VI01,0x00007fff - mul.xyz VF21,VF13,VF06 mfir.w VF19,VI01 - add.xyz VF08,VF13,VF05 div Q,VF00w,VF14w - ftoi4.xyz VF22,VF07 ibeq VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0 - mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 -EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ normal1 ] 47 [45 45] 47 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP sq VF19,8(VI04) - mulq.xyz VF19,VF20,Q sq VF18,5(VI04) - ftoi4.xyz VF11,VF11 NOP - sub.xyz VF13,VF16,VF13 sq VF22,11(VI04) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI09,0x0000000c - add.xyz VF07,VF16,VF05 lq.xyz VF20,0(VI03) - mulq.xyz VF18,VF14,Q NOP - clipw.xyz VF21xyz,VF07w lq.xyz VF22,6(VI03) - mul.xyz VF21,VF16,VF06 iaddiu VI01,VI03,0 - mulax ACC,VF01,VF20x iaddiu VI09,VI04,0 - madday ACC,VF02,VF20y NOP - maddaz ACC,VF03,VF20z lq.xyz VF14,9(VI03) - maddw VF20,VF04,VF00w NOP - mulax ACC,VF01,VF22x lq.xyz VF23,7(VI03) - madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) - maddaz ACC,VF03,VF22z NOP - maddw VF16,VF04,VF00w div Q,VF00w,VF20w - mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) - madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) - mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) - maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) - maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) - mulax ACC,VF01,VF24x lq.xyz VF17,11(VI03) - mulq.xyz VF22,VF20,Q lq.xyz VF23,5(VI03) - madday ACC,VF02,VF24y lq.xyz VF20,8(VI03) - maddaz ACC,VF03,VF24z lq.xyz VF25,2(VI03) - maddw VF24,VF04,VF00w iaddiu VI03,VI01,0x0000000c - mul.xyz VF19,VF22,VF06 sq.xyz VF19,9(VI04) - clipw.xyz 
VF15xyz,VF07w sq.xyz VF10,3(VI04) - mulw.xyz VF10,VF12,VF08w sq.xyz VF09,4(VI04) - clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w - clipw.xyz VF19xyz,VF07w sq.xyz VF09,7(VI04) - add.xyz VF12,VF18,VF05 sq.xyz VF09,1(VI04) - mulq.xyz VF15,VF25,Q mfir.w VF11,VI08 - opmula.xyz ACCxyz,VF10xyz,VF13xyz fcand VI01,16777215 - ftoi4.xyz VF18,VF08 iand VI01,VI01,VI02 - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,10(VI04) - mulq.xyz VF13,VF24,Q div Q,VF00w,VF16w - mulq.xyz VF10,VF23,Q sq VF11,2(VI04) - add.xyz VF11,VF22,VF05 mfir.w VF18,VI08 - ftoi4.xyz VF19,VF12 fmand VI10,VI07 - sub.xyz VF12,VF22,VF13 ior VI01,VI01,VI10 - mul.xyz VF21,VF13,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF08,VF13,VF05 mfir.w VF19,VI01 - NOP div Q,VF00w,VF14w - ftoi4.xyz VF22,VF07 ibne VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP - mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 -EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 32 [26 0] 32 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF19,VF20,Q sq VF19,8(VI04) - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF17,Q mfir.w VF11,VI08 - NOP NOP - mulq.xyz VF18,VF14,Q sq VF18,5(VI04) - NOP NOP - NOP NOP - clipw.xyz VF21xyz,VF07w sq VF22,11(VI04) - sub.xyz VF13,VF16,VF13 iaddiu VI04,VI09,0 - mul.xyz VF15,VF18,VF06 sq.xyz VF15,12(VI04) - mul.xyz VF21,VF16,VF06 sq.xyz VF19,21(VI04) - mulw.xyz VF10,VF12,VF08w sq.xyz VF10,15(VI04) - NOP sq.xyz VF17,18(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,16(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF09,19(VI04) - opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,13(VI04) - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,22(VI04) - NOP mfir.w VF18,VI08 - add.xyz VF12,VF18,VF05 fcand VI01,16777215 - ftoi4.xyz VF11,VF11 iand VI02,VI01,VI02 - add.xyz VF07,VF16,VF05 fmand VI07,VI07 - ftoi4.xyz VF18,VF08 ior VI02,VI02,VI07 - ftoi4.xyz VF19,VF12 iaddiu VI02,VI02,0x00007fff - NOP mfir.w VF19,VI02 - ftoi4.xyz VF22,VF07 sq VF11,14(VI04) - NOP mfir.w VF22,VI02 - NOP 
sq VF18,17(VI04) - NOP sq VF19,20(VI04) - NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF22,23(VI04) -EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 29 [22 0] 29 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1] - mulq.xyz VF20,VF20,Q NOP - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF17,Q NOP - mulq.xyz VF14,VF14,Q NOP - ftoi4.xyz VF11,VF11 NOP - sub.xyz VF13,VF16,VF13 NOP - mulw.xyz VF10,VF12,VF08w sq.xyz VF10,3(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF15,0(VI04) - mul.xyz VF15,VF14,VF06 sq.xyz VF09,4(VI04) - mul.xyz VF21,VF16,VF06 sq.xyz VF09,7(VI04) - opmula.xyz ACCxyz,VF10xyz,VF13xyz mfir.w VF11,VI08 - opmsub.xyz VF18xyz,VF13xyz,VF10xyz sq.xyz VF20,9(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,1(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF09,10(VI04) - NOP sq VF11,2(VI04) - abs.xyz VF00,VF18 mfir.w VF08,VI08 - add.xyz VF12,VF14,VF05 fmand VI07,VI07 - add.xyz VF07,VF16,VF05 fcand VI01,16777215 - NOP iand VI02,VI01,VI02 - ftoi4.xyz VF08,VF08 ior VI02,VI02,VI07 - ftoi4.xyz VF11,VF12 iaddiu VI02,VI02,0x00007fff - ftoi4.xyz VF07,VF07 mfir.w VF11,VI02 - NOP mfir.w VF07,VI02 - NOP sq.xyz VF17,6(VI04) - NOP sq VF08,5(VI04) - NOP sq VF11,8(VI04) - NOP sq VF07,11(VI04) -EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 6 [6 0] 6 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_quad_pp4_vcl_9] - maxw.z VF07,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] - NOP xtop VI06 - NOP ilw.x VI05,0(VI06) - NOP lq.xyz VF10,67(VI00) - NOP lq.xyz VF08,3(VI03) - NOP lq.xyz VF11,68(VI00) - NOP lq.xyz VF09,69(VI00) - NOP iaddiu VI04,VI06,0x00000005 - mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 - 
madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 - maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) - NOP iaddiu VI06,VI06,0 - NOP iadd VI07,VI07,VI05 - NOP lq.xyz VF08,0(VI03) - mul.xyz VF17,VF10,VF17 iaddiu VI04,VI04,0x00000003 - NOP lq.xyz VF09,1(VI03) - NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3 - NOP iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] - adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) - maddx.z VF11,VF07,VF17x NOP - mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 - NOP NOP - NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 - maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] - adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 - NOP NOP - mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 7 [7 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO3] - adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) - madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) - maddx.z VF12,VF07,VF17x NOP - add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF18 
iaddiu VI04,VI04,0x00000003 - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) - mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - add.xyz VF10,VF15,VF14 NOP - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP - add.xyz VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) - mulz.xyz VF13,VF09,VF11z NOP - NOP sq.xyz VF17,-8(VI06) - add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) - NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 - add.xyz VF08,VF15,VF14 sq.xyz VF10,-11(VI06) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 - add.xyz VF12,VF10,VF12 NOP - madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) - NOP sq.xyz VF12,238(VI06) - add.xyz VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?1 - NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 - NOP b 
EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,244(VI06) -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - NOP NOP - mulz.xyz VF10,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) - add.xyz VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3: -; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 - mulz.xyz VF09,VF09,VF17z NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_quad_pp4_vcl_15] - maxw.z 
VF07,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,71(VI00) - NOP lq.xyz VF09,3(VI03) - NOP lq.xyz VF08,72(VI00) - NOP lq.xyz VF11,73(VI00) - NOP lq.xyz VF10,74(VI00) - mulax.xyz ACC,VF12,VF09x NOP - madday.xyz ACC,VF08,VF09y xtop VI06 - maddaz.xyz ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) - sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 - NOP iaddiu VI06,VI06,0 - adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 - maddx.z VF13,VF07,VF13x lq.xyz VF09,1(VI03) - NOP sqrt Q,VF13z ; STALL_LATENCY ?3 - NOP NOP - NOP iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 - NOP NOP -; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] - NOP lq.xyz VF15,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - sub.xyz VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 - NOP waitq - mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) - adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF15,VF15,VF16 NOP - madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z NOP - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - maddz.w VF06,VF00,VF15z NOP - NOP NOP - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 - maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 -EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 26 [25 21] 
26 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - NOP lq.xyz VF16,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 - sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 - NOP NOP - NOP NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF13,VF12,VF12 NOP - mulq.xyz VF16,VF16,Q div Q,VF00w,VF05w - madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) - mulax.w ACC,VF00,VF14x NOP - adday.z ACC,VF13,VF13y NOP - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF16,VF16,VF17 NOP - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) - mulq.xyz VF15,VF15,Q sqrt Q,VF13z - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - maddz.w VF06,VF00,VF16z NOP - add.xyz VF14,VF14,VF15 NOP - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) -EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 27 [43 0] 45 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] - NOP NOP - NOP NOP - mul.xyz VF10,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 - mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF10y NOP - NOP NOP - mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) - mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 - maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF13,VF08,VF05 NOP - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 - mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) - maxx.w VF06,VF06,VF00x NOP - add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 - mulw.xyz VF13,VF09,VF06w NOP - NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - 
add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] - NOP NOP - NOP NOP - mul.xyz VF13,VF13,VF10 div Q,VF00w,VF13y - NOP move.xyz VF12,VF12 - mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF13y NOP - maddz.w VF05,VF00,VF13z NOP - mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 - mul.xyz VF12,VF12,VF10 NOP - mulax.w ACC,VF00,VF12x NOP ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 - mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - 
NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_quad_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralNoSpecQuad_CodeEnd: -; iCount=485 -; register stats: -; 11 VU User integer -; 26 VU User floating point diff --git a/vu1/general_nospec_tri_vcl.vsm b/vu1/general_nospec_tri_vcl.vsm deleted file mode 100644 index e95d85f9..00000000 --- a/vu1/general_nospec_tri_vcl.vsm +++ /dev/null @@ -1,490 +0,0 @@ -; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === hDown : optimal=35 clid=0 mlid=2 size=(36) -; === dUp : optimal=35 clid=0 mlid=1 size=(36) -; === normal : optimal=35 clid=0 mlid=1 size=(36) -; === 
another : optimal=35 clid=0 mlid=2 size=(36) -; === normal2 : optimal=35 clid=0 mlid=2 size=(36) -; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=8 clid=5 mlid=5 size=(8) -; === ldumb : optimal=8 clid=0 mlid=4 size=(8) -; === normal1 : optimal=8 clid=0 mlid=4 size=(8) -; === hDown : optimal=8 clid=0 mlid=4 size=(8) -; === vuta : optimal=8 clid=0 mlid=4 size=(8) -; === dUp : optimal=8 clid=0 mlid=4 size=(8) -; === normal : optimal=8 clid=0 mlid=4 size=(8) -; === another : optimal=8 clid=0 mlid=4 size=(8) -; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) -; === normal2 : optimal=8 clid=0 mlid=6 size=(8) -; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) -; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=21 clid=3 mlid=3 size=(26) -; === ldumb : optimal=21 clid=0 mlid=3 size=(26) -; === normal1 : optimal=21 clid=0 mlid=3 size=(26) -; === hDown : optimal=21 clid=0 mlid=3 size=(26) -; === vuta : optimal=21 clid=0 mlid=2 size=(26) -; === dUp : optimal=21 clid=0 mlid=3 size=(26) -; === normal : optimal=21 clid=0 mlid=3 size=(26) -; === another : optimal=21 clid=0 mlid=3 size=(26) -; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) -; === normal2 : optimal=21 clid=0 mlid=3 size=(26) -; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) -; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; 
================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralNoSpecTri_CodeStart - .global vsmGeneralNoSpecTri_CodeEnd -vsmGeneralNoSpecTri_CodeStart: -__v_vu1_general_nospec_tri_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_tri_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 48 [48 0] 48 [main_loop_lid] - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - NOP xtop VI05 - NOP iaddiu VI03,VI05,0x00000005 - NOP mr32.z VF05,VF05 - NOP iaddiu VI04,VI05,0x000000ed - NOP ilw.x VI05,0(VI05) - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - NOP lq VF06,75(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP ilw.w VI07,0(VI00) - mul.xyz VF09,VF09,VF08 lq.xyz VF08,3(VI03) - NOP iadd VI06,VI03,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI08,VF06x - mulax ACC,VF01,VF08x ior VI08,VI08,VI05 - madday ACC,VF02,VF08y mfir.x VF06,VI08 - maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 - maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) - NOP lq.xyz VF07,58(VI00) - NOP sq VF06,-1(VI04) - NOP iaddiu VI08,VI00,0x00007fff - mulax ACC,VF01,VF14x div Q,VF00w,VF15w - madday ACC,VF02,VF14y lq.xyz VF08,6(VI03) - maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 - maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) - add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) - mulax ACC,VF01,VF08x lq.xyz VF07,5(VI03) - madday ACC,VF02,VF08y lq.w VF08,0(VI00) - maddaz ACC,VF03,VF08z div Q,VF00w,VF14w - mulq.xyz VF08,VF15,Q fcset 0 - mulq.xyz VF07,VF07,Q iaddiu VI09,VI03,0 - maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 - NOP sq.xyz VF09,1(VI04) - add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) - maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mul.xyz VF13,VF08,VF06 mfir.w VF10,VI08 - ftoi4.xyz VF10,VF10 lq.xyz VF11,8(VI03) - mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) - sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) 
- mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 - add.xyz VF10,VF07,VF05 sq VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 - mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) - clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF12,VF10 lq.xyz VF11,3(VI03) - sub.xyz VF10,VF07,VF08 NOP - mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 - mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) - mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) - madday ACC,VF02,VF11y mfir.w VF12,VI08 - maddaz ACC,VF03,VF11z NOP - maddw VF15,VF04,VF00w NOP - mulax ACC,VF01,VF14x lq.xyz VF11,6(VI03) - madday ACC,VF02,VF14y sq VF12,2(VI09) - maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 - maddw VF14,VF04,VF00w div Q,VF00w,VF15w - mulax ACC,VF01,VF11x lq.xyz VF16,5(VI03) - madday ACC,VF02,VF11y iaddiu VI10,VI04,0 - maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) - maddw VF12,VF04,VF00w lq.xyz VF11,8(VI03) - clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) - opmula.xyz ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 - mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w - mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) - opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 - NOP iand VI03,VI01,VI02 - mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) - add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) - add.xyz VF17,VF07,VF05 fmand VI01,VI07 - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 - ftoi4.xyz VF10,VF10 iaddiu VI03,VI11,0 - ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff - sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 - mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) - add.xyz VF10,VF07,VF05 sq VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 - mulq.xyz VF15,VF11,Q sq VF15,8(VI09) - clipw.xyz VF16xyz,VF07w ibne 
VI03,VI06,EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 20 [17 0] 21 [EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0] - ftoi4.xyz VF09,VF10 NOP - sub.xyz VF10,VF07,VF08 NOP - mul.xyz VF08,VF07,VF06 NOP - mulw.xyz VF14,VF14,VF08w NOP - add.xyz VF07,VF07,VF05 mfir.w VF09,VI08 - clipw.xyz VF08xyz,VF07w sq.xyz VF15,6(VI09) ; STALL_LATENCY ?1 - opmula.xyz ACCxyz,VF14xyz,VF10xyz lq.xyz VF06,60(VI00) - opmsub.xyz VF11xyz,VF10xyz,VF14xyz lq.xyz VF05,59(VI00) - NOP sq VF09,2(VI09) - NOP fcand VI01,262143 - NOP iand VI02,VI01,VI02 - abs.xyz VF00,VF11 fmand VI07,VI07 - NOP ior VI02,VI02,VI07 - NOP iaddiu VI02,VI02,0x00007fff - ftoi4.xyz VF15,VF07 ilw.x VI01,0(VI00) - NOP mfir.w VF15,VI02 - NOP iaddiu VI02,VI00,0x00000001 - NOP ilw.x VI03,0(VI02) - NOP ibeq VI01,VI00,pt_lights_lid - NOP sq VF15,8(VI09) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_tri_pp4_vcl_9] - maxw.z VF07,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] - NOP xtop VI06 - NOP ilw.x VI05,0(VI06) - NOP lq.xyz VF10,67(VI00) - NOP lq.xyz VF08,3(VI03) - NOP lq.xyz VF11,68(VI00) - NOP lq.xyz VF09,69(VI00) - NOP iaddiu VI04,VI06,0x00000005 - mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 - madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 - maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) - NOP iaddiu VI06,VI06,0 - NOP iadd VI07,VI07,VI05 - NOP lq.xyz VF08,0(VI03) - mul.xyz VF17,VF10,VF17 iaddiu VI04,VI04,0x00000003 - NOP lq.xyz VF09,1(VI03) - NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3 - NOP iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] - adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) - maddx.z VF11,VF07,VF17x NOP - mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 - NOP NOP - NOP ibeq 
VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 - maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] - adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 - NOP NOP - mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 7 [7 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO3] - adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) - madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) - maddx.z VF12,VF07,VF17x NOP - add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF18 iaddiu VI04,VI04,0x00000003 - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) - mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - add.xyz VF10,VF15,VF14 NOP - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP - add.xyz 
VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) - mulz.xyz VF13,VF09,VF11z NOP - NOP sq.xyz VF17,-8(VI06) - add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) - NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 - add.xyz VF08,VF15,VF14 sq.xyz VF10,-11(VI06) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 - add.xyz VF12,VF10,VF12 NOP - madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) - NOP sq.xyz VF12,238(VI06) - add.xyz VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?1 - NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,244(VI06) -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - NOP NOP - mulz.xyz VF10,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) - add.xyz 
VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3: -; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 - mulz.xyz VF09,VF09,VF17z NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_tri_pp4_vcl_15] - maxw.z VF07,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,71(VI00) - NOP lq.xyz VF09,3(VI03) - NOP lq.xyz VF08,72(VI00) - NOP lq.xyz VF11,73(VI00) - NOP lq.xyz VF10,74(VI00) - mulax.xyz ACC,VF12,VF09x NOP - madday.xyz ACC,VF08,VF09y xtop VI06 - maddaz.xyz ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) - sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 - NOP iaddiu VI06,VI06,0 - adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 - maddx.z VF13,VF07,VF13x lq.xyz 
VF09,1(VI03) - NOP sqrt Q,VF13z ; STALL_LATENCY ?3 - NOP NOP - NOP iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 - NOP NOP -; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] - NOP lq.xyz VF15,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - sub.xyz VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 - NOP waitq - mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) - adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF15,VF15,VF16 NOP - madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z NOP - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - maddz.w VF06,VF00,VF15z NOP - NOP NOP - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 - maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 -EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 26 [25 21] 26 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - NOP lq.xyz VF16,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 - sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 - NOP NOP - NOP NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF13,VF12,VF12 NOP - mulq.xyz VF16,VF16,Q div Q,VF00w,VF05w - madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) - mulax.w ACC,VF00,VF14x NOP - adday.z ACC,VF13,VF13y NOP - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF16,VF16,VF17 NOP - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) - mulq.xyz VF15,VF15,Q sqrt Q,VF13z - mulax.w ACC,VF00,VF16x 
NOP - madday.w ACC,VF00,VF16y NOP - maddz.w VF06,VF00,VF16z NOP - add.xyz VF14,VF14,VF15 NOP - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) -EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 27 [43 0] 45 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] - NOP NOP - NOP NOP - mul.xyz VF10,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 - mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF10y NOP - NOP NOP - mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) - mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 - maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF13,VF08,VF05 NOP - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 - mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) - maxx.w VF06,VF06,VF00x NOP - add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 - mulw.xyz VF13,VF09,VF06w NOP - NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] - NOP NOP - NOP NOP - mul.xyz VF13,VF13,VF10 div Q,VF00w,VF13y - NOP move.xyz VF12,VF12 - mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF13y NOP - maddz.w VF05,VF00,VF13z NOP - mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 - mul.xyz VF12,VF12,VF10 NOP - mulax.w ACC,VF00,VF12x NOP ; 
STALL_LATENCY ?3 - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 - mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0] - NOP 
NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_tri_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralNoSpecTri_CodeEnd: -; iCount=383 -; register stats: -; 12 VU User integer -; 19 VU User floating point diff --git a/vu1/general_nospec_vcl.vsm b/vu1/general_nospec_vcl.vsm deleted file mode 100644 index e6d086fc..00000000 --- a/vu1/general_nospec_vcl.vsm +++ /dev/null @@ -1,570 +0,0 @@ -; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === ldumb : optimal=22 clid=0 mlid=2 size=(22) -; === normal1 : optimal=22 clid=0 mlid=2 size=(22) -; === vuta : optimal=22 clid=0 mlid=2 size=(22) -; === dUp : optimal=22 clid=0 mlid=2 size=(22) -; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) -; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=8 clid=5 mlid=5 size=(8) -; === ldumb : optimal=8 clid=0 mlid=4 size=(8) -; === normal1 : optimal=8 clid=0 mlid=4 size=(8) -; === hDown : optimal=8 clid=0 mlid=4 size=(8) -; === vuta : optimal=8 clid=0 mlid=4 size=(8) -; === dUp : optimal=8 clid=0 mlid=4 size=(8) -; === normal : optimal=8 clid=0 mlid=4 size=(8) -; === another : optimal=8 clid=0 mlid=4 size=(8) -; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) -; === normal2 : optimal=8 clid=0 mlid=6 size=(8) -; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) -; === 
__LP__ EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=21 clid=3 mlid=3 size=(26) -; === ldumb : optimal=21 clid=0 mlid=3 size=(26) -; === normal1 : optimal=21 clid=0 mlid=3 size=(26) -; === hDown : optimal=21 clid=0 mlid=3 size=(26) -; === vuta : optimal=21 clid=0 mlid=2 size=(26) -; === dUp : optimal=21 clid=0 mlid=3 size=(26) -; === normal : optimal=21 clid=0 mlid=3 size=(26) -; === another : optimal=21 clid=0 mlid=3 size=(26) -; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) -; === normal2 : optimal=21 clid=0 mlid=3 size=(26) -; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) -; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralNoSpec_CodeStart - .global vsmGeneralNoSpec_CodeEnd -vsmGeneralNoSpec_CodeStart: -__v_vu1_general_nospec_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 26 [26 0] 26 [main_loop_lid] - NOP loi 0x44fff000 - NOP lq.w VF06,57(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP xtop VI01 - NOP iaddiu VI03,VI01,0x00000005 - NOP mr32.z VF05,VF06 - NOP iaddiu VI04,VI01,0 - NOP ilw.x VI05,0(VI01) - NOP fcset 0 - NOP lq VF08,75(VI00) - NOP lq.xyz 
VF06,76(VI00) - NOP iadd VI06,VI03,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI07,VF08x - NOP ior VI07,VI07,VI05 - NOP mfir.x VF08,VI07 - NOP iaddiu VI07,VI01,0x00000001 - NOP ilw.w VI02,76(VI00) - NOP iadd VI06,VI06,VI05 - NOP sq VF08,236(VI04) - NOP iaddiu VI01,VI01,0x00000005 - NOP iaddiu VI08,VI07,0x00000004 - NOP iaddiu VI09,VI00,0x000003ff - NOP iaddiu VI10,VI00,0x00000800 - NOP iaddiu VI11,VI00,0x00000400 - maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 -adcLoop_lid: -; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] - NOP lq VF08,0(VI07) - ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 - NOP mtir VI13,VF08x ; STALL_LATENCY ?3 - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_8] - NOP iand VI14,VI13,VI10 - NOP iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,3(VI13) - NOP mtir VI14,VF08y - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_9] - NOP iand VI13,VI14,VI10 - NOP iand VI14,VI14,VI09 - NOP iadd VI14,VI14,VI01 - NOP isw.w VI12,0(VI14) - NOP isw.w VI13,3(VI14) - NOP mtir VI13,VF08z - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_10] - NOP iand VI14,VI13,VI10 - NOP iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,3(VI13) - NOP mtir VI14,VF08w - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_nospec_pp4_vcl_11] - NOP iand VI13,VI14,VI09 - NOP iaddiu VI07,VI07,0x00000001 - NOP iand VI14,VI14,VI10 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP ibne VI07,VI08,adcLoop_lid - NOP isw.w VI14,3(VI13) -adcLoop_done_lid: -; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] - NOP lq.xyz VF12,0(VI03) - 
mulax ACC,VF01,VF12x iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 - madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) - maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) - maddw VF12,VF04,VF00w lq.w VF05,0(VI00) - mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF12w - add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 - NOP iaddiu VI03,VI03,0x00000003 - NOP lq.xyz VF11,-1(VI03) - mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 - max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1 - max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 -; _LNOPT_w=[ ] 21 [19 0] 22 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__PRO1] - NOP NOP - add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) - sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 - max.xyz VF09,VF12,VF12 NOP - mul.xyz VF14,VF12,VF06 NOP - mulax ACC,VF01,VF15x NOP - madday ACC,VF02,VF15y NOP - maddaz ACC,VF03,VF15z ilw.w VI09,-3(VI03) - maddw VF12,VF04,VF00w isub VI01,VI08,VI07 - mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000003 - opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 - NOP div Q,VF00w,VF12w - NOP sq.xyz VF11,-6(VI04) - abs.xyz VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 - NOP lq.xyz VF11,-1(VI03) - clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibeq VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0 - mulw.xyz VF08,VF13,VF05w fcand VI01,262143 -EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 - add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) - sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 - max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 - mul.xyz VF15,VF12,VF06 ilw.w VI09,-3(VI03) - mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff - madday ACC,VF02,VF16y mfir.w 
VF14,VI10 - maddaz ACC,VF03,VF16z isub VI01,VI08,VI07 - maddw VF12,VF04,VF00w iand VI10,VI09,VI08 - mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) - opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000003 - mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w - clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) - NOP iaddiu VI04,VI04,0x00000003 - NOP fmand VI11,VI08 - NOP lq.xyz VF11,-1(VI03) - NOP isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibne VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP - NOP fcand VI01,262143 -EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0] - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 - opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 - opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 - NOP ilw.w VI09,-3(VI03) - NOP iaddiu VI10,VI10,0x00007fff - clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 - abs.xyz VF00,VF14 fmand VI10,VI08 - NOP isub VI07,VI10,VI07 - ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 - add.xyz VF10,VF12,VF05 fcand VI01,262143 - NOP iand VI01,VI01,VI02 - mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 - NOP ior VI10,VI10,VI09 - ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff - NOP mfir.w VF10,VI10 - NOP sq.xyz VF11,-3(VI04) - NOP sq VF13,-4(VI04) - NOP b EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF10,-1(VI04) -EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1] - NOP NOP - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF06,VF12,VF06 NOP - opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 - opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP - clipw.xyz VF06xyz,VF07w ilw.w VI03,-3(VI03) - abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 - NOP isub VI07,VI01,VI07 - NOP iand 
VI08,VI07,VI08 - add.xyz VF07,VF12,VF05 fcand VI01,262143 - NOP iand VI02,VI01,VI02 - NOP ior VI02,VI02,VI08 - mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 - ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF07,VI03 - NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 - NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 -EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 6 [6 0] 6 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_pp4_vcl_16] - maxw.z VF07,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] - NOP xtop VI06 - NOP ilw.x VI05,0(VI06) - NOP lq.xyz VF10,67(VI00) - NOP lq.xyz VF08,3(VI03) - NOP lq.xyz VF11,68(VI00) - NOP lq.xyz VF09,69(VI00) - NOP iaddiu VI04,VI06,0x00000005 - mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 - madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 - maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) - NOP iaddiu VI06,VI06,0 - NOP iadd VI07,VI07,VI05 - NOP lq.xyz VF08,0(VI03) - mul.xyz VF17,VF10,VF17 iaddiu VI04,VI04,0x00000003 - NOP lq.xyz VF09,1(VI03) - NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3 - NOP iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO1] - adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) - maddx.z VF11,VF07,VF17x NOP - mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 - NOP NOP - NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2 - maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO2] - adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; 
STALL_LATENCY ?2 - NOP NOP - mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 7 [7 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO3] - adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) - maddx.z VF12,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) - madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0 - maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) - maddx.z VF12,VF07,VF17x NOP - add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF17,VF10,VF18 iaddiu VI04,VI04,0x00000003 - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) - mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - add.xyz VF10,VF15,VF14 NOP - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP - add.xyz VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) - mulz.xyz VF13,VF09,VF11z NOP - NOP sq.xyz VF17,-8(VI06) - add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) - NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 - add.xyz VF08,VF15,VF14 sq.xyz 
VF10,-11(VI06) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) - mula.xyz ACC,VF13,VF06 NOP - mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 - add.xyz VF12,VF10,VF12 NOP - madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) - NOP sq.xyz VF12,238(VI06) - add.xyz VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?1 - NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 - NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,244(VI06) -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2] - adday.z ACC,VF17,VF17y NOP - maddx.z VF17,VF07,VF17x NOP - NOP NOP - mulz.xyz VF10,VF09,VF11z NOP - maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) - add.xyz VF11,VF17,VF10 NOP - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3: -; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3] - adday.z ACC,VF17,VF17y NOP - 
maddx.z VF17,VF07,VF17x NOP - maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 - mulz.xyz VF09,VF09,VF17z NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) - add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_pp4_vcl_22] - maxw.z VF07,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,71(VI00) - NOP lq.xyz VF09,3(VI03) - NOP lq.xyz VF08,72(VI00) - NOP lq.xyz VF11,73(VI00) - NOP lq.xyz VF10,74(VI00) - mulax.xyz ACC,VF12,VF09x NOP - madday.xyz ACC,VF08,VF09y xtop VI06 - maddaz.xyz ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) - sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 - NOP iaddiu VI06,VI06,0 - adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 - maddx.z VF13,VF07,VF13x lq.xyz VF09,1(VI03) - NOP sqrt Q,VF13z ; STALL_LATENCY ?3 - NOP NOP - NOP iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1 - NOP NOP -; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__PRO1] - NOP lq.xyz 
VF15,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - sub.xyz VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 - mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 - NOP waitq - mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) - adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF15,VF15,VF16 NOP - madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z NOP - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - maddz.w VF06,VF00,VF15z NOP - NOP NOP - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0 - maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 -EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 26 [25 21] 26 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - NOP lq.xyz VF16,0(VI04) - NOP NOP - mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 - sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 - NOP NOP - NOP NOP - mula.xyz ACC,VF13,VF06 NOP - mul.xyz VF13,VF12,VF12 NOP - mulq.xyz VF16,VF16,Q div Q,VF00w,VF05w - madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) - mulax.w ACC,VF00,VF14x NOP - adday.z ACC,VF13,VF13y NOP - maddx.z VF13,VF07,VF13x NOP - mul.xyz VF16,VF16,VF17 NOP - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) - mulq.xyz VF15,VF15,Q sqrt Q,VF13z - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - maddz.w VF06,VF00,VF16z NOP - add.xyz VF14,VF14,VF15 NOP - NOP iaddiu VI04,VI04,0x00000003 - addw.x VF13,VF00,VF00w NOP - addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) -EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 27 [43 0] 45 
[EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0] - NOP NOP - NOP NOP - mul.xyz VF10,VF13,VF10 div Q,VF00w,VF13y - mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 - mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF10y NOP - NOP NOP - mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) - mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 - maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF13,VF08,VF05 NOP - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 - mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) - maxx.w VF06,VF06,VF00x NOP - add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 - mulw.xyz VF13,VF09,VF06w NOP - NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 - mula.xyz ACC,VF13,VF06 NOP - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1] - NOP NOP - NOP NOP - mul.xyz VF13,VF13,VF10 div Q,VF00w,VF13y - NOP move.xyz VF12,VF12 - mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF13y NOP - maddz.w VF05,VF00,VF13z NOP - mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 - mul.xyz VF12,VF12,VF10 NOP - mulax.w ACC,VF00,VF12x NOP ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF12y NOP - maddz.w VF06,VF00,VF12z NOP - maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 - mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 - madd.xyz VF08,VF08,VF05 NOP - mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 
-EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 
-EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_pp4_vcl_30] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralNoSpec_CodeEnd: -; iCount=450 -; register stats: -; 15 VU User integer -; 19 VU User floating point diff --git a/vu1/general_pv_diff_quad_vcl.vsm b/vu1/general_pv_diff_quad_vcl.vsm deleted file mode 100644 index 22351c35..00000000 --- a/vu1/general_pv_diff_quad_vcl.vsm +++ /dev/null @@ -1,714 +0,0 @@ -; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === normal1 : optimal=50 clid=0 mlid=1 size=(51) -; === hDown : optimal=50 clid=0 mlid=1 size=(51) -; === another : optimal=50 clid=0 mlid=1 size=(51) -; === normal2 : optimal=50 clid=0 mlid=2 size=(51) -; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=4 mlid=4 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=4 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) -; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === hDown : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === normal : optimal=34 clid=0 mlid=4 size=(39) -; === another : optimal=34 clid=0 mlid=4 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) -; === normal2 : optimal=34 clid=0 mlid=5 size=(39) -; === __LP__ 
EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralPVDiffQuad_CodeStart - .global vsmGeneralPVDiffQuad_CodeEnd -vsmGeneralPVDiffQuad_CodeStart: -__v_vu1_general_pv_diff_quad_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_quad_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 53 [53 0] 57 [main_loop_lid] - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - mul.xyz VF09,VF09,VF08 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 - NOP xtop VI05 - NOP iaddiu VI03,VI05,0x00000005 - add.xyz VF09,VF07,VF09 lq.xyz VF07,0(VI03) ; STALL_LATENCY ?1 - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - NOP iaddiu VI04,VI05,0x000000ed - mulax ACC,VF01,VF07x lq VF06,75(VI00) - madday ACC,VF02,VF07y ilw.x VI05,0(VI05) - maddaz ACC,VF03,VF07z lq.xyz VF20,8(VI03) - maddw VF22,VF04,VF00w mr32.z VF05,VF05 - NOP mtir VI08,VF06x - NOP ior VI08,VI08,VI05 - mulax ACC,VF01,VF20x lq.xyz VF15,12(VI03) - madday ACC,VF02,VF20y div Q,VF00w,VF22w - maddaz ACC,VF03,VF20z lq.xyz VF12,15(VI03) - maddw VF20,VF04,VF00w mfir.x VF06,VI08 - mulax ACC,VF01,VF15x lq.xyz VF14,4(VI03) - madday ACC,VF02,VF15y lq.xyz VF07,11(VI03) - maddaz ACC,VF03,VF15z sq.xyz VF12,11(VI03) - maddw VF15,VF04,VF00w lq.xyz VF12,9(VI03) - 
mulax ACC,VF01,VF14x div Q,VF00w,VF20w - madday ACC,VF02,VF14y lq.xyz VF18,13(VI03) - maddaz ACC,VF03,VF14z lq.xyz VF10,2(VI03) - maddw VF12,VF04,VF00w sq.xyz VF12,13(VI03) - addi.xy VF05,VF00,I loi 0x45000000 - mulq.xyz VF13,VF22,Q sq VF06,-1(VI04) - mulq.xyz VF10,VF10,Q lq.xyz VF06,76(VI00) - NOP div Q,VF00w,VF12w - NOP ilw.w VI07,0(VI00) - NOP lq.xyz VF11,10(VI03) - maxi.w VF07,VF00,I sq.xyz VF18,9(VI03) - mul.xyz VF08,VF13,VF06 lq.xyz VF18,6(VI03) - mulq.xyz VF20,VF20,Q iadd VI06,VI03,VI05 - mulq.xyz VF11,VF11,Q iadd VI06,VI06,VI05 - add.xyz VF14,VF13,VF05 div Q,VF00w,VF15w - mulq.xyz VF16,VF12,Q fcset 0 - clipw.xyz VF08xyz,VF07w iadd VI06,VI06,VI05 - add.xyz VF07,VF20,VF05 sq.xyz VF07,15(VI03) - mulq.xyz VF12,VF18,Q iadd VI06,VI06,VI05 - sub.xyz VF22,VF13,VF16 iaddiu VI08,VI00,0x00007fff - mul.xyz VF17,VF16,VF06 lq.w VF08,0(VI00) - mulq.xyz VF08,VF15,Q iaddiu VI08,VI08,0x00000001 - add.xyz VF13,VF16,VF05 iaddiu VI01,VI03,0 - mul.xyz VF15,VF20,VF06 iaddiu VI01,VI01,0 - clipw.xyz VF17xyz,VF07w lq.xyz VF18,14(VI03) - mul.xyz VF17,VF08,VF06 iaddiu VI03,VI01,0 - mulw.xyz VF22,VF22,VF08w iaddiu VI03,VI03,0x00000010 - sub.xyz VF20,VF20,VF16 ilw.w VI02,76(VI00) - add.xyz VF08,VF08,VF05 ibeq VI03,VI06,EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF17xyz,VF07w mfir.w VF10,VI08 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ normal1 ] 51 [50 50] 51 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF19,VF14 lq.xyz VF16,0(VI03) - opmula.xyz ACCxyz,VF22xyz,VF20xyz lq.xyz VF17,8(VI03) - opmsub.xyz VF00xyz,VF20xyz,VF22xyz lq.xyz VF21,12(VI03) - mulq.xyz VF20,VF18,Q iaddiu VI01,VI03,0 - mulax ACC,VF01,VF16x iaddiu VI10,VI04,0 - madday ACC,VF02,VF16y lq.xyz VF14,4(VI03) - maddaz ACC,VF03,VF16z fmand VI09,VI07 - maddw VF22,VF04,VF00w lq.xyz VF16,11(VI03) - mulax ACC,VF01,VF17x lq.xyz VF23,15(VI03) - madday ACC,VF02,VF17y lq.xyz VF18,13(VI03) - maddaz ACC,VF03,VF17z mfir.w VF19,VI08 - maddw 
VF17,VF04,VF00w div Q,VF00w,VF22w - mulax ACC,VF01,VF21x sq.xyz VF23,11(VI03) - madday ACC,VF02,VF21y lq.xyz VF23,9(VI03) - maddaz ACC,VF03,VF21z sq VF19,2(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF20,6(VI04) - ftoi4.xyz VF10,VF13 sq.xyz VF10,0(VI04) - maddw VF15,VF04,VF00w sq.xyz VF11,9(VI04) - mulq.xyz VF13,VF22,Q sq.xyz VF12,3(VI04) - mulax ACC,VF01,VF14x div Q,VF00w,VF17w - madday ACC,VF02,VF14y sq VF10,5(VI04) - maddaz ACC,VF03,VF14z lq.xyz VF10,2(VI03) - mul.xyz VF19,VF13,VF06 iaddiu VI11,VI10,0 - maddw VF12,VF04,VF00w sq.xyz VF23,13(VI03) - add.xyz VF14,VF13,VF05 sq.xyz VF18,9(VI03) - mulq.xyz VF10,VF10,Q lq.xyz VF11,10(VI03) - clipw.xyz VF19xyz,VF07w sq.xyz VF16,15(VI03) - mulq.xyz VF16,VF17,Q div Q,VF00w,VF12w - NOP iaddiu VI10,VI01,0 - NOP fcand VI01,16777215 - ftoi4.xyz VF19,VF07 iand VI12,VI01,VI02 - add.xyz VF07,VF16,VF05 lq.xyz VF18,6(VI03) - NOP iaddiu VI01,VI11,0 - mulq.xyz VF11,VF11,Q ior VI09,VI12,VI09 - mulq.xyz VF17,VF12,Q div Q,VF00w,VF15w - NOP iaddiu VI09,VI09,0x00007fff - mulq.xyz VF12,VF18,Q mfir.w VF20,VI09 - ftoi4.xyz VF20,VF08 mfir.w VF19,VI09 - sub.xyz VF22,VF13,VF17 sq.xyz VF09,10(VI04) - mul.xyz VF21,VF17,VF06 sq.xyz VF09,4(VI04) - add.xyz VF13,VF17,VF05 lq.xyz VF18,14(VI03) - mulq.xyz VF08,VF15,Q sq VF19,11(VI04) - NOP sq VF20,8(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF09,1(VI04) - mul.xyz VF15,VF16,VF06 sq.xyz VF09,7(VI04) - mul.xyz VF19,VF08,VF06 iaddiu VI04,VI01,0x0000000c - add.xyz VF08,VF08,VF05 iaddiu VI03,VI10,0 - mulw.xyz VF22,VF22,VF08w iaddiu VI03,VI03,0x00000010 - sub.xyz VF20,VF16,VF17 NOP - clipw.xyz VF19xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP - NOP mfir.w VF10,VI08 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 28 [27 0] 28 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0] - NOP NOP - opmula.xyz ACCxyz,VF22xyz,VF20xyz sq.xyz VF10,0(VI04) - opmsub.xyz VF16xyz,VF20xyz,VF22xyz mfir.w VF05,VI08 - ftoi4.xyz VF05,VF14 sq.xyz VF11,9(VI04) 
- mulq.xyz VF20,VF18,Q sq.xyz VF12,3(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,10(VI04) - abs.xyz VF00,VF16 fmand VI07,VI07 - NOP sq VF05,2(VI04) - NOP sq.xyz VF20,6(VI04) - NOP fcand VI01,16777215 - NOP iand VI02,VI01,VI02 - NOP ior VI02,VI02,VI07 - ftoi4.xyz VF20,VF08 iaddiu VI02,VI02,0x00007fff - ftoi4.xyz VF07,VF07 mfir.w VF20,VI02 - NOP mfir.w VF07,VI02 - NOP sq.xyz VF09,4(VI04) - NOP sq.xyz VF09,1(VI04) - ftoi4.xyz VF10,VF13 sq VF20,8(VI04) - NOP sq VF07,11(VI04) - NOP sq.xyz VF09,7(VI04) - NOP ilw.x VI01,0(VI00) - NOP sq VF10,5(VI04) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,61(VI00) - NOP lq.xyz VF07,66(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_quad_pp4_vcl_9] - maxw.z VF08,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF11,67(VI00) - NOP lq.xyz VF12,3(VI03) - NOP lq.xyz VF13,68(VI00) - NOP lq.xyz VF09,69(VI00) - mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) - maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) - NOP esadd P,VF13 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 - mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 - mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) - NOP lq.xyz VF09,0(VI03) - NOP mr32.xyw VF16,VF14 - adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; 
_LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF09,VF00,VF16y NOP - maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - mul.xyz VF14,VF13,VF16 NOP - mul.xyz VF17,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 - adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF17x NOP - mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF09,VF00,VF16y NOP - maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF17,VF15 - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x NOP - mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 - mula.xyz ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 - maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maddaw.xyz ACC,VF11,VF05w NOP - maxx.w VF06,VF09,VF00x NOP - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF05,VF05,VF05 NOP - add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 - mul.w VF06,VF06,VF06 move.xyz 
VF17,VF15 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x sq.xyz VF20,-11(VI06) - mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - mula.xyz ACC,VF17,VF19 NOP -EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF09,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP - maxx.z VF16,VF16,VF00x NOP - add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - NOP sq.xyz VF14,-8(VI06) - mul.w VF07,VF06,VF06 NOP - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 - madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) - mul.w VF05,VF07,VF07 NOP - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,-2(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 23 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) - NOP NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - maxx.z VF16,VF16,VF00x NOP - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY 
?2 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) - mul.w VF05,VF07,VF07 move.xyz VF15,VF15 - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF09,VF00,VF16y NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - maxx.z VF16,VF16,VF00x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP move.xyz VF10,VF10 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - maddaw.xyz ACC,VF11,VF05w NOP ; STALL_LATENCY ?2 - madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ 
normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_quad_pp4_vcl_15] - maxw.z VF08,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF09,71(VI00) - NOP lq.xyz VF10,3(VI03) - NOP lq.xyz VF12,72(VI00) - NOP lq.xyz VF13,73(VI00) - mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 - madday.xyz ACC,VF12,VF10y xtop VI06 - maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 - sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) - NOP sqrt Q,VF15z ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF15,VF00,Q iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP div Q,VF00w,VF15y - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) - mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 - NOP iaddiu VI06,VI06,0 - mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF15,VF15,VF12 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) - sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 - mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 - maddx.z VF20,VF08,VF20x NOP - NOP waitp - addw.x VF20,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF20z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu 
VI06,VI06,0 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - NOP move.xyz VF18,VF17 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF20x mfp.w VF06,P -; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 NOP - madday.w ACC,VF00,VF20y esadd P,VF17 - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF19,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF19x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z waitp - mulw.xyz VF21,VF10,VF05w mfp.w VF06,P - mulax.w ACC,VF00,VF14x sqrt Q,VF20z - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - addw.x VF20,VF00,VF00w ersqrt P,VF06w - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - addq.y VF20,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) - maddz.w VF05,VF00,VF14z NOP - mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF20x mfp.w VF06,P -EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF20y NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 esadd P,VF17 - maddaw.xyz ACC,VF11,VF07w NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz 
VF21,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF21x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF07,VF00,VF21z mfp.w VF06,P - NOP lq.xyz VF22,-11(VI06) - mulq.xyz VF19,VF19,Q sqrt Q,VF20z - mulw.xyz VF21,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - add.xyz VF19,VF22,VF19 NOP - addw.x VF20,VF00,VF00w NOP - mul.w VF06,VF05,VF05 NOP - addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) - madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) - mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y - mula.xyz ACC,VF21,VF22 NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF20,VF16,VF19 NOP - NOP NOP - NOP NOP - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF20x mfp.w VF06,P -EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - madday.w ACC,VF00,VF20y NOP - mul.w VF07,VF07,VF07 NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF20,VF18,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF10,VF05w NOP - madd.xyz VF19,VF09,VF05 ersqrt P,VF06w - maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 - mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 
NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 - mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 - mul.w VF07,VF06,VF06 mfp.w VF06,P - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz VF19,-5(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - 
mulax.w ACC,VF00,VF19x mfp.w VF06,P - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) - mulw.xyz VF20,VF10,VF05w NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - madday.w ACC,VF00,VF14y NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF20,VF18 NOP - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz VF19,238(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF12,VF07,VF16 
lq.xyz VF13,-3(VI04) - mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF16y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF12,VF12 - NOP mfp.w VF06,P - mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF16z NOP - mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF16y NOP - maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF15y NOP - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF09,VF05 NOP - mulq.xyz VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 
0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_quad_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralPVDiffQuad_CodeEnd: -; iCount=613 -; register stats: -; 13 VU User integer -; 24 VU User floating point diff --git a/vu1/general_pv_diff_tri_vcl.vsm 
b/vu1/general_pv_diff_tri_vcl.vsm deleted file mode 100644 index 7dab5c3c..00000000 --- a/vu1/general_pv_diff_tri_vcl.vsm +++ /dev/null @@ -1,688 +0,0 @@ -; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === hDown : optimal=35 clid=0 mlid=2 size=(36) -; === dUp : optimal=35 clid=0 mlid=1 size=(36) -; === normal : optimal=35 clid=0 mlid=1 size=(36) -; === another : optimal=35 clid=0 mlid=2 size=(36) -; === normal2 : optimal=35 clid=0 mlid=2 size=(36) -; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=4 mlid=4 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=4 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) -; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === hDown : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === normal : optimal=34 clid=0 mlid=4 size=(39) -; === another : optimal=34 clid=0 mlid=4 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) -; === normal2 : optimal=34 clid=0 mlid=5 size=(39) -; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : 
optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralPVDiffTri_CodeStart - .global vsmGeneralPVDiffTri_CodeEnd -vsmGeneralPVDiffTri_CodeStart: -__v_vu1_general_pv_diff_tri_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_tri_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 49 [49 0] 49 [main_loop_lid] - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - NOP xtop VI05 - NOP iaddiu VI03,VI05,0x00000005 - NOP mr32.z VF05,VF05 - NOP iaddiu VI04,VI05,0x000000ed - NOP ilw.x VI05,0(VI05) - NOP lq VF06,75(VI00) - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP ilw.w VI07,0(VI00) - NOP iadd VI06,VI03,VI05 - mul.xyz VF09,VF09,VF08 lq.xyz VF08,4(VI03) - NOP iadd VI06,VI06,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI08,VF06x - mulax ACC,VF01,VF08x ior VI08,VI08,VI05 - madday ACC,VF02,VF08y mfir.x VF06,VI08 - maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 - maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) - NOP lq.xyz VF07,58(VI00) - NOP sq VF06,-1(VI04) - NOP iaddiu VI08,VI00,0x00007fff - mulax ACC,VF01,VF14x div Q,VF00w,VF15w - madday ACC,VF02,VF14y lq.xyz VF08,8(VI03) - maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 - maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) - add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) - mulax ACC,VF01,VF08x lq.xyz VF07,6(VI03) - madday ACC,VF02,VF08y lq.w VF08,0(VI00) - maddaz ACC,VF03,VF08z div Q,VF00w,VF14w - mulq.xyz VF08,VF15,Q fcset 0 - mulq.xyz VF07,VF07,Q iaddiu VI09,VI03,0 - maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 - NOP sq.xyz VF09,1(VI04) - add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) - maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mul.xyz 
VF13,VF08,VF06 mfir.w VF10,VI08 - ftoi4.xyz VF10,VF10 lq.xyz VF11,10(VI03) - mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) - sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) - mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 - add.xyz VF10,VF07,VF05 sq VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x0000000c - mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) - clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF12,VF10 lq.xyz VF11,4(VI03) - sub.xyz VF10,VF07,VF08 NOP - mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 - mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) - mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) - madday ACC,VF02,VF11y mfir.w VF12,VI08 - maddaz ACC,VF03,VF11z NOP - maddw VF15,VF04,VF00w NOP - mulax ACC,VF01,VF14x lq.xyz VF11,8(VI03) - madday ACC,VF02,VF14y sq VF12,2(VI09) - maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 - maddw VF14,VF04,VF00w div Q,VF00w,VF15w - mulax ACC,VF01,VF11x lq.xyz VF16,6(VI03) - madday ACC,VF02,VF11y iaddiu VI10,VI04,0 - maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) - maddw VF12,VF04,VF00w lq.xyz VF11,10(VI03) - clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) - opmula.xyz ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 - mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w - mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) - opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 - NOP iand VI03,VI01,VI02 - mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) - add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) - add.xyz VF17,VF07,VF05 fmand VI01,VI07 - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 - ftoi4.xyz VF10,VF10 iaddiu VI03,VI11,0 - ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff - sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 - mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) - add.xyz VF10,VF07,VF05 sq 
VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x0000000c - mulq.xyz VF15,VF11,Q sq VF15,8(VI09) - clipw.xyz VF16xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 20 [17 0] 21 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0] - ftoi4.xyz VF09,VF10 NOP - sub.xyz VF10,VF07,VF08 mfir.w VF09,VI08 - mul.xyz VF08,VF07,VF06 NOP - mulw.xyz VF14,VF14,VF08w NOP - add.xyz VF07,VF07,VF05 sq.xyz VF15,6(VI09) - clipw.xyz VF08xyz,VF07w lq.xyz VF06,61(VI00) ; STALL_LATENCY ?1 - opmula.xyz ACCxyz,VF14xyz,VF10xyz lq.xyz VF05,59(VI00) - opmsub.xyz VF11xyz,VF10xyz,VF14xyz sq VF09,2(VI09) - ftoi4.xyz VF15,VF07 lq.xyz VF07,66(VI00) - NOP fcand VI01,262143 - NOP iand VI02,VI01,VI02 - abs.xyz VF00,VF11 fmand VI07,VI07 - NOP ior VI02,VI02,VI07 - NOP iaddiu VI02,VI02,0x00007fff - NOP ilw.x VI01,0(VI00) - NOP mfir.w VF15,VI02 - NOP iaddiu VI02,VI00,0x00000001 - NOP ilw.x VI03,0(VI02) - NOP ibeq VI01,VI00,pt_lights_lid - NOP sq VF15,8(VI09) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_tri_pp4_vcl_9] - maxw.z VF08,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF11,67(VI00) - NOP lq.xyz VF12,3(VI03) - NOP lq.xyz VF13,68(VI00) - NOP lq.xyz VF09,69(VI00) - mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) - maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) - NOP esadd P,VF13 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 - 
mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 - mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) - NOP lq.xyz VF09,0(VI03) - NOP mr32.xyw VF16,VF14 - adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF09,VF00,VF16y NOP - maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - mul.xyz VF14,VF13,VF16 NOP - mul.xyz VF17,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 - adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF17x NOP - mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF09,VF00,VF16y NOP - maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF17,VF15 - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x NOP - mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 - mula.xyz ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 - maxx.z VF14,VF16,VF00x lq.xyz 
VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maddaw.xyz ACC,VF11,VF05w NOP - maxx.w VF06,VF09,VF00x NOP - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF05,VF05,VF05 NOP - add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 - mul.w VF06,VF06,VF06 move.xyz VF17,VF15 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x sq.xyz VF20,-11(VI06) - mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - mula.xyz ACC,VF17,VF19 NOP -EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF09,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP - maxx.z VF16,VF16,VF00x NOP - add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - NOP sq.xyz VF14,-8(VI06) - mul.w VF07,VF06,VF06 NOP - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 - madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) - mul.w VF05,VF07,VF07 NOP - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,-2(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 23 [38 0] 38 
[EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) - NOP NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - maxx.z VF16,VF16,VF00x NOP - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) - mul.w VF05,VF07,VF07 move.xyz VF15,VF15 - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 [38 0] 38 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF09,VF00,VF16y NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - maxx.z VF16,VF16,VF00x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP move.xyz VF10,VF10 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - maddaw.xyz ACC,VF11,VF05w NOP ; STALL_LATENCY ?2 - madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 
[EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_tri_pp4_vcl_15] - maxw.z VF08,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF09,71(VI00) - NOP lq.xyz VF10,3(VI03) - NOP lq.xyz VF12,72(VI00) - NOP lq.xyz VF13,73(VI00) - mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 - madday.xyz ACC,VF12,VF10y xtop VI06 - maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 - sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) - NOP sqrt Q,VF15z ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF15,VF00,Q iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP div Q,VF00w,VF15y - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) - mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 - NOP iaddiu VI06,VI06,0 - mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF15,VF15,VF12 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) - sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 - mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; 
STALL_LATENCY ?3 - adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 - maddx.z VF20,VF08,VF20x NOP - NOP waitp - addw.x VF20,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF20z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - NOP move.xyz VF18,VF17 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF20x mfp.w VF06,P -; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 NOP - madday.w ACC,VF00,VF20y esadd P,VF17 - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF19,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF19x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z waitp - mulw.xyz VF21,VF10,VF05w mfp.w VF06,P - mulax.w ACC,VF00,VF14x sqrt Q,VF20z - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - addw.x VF20,VF00,VF00w ersqrt P,VF06w - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - addq.y VF20,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) - maddz.w VF05,VF00,VF14z NOP - mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF20x mfp.w VF06,P 
-EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF20y NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 esadd P,VF17 - maddaw.xyz ACC,VF11,VF07w NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF21,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF21x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF07,VF00,VF21z mfp.w VF06,P - NOP lq.xyz VF22,-11(VI06) - mulq.xyz VF19,VF19,Q sqrt Q,VF20z - mulw.xyz VF21,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - add.xyz VF19,VF22,VF19 NOP - addw.x VF20,VF00,VF00w NOP - mul.w VF06,VF05,VF05 NOP - addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) - madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) - mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y - mula.xyz ACC,VF21,VF22 NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF20,VF16,VF19 NOP - NOP NOP - NOP NOP - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF20x mfp.w VF06,P -EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - madday.w ACC,VF00,VF20y NOP - mul.w VF07,VF07,VF07 NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF20,VF18,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - maxx.w 
VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF10,VF05w NOP - madd.xyz VF19,VF09,VF05 ersqrt P,VF06w - maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 - mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 - mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 - mul.w VF07,VF06,VF06 mfp.w VF06,P - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz VF19,-5(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,-2(VI06) ; 
STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF19x mfp.w VF06,P - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) - mulw.xyz VF20,VF10,VF05w NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - madday.w ACC,VF00,VF14y NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF20,VF18 NOP - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz 
VF19,238(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF12,VF07,VF16 lq.xyz VF13,-3(VI04) - mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF16y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF12,VF12 - NOP mfp.w VF06,P - mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF16z NOP - mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF16y NOP - maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF15y NOP - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF09,VF05 NOP - mulq.xyz VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 
4 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 
[EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_tri_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralPVDiffTri_CodeEnd: -; iCount=586 -; register stats: -; 12 VU User integer -; 23 VU User floating point diff --git a/vu1/general_pv_diff_vcl.vsm b/vu1/general_pv_diff_vcl.vsm deleted file mode 100644 index cf088de1..00000000 --- a/vu1/general_pv_diff_vcl.vsm +++ /dev/null @@ -1,769 +0,0 @@ -; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === ldumb : optimal=22 clid=0 mlid=2 size=(22) -; === normal1 : optimal=22 clid=0 mlid=2 size=(22) -; === vuta : optimal=22 clid=0 mlid=2 size=(22) -; === dUp : optimal=22 clid=0 mlid=2 size=(22) -; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) -; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=4 mlid=4 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=4 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) -; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === hDown : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === normal : optimal=34 clid=0 mlid=4 size=(39) -; === another : optimal=34 clid=0 mlid=4 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) -; === normal2 : optimal=34 clid=0 mlid=5 size=(39) -; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : 
optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralPVDiff_CodeStart - .global vsmGeneralPVDiff_CodeEnd -vsmGeneralPVDiff_CodeStart: -__v_vu1_general_pv_diff_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 27 [27 0] 27 [main_loop_lid] - NOP loi 0x44fff000 - NOP lq.w VF06,57(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP xtop VI01 - NOP iaddiu VI03,VI01,0x00000005 - NOP mr32.z VF05,VF06 - NOP iaddiu VI04,VI01,0 - NOP ilw.x VI05,0(VI01) - NOP fcset 0 - NOP lq VF08,75(VI00) - NOP lq.xyz VF06,76(VI00) - NOP iadd VI06,VI03,VI05 - NOP iadd VI06,VI06,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI07,VF08x - NOP ior VI07,VI07,VI05 - NOP mfir.x VF08,VI07 - NOP iaddiu VI07,VI01,0x00000001 - NOP ilw.w VI02,76(VI00) - NOP iadd VI06,VI06,VI05 - NOP sq VF08,236(VI04) - NOP iaddiu VI01,VI01,0x00000005 - NOP iaddiu VI08,VI07,0x00000004 - NOP iaddiu VI09,VI00,0x000003ff - NOP iaddiu VI10,VI00,0x00000800 - NOP iaddiu VI11,VI00,0x00000400 - maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 -adcLoop_lid: -; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] - NOP lq VF08,0(VI07) - ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 - NOP mtir VI13,VF08x ; STALL_LATENCY ?3 - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq 
VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_8] - NOP iand VI14,VI13,VI10 - NOP iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,4(VI13) - NOP mtir VI14,VF08y - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_9] - NOP iand VI13,VI14,VI10 - NOP iand VI14,VI14,VI09 - NOP iadd VI14,VI14,VI01 - NOP isw.w VI12,0(VI14) - NOP isw.w VI13,4(VI14) - NOP mtir VI13,VF08z - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_10] - NOP iand VI14,VI13,VI10 - NOP iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,4(VI13) - NOP mtir VI14,VF08w - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_pv_diff_pp4_vcl_11] - NOP iand VI13,VI14,VI09 - NOP iaddiu VI07,VI07,0x00000001 - NOP iand VI14,VI14,VI10 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP ibne VI07,VI08,adcLoop_lid - NOP isw.w VI14,4(VI13) -adcLoop_done_lid: -; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] - NOP lq.xyz VF12,0(VI03) - mulax ACC,VF01,VF12x iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 - madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) - maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) - maddw VF12,VF04,VF00w lq.w VF05,0(VI00) - mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF12w - add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 - NOP iaddiu VI03,VI03,0x00000004 - NOP lq.xyz VF11,-2(VI03) - mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 - max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1 - max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 -; _LNOPT_w=[ ] 21 [19 0] 22 
[EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__PRO1] - NOP NOP - add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) - sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 - max.xyz VF09,VF12,VF12 NOP - mul.xyz VF14,VF12,VF06 NOP - mulax ACC,VF01,VF15x NOP - madday ACC,VF02,VF15y NOP - maddaz ACC,VF03,VF15z ilw.w VI09,-4(VI03) - maddw VF12,VF04,VF00w isub VI01,VI08,VI07 - mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000004 - opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 - NOP div Q,VF00w,VF12w - NOP sq.xyz VF11,-6(VI04) - abs.xyz VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 - NOP lq.xyz VF11,-2(VI03) - clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibeq VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0 - mulw.xyz VF08,VF13,VF05w fcand VI01,262143 -EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 - add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) - sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 - max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 - mul.xyz VF15,VF12,VF06 ilw.w VI09,-4(VI03) - mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff - madday ACC,VF02,VF16y mfir.w VF14,VI10 - maddaz ACC,VF03,VF16z isub VI01,VI08,VI07 - maddw VF12,VF04,VF00w iand VI10,VI09,VI08 - mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) - opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000004 - mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w - clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) - NOP iaddiu VI04,VI04,0x00000003 - NOP fmand VI11,VI08 - NOP lq.xyz VF11,-2(VI03) - NOP isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibne VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP - NOP fcand VI01,262143 
-EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0] - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 - opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 - opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 - NOP ilw.w VI09,-4(VI03) - NOP iaddiu VI10,VI10,0x00007fff - clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 - abs.xyz VF00,VF14 fmand VI10,VI08 - NOP isub VI07,VI10,VI07 - ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 - add.xyz VF10,VF12,VF05 fcand VI01,262143 - NOP iand VI01,VI01,VI02 - mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 - NOP ior VI10,VI10,VI09 - ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff - NOP mfir.w VF10,VI10 - NOP sq.xyz VF11,-3(VI04) - NOP sq VF13,-4(VI04) - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF10,-1(VI04) -EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1] - NOP NOP - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF06,VF12,VF06 NOP - opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 - opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP - clipw.xyz VF06xyz,VF07w ilw.w VI03,-4(VI03) - abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 - NOP isub VI07,VI01,VI07 - NOP iand VI08,VI07,VI08 - add.xyz VF07,VF12,VF05 fcand VI01,262143 - NOP iand VI02,VI01,VI02 - NOP ior VI02,VI02,VI08 - mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 - ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF07,VI03 - NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 - NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 -EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 7 [7 0] 7 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,61(VI00) - NOP lq.xyz VF07,66(VI00) - NOP ibeq 
VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_pp4_vcl_16] - maxw.z VF08,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF11,67(VI00) - NOP lq.xyz VF12,3(VI03) - NOP lq.xyz VF13,68(VI00) - NOP lq.xyz VF09,69(VI00) - mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) - maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) - NOP esadd P,VF13 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 - mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 - mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) - NOP lq.xyz VF09,0(VI03) - NOP mr32.xyw VF16,VF14 - adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF09,VF00,VF16y NOP - maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - mul.xyz VF14,VF13,VF16 NOP - mul.xyz VF17,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 - adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 - maddx.z VF16,VF08,VF17x NOP - mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1 - addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__PRO2] - 
maddy.w VF09,VF00,VF16y NOP - maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF17,VF15 - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x NOP - mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0 - mula.xyz ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 - maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) - mul.w VF07,VF06,VF06 NOP - maddaw.xyz ACC,VF11,VF05w NOP - maxx.w VF06,VF09,VF00x NOP - mul.xyz VF17,VF13,VF16 NOP - mul.w VF05,VF07,VF07 NOP - madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF18,VF12,VF16 NOP - mul.w VF05,VF05,VF05 NOP - add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 - mul.w VF06,VF06,VF06 move.xyz VF17,VF15 - adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) - maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 - addax.w ACC,VF16,VF16x sq.xyz VF20,-11(VI06) - mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - mula.xyz ACC,VF17,VF19 NOP -EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF09,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP - maxx.z 
VF16,VF16,VF00x NOP - add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - NOP sq.xyz VF14,-8(VI06) - mul.w VF07,VF06,VF06 NOP - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 - madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) - mul.w VF05,VF07,VF07 NOP - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,-2(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 23 [38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) - NOP NOP - mul.w VF07,VF06,VF06 NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - maxx.z VF16,VF16,VF00x NOP - mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF15,VF10,VF16z NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF12,VF13 NOP - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) - mul.w VF05,VF07,VF07 move.xyz VF15,VF15 - add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 - maddaw.xyz ACC,VF11,VF05w NOP - madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 15 
[38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF09,VF00,VF16y NOP - maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - maxx.z VF16,VF16,VF00x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) - mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP move.xyz VF10,VF10 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - maddaw.xyz ACC,VF11,VF05w NOP ; STALL_LATENCY ?2 - madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) - add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_pp4_vcl_22] - maxw.z VF08,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF09,71(VI00) - NOP lq.xyz VF10,3(VI03) - NOP lq.xyz VF12,72(VI00) - NOP lq.xyz VF13,73(VI00) - mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 - madday.xyz ACC,VF12,VF10y xtop VI06 - maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 - maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 - sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) - NOP sqrt Q,VF15z ; 
STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF15,VF00,Q iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP div Q,VF00w,VF15y - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) - mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 - NOP iaddiu VI06,VI06,0 - mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF15,VF15,VF12 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) - sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 - mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 - maddx.z VF20,VF08,VF20x NOP - NOP waitp - addw.x VF20,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF20z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - NOP move.xyz VF18,VF17 - NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF20x mfp.w VF06,P -; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 NOP - madday.w ACC,VF00,VF20y esadd P,VF17 - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF19,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF19x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z waitp - mulw.xyz VF21,VF10,VF05w mfp.w VF06,P 
- mulax.w ACC,VF00,VF14x sqrt Q,VF20z - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - addw.x VF20,VF00,VF00w ersqrt P,VF06w - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - addq.y VF20,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) - maddz.w VF05,VF00,VF14z NOP - mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 - mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 - mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF20x mfp.w VF06,P -EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF20y NOP - mulw.xyz VF21,VF18,VF06w NOP - sub.xyz VF18,VF13,VF22 esadd P,VF17 - maddaw.xyz ACC,VF11,VF07w NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF21,VF21,VF19 NOP - mul.xyz VF20,VF18,VF18 NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF21x NOP - adday.z ACC,VF20,VF20y NOP - maddx.z VF20,VF08,VF20x NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF07,VF00,VF21z mfp.w VF06,P - NOP lq.xyz VF22,-11(VI06) - mulq.xyz VF19,VF19,Q sqrt Q,VF20z - mulw.xyz VF21,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - add.xyz VF19,VF22,VF19 NOP - addw.x VF20,VF00,VF00w NOP - mul.w VF06,VF05,VF05 NOP - addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) - madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) - mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y - mula.xyz ACC,VF21,VF22 NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz 
VF20,VF16,VF19 NOP - NOP NOP - NOP NOP - mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 - mul.w VF07,VF06,VF06 move.xyz VF18,VF17 - NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF20x mfp.w VF06,P -EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - madday.w ACC,VF00,VF20y NOP - mul.w VF07,VF07,VF07 NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF20,VF18,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF10,VF05w NOP - madd.xyz VF19,VF09,VF05 ersqrt P,VF06w - maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 - mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 - mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) - mul.w VF06,VF06,VF06 NOP - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 - mul.w VF07,VF06,VF06 mfp.w VF06,P - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w 
VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz VF19,-5(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF12,VF07,VF16 NOP - mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 - mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF19x mfp.w VF06,P - madday.w ACC,VF00,VF19y NOP - maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) - mulw.xyz VF20,VF10,VF05w NOP - mulax.w ACC,VF00,VF14x ersqrt P,VF06w - madday.w ACC,VF00,VF14y NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF20,VF18 NOP - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 - mulax.w ACC,VF00,VF20x NOP - mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF06,VF00,VF20z NOP - mul.xyz VF12,VF12,VF19 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - 
mulax.w ACC,VF00,VF12x NOP - madday.w ACC,VF00,VF12y NOP - maddz.w VF07,VF00,VF12z NOP - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF19,Q NOP - mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - madday.w ACC,VF00,VF14y NOP - maddz.w VF05,VF00,VF14z NOP - add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) - mul.w VF07,VF07,VF07 NOP - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - mula.xyz ACC,VF10,VF12 NOP - maddaw.xyz ACC,VF11,VF07w NOP - madd.xyz VF19,VF09,VF05 sq.xyz VF19,238(VI06) - mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 - add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF12,VF07,VF16 lq.xyz VF13,-3(VI04) - mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF16y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF12,VF12 - NOP mfp.w VF06,P - mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF16z NOP - mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF16y NOP - maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) - mulw.xyz VF10,VF10,VF05w NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF10,VF16 NOP - mul.w 
VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF15y NOP - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 - NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF09,VF09,VF05 NOP - mulq.xyz VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz 
VF06,VF07,I NOP -EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_pp4_vcl_30] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralPVDiff_CodeEnd: -; iCount=654 -; register stats: -; 15 VU User integer -; 23 VU User floating point diff --git a/vu1/general_quad_vcl.vsm b/vu1/general_quad_vcl.vsm deleted file mode 100644 index 0963cd46..00000000 --- a/vu1/general_quad_vcl.vsm +++ /dev/null @@ -1,794 +0,0 @@ -; === __LP__ EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === normal1 : optimal=46 clid=1 mlid=3 size=(47) -; === __LP__ EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=10 mlid=10 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=3 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) -; === __LP__ EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : 
optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) -; === __LP__ EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralQuad_CodeStart - .global vsmGeneralQuad_CodeEnd -vsmGeneralQuad_CodeStart: -__v_vu1_general_quad_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_quad_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 45 [45 0] 45 [main_loop_lid] - NOP xtop VI05 - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - NOP iaddiu VI03,VI05,0x00000005 - NOP lq.xyz VF20,0(VI03) - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - mul.xyz VF09,VF09,VF08 iaddiu VI04,VI05,0x000000ed - mulax ACC,VF01,VF20x ilw.x VI05,0(VI05) - madday ACC,VF02,VF20y lq.xyz VF08,6(VI03) - maddaz ACC,VF03,VF20z mr32.z VF05,VF05 - maddw VF20,VF04,VF00w lq VF06,75(VI00) - addi.xy VF05,VF00,I lq.xyz VF14,9(VI03) - mulax ACC,VF01,VF08x loi 0x45000000 - madday ACC,VF02,VF08y ilw.w VI07,0(VI00) - maddaz ACC,VF03,VF08z div Q,VF00w,VF20w - maddw VF16,VF04,VF00w iadd VI06,VI03,VI05 - mulax ACC,VF01,VF14x lq.xyz VF13,3(VI03) - madday ACC,VF02,VF14y mtir VI08,VF06x - maddaz ACC,VF03,VF14z ior VI08,VI08,VI05 - maddw VF14,VF04,VF00w mfir.x VF06,VI08 - 
mulax ACC,VF01,VF13x iadd VI06,VI06,VI05 - madday ACC,VF02,VF13y iadd VI06,VI06,VI05 - maddaz ACC,VF03,VF13z lq.xyz VF07,58(VI00) - maddw VF13,VF04,VF00w sq VF06,-1(VI04) - NOP iaddiu VI08,VI00,0x00007fff - NOP iaddiu VI08,VI08,0x00000001 - NOP ilw.w VI02,76(VI00) - NOP div Q,VF00w,VF13w - add.xyz VF09,VF07,VF09 lq.xyz VF07,7(VI03) - NOP lq.xyz VF15,2(VI03) - mulq.xyz VF08,VF20,Q lq.xyz VF06,76(VI00) - NOP fcset 0 - maxi.w VF07,VF00,I lq.xyz VF17,10(VI03) - mulq.xyz VF15,VF15,Q sq.xyz VF07,10(VI03) - mul.xyz VF10,VF08,VF06 div Q,VF00w,VF16w - NOP lq.xyz VF07,5(VI03) - mulq.xyz VF13,VF13,Q iaddiu VI01,VI03,0 - add.xyz VF11,VF08,VF05 sq.xyz VF17,7(VI03) - clipw.xyz VF10xyz,VF07w lq.xyz VF17,11(VI03) - mulq.xyz VF10,VF07,Q lq.xyz VF20,8(VI03) - sub.xyz VF12,VF08,VF13 iaddiu VI03,VI01,0x0000000c - mul.xyz VF21,VF13,VF06 div Q,VF00w,VF14w - add.xyz VF08,VF13,VF05 ibeq VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1 - mulq.xyz VF16,VF16,Q lq.w VF08,0(VI00) -; _LNOPT_w=[ ] 47 [45 0] 47 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF19,VF20,Q NOP - NOP NOP - NOP NOP - clipw.xyz VF21xyz,VF07w NOP - mulq.xyz VF17,VF17,Q lq.xyz VF20,0(VI03) - mulq.xyz VF18,VF14,Q NOP - sub.xyz VF13,VF16,VF13 NOP - add.xyz VF07,VF16,VF05 NOP - mulax ACC,VF01,VF20x NOP - madday ACC,VF02,VF20y lq.xyz VF22,6(VI03) - maddaz ACC,VF03,VF20z NOP - maddw VF20,VF04,VF00w iaddiu VI01,VI03,0 - mul.xyz VF21,VF16,VF06 iaddiu VI09,VI04,0 - mulax ACC,VF01,VF22x lq.xyz VF14,9(VI03) - madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) - maddaz ACC,VF03,VF22z div Q,VF00w,VF20w - maddw VF16,VF04,VF00w lq.xyz VF23,7(VI03) - mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) - madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) - mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) - maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) - maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) - mulq.xyz VF22,VF20,Q lq.xyz VF17,11(VI03) - mulax ACC,VF01,VF24x lq.xyz VF23,5(VI03) - madday ACC,VF02,VF24y lq.xyz VF25,2(VI03) - maddaz 
ACC,VF03,VF24z sq.xyz VF19,9(VI04) - maddw VF24,VF04,VF00w sq.xyz VF10,3(VI04) - ftoi4.xyz VF11,VF11 sq.xyz VF09,4(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,7(VI04) - mul.xyz VF19,VF22,VF06 sq.xyz VF09,1(VI04) - clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w - mulw.xyz VF10,VF12,VF08w mfir.w VF11,VI08 - add.xyz VF12,VF18,VF05 lq.xyz VF20,8(VI03) - clipw.xyz VF19xyz,VF07w iaddiu VI03,VI01,0x0000000c - mulq.xyz VF15,VF25,Q fcand VI01,16777215 - opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,10(VI04) - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq VF11,2(VI04) - ftoi4.xyz VF18,VF08 div Q,VF00w,VF16w - mulq.xyz VF13,VF24,Q mfir.w VF18,VI08 - ftoi4.xyz VF19,VF12 iand VI01,VI01,VI02 - mulq.xyz VF10,VF23,Q fmand VI10,VI07 - add.xyz VF11,VF22,VF05 ior VI01,VI01,VI10 - sub.xyz VF12,VF22,VF13 iaddiu VI01,VI01,0x00007fff - mul.xyz VF21,VF13,VF06 mfir.w VF19,VI01 - add.xyz VF08,VF13,VF05 div Q,VF00w,VF14w - ftoi4.xyz VF22,VF07 ibeq VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0 - mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 -EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ normal1 ] 47 [45 45] 47 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP sq VF19,8(VI04) - mulq.xyz VF19,VF20,Q sq VF18,5(VI04) - ftoi4.xyz VF11,VF11 NOP - sub.xyz VF13,VF16,VF13 sq VF22,11(VI04) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI09,0x0000000c - add.xyz VF07,VF16,VF05 lq.xyz VF20,0(VI03) - mulq.xyz VF18,VF14,Q NOP - clipw.xyz VF21xyz,VF07w lq.xyz VF22,6(VI03) - mul.xyz VF21,VF16,VF06 iaddiu VI01,VI03,0 - mulax ACC,VF01,VF20x iaddiu VI09,VI04,0 - madday ACC,VF02,VF20y NOP - maddaz ACC,VF03,VF20z lq.xyz VF14,9(VI03) - maddw VF20,VF04,VF00w NOP - mulax ACC,VF01,VF22x lq.xyz VF23,7(VI03) - madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) - maddaz ACC,VF03,VF22z NOP - maddw VF16,VF04,VF00w div Q,VF00w,VF20w - mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) - madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) - mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) - maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) - 
maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) - mulax ACC,VF01,VF24x lq.xyz VF17,11(VI03) - mulq.xyz VF22,VF20,Q lq.xyz VF23,5(VI03) - madday ACC,VF02,VF24y lq.xyz VF20,8(VI03) - maddaz ACC,VF03,VF24z lq.xyz VF25,2(VI03) - maddw VF24,VF04,VF00w iaddiu VI03,VI01,0x0000000c - mul.xyz VF19,VF22,VF06 sq.xyz VF19,9(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF10,3(VI04) - mulw.xyz VF10,VF12,VF08w sq.xyz VF09,4(VI04) - clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w - clipw.xyz VF19xyz,VF07w sq.xyz VF09,7(VI04) - add.xyz VF12,VF18,VF05 sq.xyz VF09,1(VI04) - mulq.xyz VF15,VF25,Q mfir.w VF11,VI08 - opmula.xyz ACCxyz,VF10xyz,VF13xyz fcand VI01,16777215 - ftoi4.xyz VF18,VF08 iand VI01,VI01,VI02 - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,10(VI04) - mulq.xyz VF13,VF24,Q div Q,VF00w,VF16w - mulq.xyz VF10,VF23,Q sq VF11,2(VI04) - add.xyz VF11,VF22,VF05 mfir.w VF18,VI08 - ftoi4.xyz VF19,VF12 fmand VI10,VI07 - sub.xyz VF12,VF22,VF13 ior VI01,VI01,VI10 - mul.xyz VF21,VF13,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF08,VF13,VF05 mfir.w VF19,VI01 - NOP div Q,VF00w,VF14w - ftoi4.xyz VF22,VF07 ibne VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP - mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 -EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 32 [26 0] 32 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF19,VF20,Q sq VF19,8(VI04) - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF17,Q mfir.w VF11,VI08 - NOP NOP - mulq.xyz VF18,VF14,Q sq VF18,5(VI04) - NOP NOP - NOP NOP - clipw.xyz VF21xyz,VF07w sq VF22,11(VI04) - sub.xyz VF13,VF16,VF13 iaddiu VI04,VI09,0 - mul.xyz VF15,VF18,VF06 sq.xyz VF15,12(VI04) - mul.xyz VF21,VF16,VF06 sq.xyz VF19,21(VI04) - mulw.xyz VF10,VF12,VF08w sq.xyz VF10,15(VI04) - NOP sq.xyz VF17,18(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,16(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF09,19(VI04) - opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,13(VI04) - opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,22(VI04) - NOP mfir.w VF18,VI08 - 
add.xyz VF12,VF18,VF05 fcand VI01,16777215 - ftoi4.xyz VF11,VF11 iand VI02,VI01,VI02 - add.xyz VF07,VF16,VF05 fmand VI07,VI07 - ftoi4.xyz VF18,VF08 ior VI02,VI02,VI07 - ftoi4.xyz VF19,VF12 iaddiu VI02,VI02,0x00007fff - NOP mfir.w VF19,VI02 - ftoi4.xyz VF22,VF07 sq VF11,14(VI04) - NOP mfir.w VF22,VI02 - NOP sq VF18,17(VI04) - NOP sq VF19,20(VI04) - NOP b EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF22,23(VI04) -EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 29 [22 0] 29 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1] - mulq.xyz VF20,VF20,Q NOP - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF17,Q NOP - mulq.xyz VF14,VF14,Q NOP - ftoi4.xyz VF11,VF11 NOP - sub.xyz VF13,VF16,VF13 NOP - mulw.xyz VF10,VF12,VF08w sq.xyz VF10,3(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF15,0(VI04) - mul.xyz VF15,VF14,VF06 sq.xyz VF09,4(VI04) - mul.xyz VF21,VF16,VF06 sq.xyz VF09,7(VI04) - opmula.xyz ACCxyz,VF10xyz,VF13xyz mfir.w VF11,VI08 - opmsub.xyz VF18xyz,VF13xyz,VF10xyz sq.xyz VF20,9(VI04) - clipw.xyz VF15xyz,VF07w sq.xyz VF09,1(VI04) - clipw.xyz VF21xyz,VF07w sq.xyz VF09,10(VI04) - NOP sq VF11,2(VI04) - abs.xyz VF00,VF18 mfir.w VF08,VI08 - add.xyz VF12,VF14,VF05 fmand VI07,VI07 - add.xyz VF07,VF16,VF05 fcand VI01,16777215 - NOP iand VI02,VI01,VI02 - ftoi4.xyz VF08,VF08 ior VI02,VI02,VI07 - ftoi4.xyz VF11,VF12 iaddiu VI02,VI02,0x00007fff - ftoi4.xyz VF07,VF07 mfir.w VF11,VI02 - NOP mfir.w VF07,VI02 - NOP sq.xyz VF17,6(VI04) - NOP sq VF08,5(VI04) - NOP sq VF11,8(VI04) - NOP sq VF07,11(VI04) -EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 8 [8 0] 8 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP lq.xyz VF07,61(VI00) - NOP lq.xyz VF08,66(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_quad_pp4_vcl_9] - maxw.z 
VF09,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,67(VI00) - NOP lq.xyz VF13,3(VI03) - NOP lq.xyz VF14,68(VI00) - NOP lq.xyz VF10,69(VI00) - mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) - maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) - NOP esadd P,VF14 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 - mul.xyz VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) - NOP lq.xyz VF10,0(VI03) - NOP mr32.xyw VF09,VF15 - NOP iaddiu VI04,VI04,0x00000003 - mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF18,VF14,VF17 NOP - mul.w VF05,VF05,VF05 NOP - NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF15z NOP - addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 - mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - 
mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF14,VF17 NOP - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF19,VF14,VF17 NOP - add.xyz VF18,VF18,VF15 NOP - maddaw.xyz ACC,VF12,VF06w NOP - mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 - mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - 
maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) - mul.w VF06,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF17z NOP - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; STALL_LATENCY ?2 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - adday.z ACC,VF18,VF18y NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maxx.z VF18,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF18z NOP - maddaw.xyz ACC,VF12,VF06w NOP - madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) - mul.w VF06,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,241(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF07,VF00,VF09y NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w 
VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,238(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_quad_pp4_vcl_15] - maxw.z VF09,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) - NOP lq.xyz VF14,72(VI00) - mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 - madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP sqrt Q,VF16z ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) - NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP 
NOP - NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 - NOP iaddiu VI06,VI06,0 - mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) - sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 - maddx.z VF21,VF09,VF21x NOP - NOP waitp - addw.x VF21,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF21z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 NOP - madday.w ACC,VF00,VF21y esadd P,VF18 - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z waitp - maxx.w VF05,VF06,VF00x mfp.w VF06,P - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - addq.y VF21,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 - addw.x VF21,VF00,VF00w 
NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - maddz.w VF05,VF00,VF15z NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF21y NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 esadd P,VF18 - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF20x NOP - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - add.xyz VF22,VF22,VF23 NOP - addw.x VF21,VF00,VF00w NOP - mul.w VF06,VF07,VF07 NOP - addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) - mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y - NOP NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF21,VF17,VF20 NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 
93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF19,VF20 NOP - maddaw.xyz ACC,VF12,VF07w NOP - madd.xyz VF21,VF10,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz 
VF17,VF13,Q lq.xyz VF13,-5(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,-5(VI06) - NOP b EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - NOP waitp ; STALL_LATENCY ?1 - mul.w VF07,VF06,VF06 mfp.w VF06,P - mul.xyz VF21,VF17,VF20 NOP - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - 
mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,241(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) - mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF17y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF13,VF13 - NOP mfp.w VF06,P - mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF17z NOP - mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF17y NOP - maddz.w VF07,VF00,VF17z NOP - mulw.xyz VF11,VF11,VF05w NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF11,VF06 NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF16z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz 
VF10,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 
NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_quad_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralQuad_CodeEnd: -; iCount=695 -; register stats: -; 11 VU User integer -; 26 VU User floating point diff --git a/vu1/general_tri_vcl.vsm b/vu1/general_tri_vcl.vsm deleted file mode 100644 index 58404d13..00000000 --- a/vu1/general_tri_vcl.vsm +++ /dev/null @@ -1,689 +0,0 @@ -; === __LP__ EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === hDown : optimal=35 clid=0 mlid=2 size=(36) -; === dUp : optimal=35 clid=0 mlid=1 size=(36) -; === normal : optimal=35 clid=0 mlid=1 size=(36) -; === another : optimal=35 clid=0 mlid=2 size=(36) -; === normal2 : optimal=35 clid=0 mlid=2 size=(36) -; === __LP__ EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=10 mlid=10 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=3 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) -; === __LP__ EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) -; === __LP__ EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 
mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmGeneralTri_CodeStart - .global vsmGeneralTri_CodeEnd -vsmGeneralTri_CodeStart: -__v_vu1_general_tri_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_tri_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ vuta1 ] 48 [48 0] 48 [main_loop_lid] - NOP lq.w VF05,57(VI00) - NOP loi 0x44fff000 - NOP xtop VI05 - NOP iaddiu VI03,VI05,0x00000005 - NOP mr32.z VF05,VF05 - NOP iaddiu VI04,VI05,0x000000ed - NOP ilw.x VI05,0(VI05) - NOP lq.xyz VF09,57(VI00) - NOP lq.xyz VF08,59(VI00) - NOP lq VF06,75(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP ilw.w VI07,0(VI00) - mul.xyz VF09,VF09,VF08 lq.xyz VF08,3(VI03) - NOP iadd VI06,VI03,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI08,VF06x - mulax ACC,VF01,VF08x ior VI08,VI08,VI05 - madday ACC,VF02,VF08y mfir.x VF06,VI08 - maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 - maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) - NOP lq.xyz VF07,58(VI00) - NOP sq VF06,-1(VI04) - NOP iaddiu VI08,VI00,0x00007fff - mulax ACC,VF01,VF14x div Q,VF00w,VF15w - madday ACC,VF02,VF14y lq.xyz VF08,6(VI03) - maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 - maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) - add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) - mulax ACC,VF01,VF08x lq.xyz VF07,5(VI03) - madday ACC,VF02,VF08y lq.w VF08,0(VI00) - maddaz ACC,VF03,VF08z div Q,VF00w,VF14w - mulq.xyz VF08,VF15,Q fcset 0 - mulq.xyz VF07,VF07,Q iaddiu 
VI09,VI03,0 - maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 - NOP sq.xyz VF09,1(VI04) - add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) - maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mul.xyz VF13,VF08,VF06 mfir.w VF10,VI08 - ftoi4.xyz VF10,VF10 lq.xyz VF11,8(VI03) - mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) - sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) - mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 - add.xyz VF10,VF07,VF05 sq VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 - mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) - clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF12,VF10 lq.xyz VF11,3(VI03) - sub.xyz VF10,VF07,VF08 NOP - mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 - mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) - mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) - madday ACC,VF02,VF11y mfir.w VF12,VI08 - maddaz ACC,VF03,VF11z NOP - maddw VF15,VF04,VF00w NOP - mulax ACC,VF01,VF14x lq.xyz VF11,6(VI03) - madday ACC,VF02,VF14y sq VF12,2(VI09) - maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 - maddw VF14,VF04,VF00w div Q,VF00w,VF15w - mulax ACC,VF01,VF11x lq.xyz VF16,5(VI03) - madday ACC,VF02,VF11y iaddiu VI10,VI04,0 - maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) - maddw VF12,VF04,VF00w lq.xyz VF11,8(VI03) - clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) - opmula.xyz ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 - mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w - mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) - opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 - NOP iand VI03,VI01,VI02 - mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) - add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) - add.xyz VF17,VF07,VF05 fmand VI01,VI07 - mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w - mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 - ftoi4.xyz VF10,VF10 
iaddiu VI03,VI11,0 - ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff - sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 - mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) - add.xyz VF10,VF07,VF05 sq VF10,5(VI04) - mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 - mulq.xyz VF15,VF11,Q sq VF15,8(VI09) - clipw.xyz VF16xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 -EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 20 [18 0] 21 [EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0] - ftoi4.xyz VF09,VF10 NOP - sub.xyz VF10,VF07,VF08 sq.xyz VF15,6(VI09) - mul.xyz VF08,VF07,VF06 NOP - mulw.xyz VF14,VF14,VF08w mfir.w VF09,VI08 - add.xyz VF07,VF07,VF05 lq.xyz VF06,60(VI00) - clipw.xyz VF08xyz,VF07w lq.xyz VF05,59(VI00) ; STALL_LATENCY ?1 - opmula.xyz ACCxyz,VF14xyz,VF10xyz sq VF09,2(VI09) - opmsub.xyz VF11xyz,VF10xyz,VF14xyz lq.xyz VF08,66(VI00) - ftoi4.xyz VF15,VF07 lq.xyz VF07,61(VI00) - NOP fcand VI01,262143 - NOP iand VI02,VI01,VI02 - abs.xyz VF00,VF11 fmand VI07,VI07 - NOP ior VI02,VI02,VI07 - NOP iaddiu VI02,VI02,0x00007fff - NOP ilw.x VI01,0(VI00) - NOP mfir.w VF15,VI02 - NOP iaddiu VI02,VI00,0x00000001 - NOP ilw.x VI03,0(VI02) - NOP ibeq VI01,VI00,pt_lights_lid - NOP sq VF15,8(VI09) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_tri_pp4_vcl_9] - maxw.z VF09,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,67(VI00) - NOP lq.xyz VF13,3(VI03) - NOP lq.xyz VF14,68(VI00) - NOP lq.xyz VF10,69(VI00) - mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) - maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) - NOP esadd P,VF14 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP 
mfp.w VF05,P - mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 - mul.xyz VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) - NOP lq.xyz VF10,0(VI03) - NOP mr32.xyw VF09,VF15 - NOP iaddiu VI04,VI04,0x00000003 - mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF18,VF14,VF17 NOP - mul.w VF05,VF05,VF05 NOP - NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF15z NOP - addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 - mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF14,VF17 NOP - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 
[EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF19,VF14,VF17 NOP - add.xyz VF18,VF18,VF15 NOP - maddaw.xyz ACC,VF12,VF06w NOP - mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 - mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) - mul.w VF06,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF17z NOP - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; STALL_LATENCY ?2 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; 
_LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - adday.z ACC,VF18,VF18y NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maxx.z VF18,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF18z NOP - maddaw.xyz ACC,VF12,VF06w NOP - madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) - mul.w VF06,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,241(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF07,VF00,VF09y NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,238(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 
[EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_tri_pp4_vcl_15] - maxw.z VF09,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) - NOP lq.xyz VF14,72(VI00) - mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 - madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP sqrt Q,VF16z ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) - NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 - NOP iaddiu VI06,VI06,0 - mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) - sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF21,VF21y NOP ; 
STALL_LATENCY ?3 - maddx.z VF21,VF09,VF21x NOP - NOP waitp - addw.x VF21,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF21z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 NOP - madday.w ACC,VF00,VF21y esadd P,VF18 - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z waitp - maxx.w VF05,VF06,VF00x mfp.w VF06,P - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - addq.y VF21,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 - addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - maddz.w VF05,VF00,VF15z NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 
[EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF21y NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 esadd P,VF18 - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF20x NOP - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - add.xyz VF22,VF22,VF23 NOP - addw.x VF21,VF00,VF00w NOP - mul.w VF06,VF07,VF07 NOP - addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) - mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y - NOP NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF21,VF17,VF20 NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF19,VF20 NOP - maddaw.xyz ACC,VF12,VF07w NOP - madd.xyz VF21,VF10,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz 
VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF17,VF13,Q lq.xyz VF13,-5(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,-5(VI06) - NOP b EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz 
VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - NOP waitp ; STALL_LATENCY ?1 - mul.w VF07,VF06,VF06 mfp.w VF06,P - mul.xyz VF21,VF17,VF20 NOP - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,241(VI06) 
; STALL_LATENCY ?1 -EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) - mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF17y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF13,VF13 - NOP mfp.w VF06,P - mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF17z NOP - mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF17y NOP - maddz.w VF07,VF00,VF17z NOP - mulw.xyz VF11,VF11,VF05w NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF11,VF06 NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF16z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop 
VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_tri_pp4_vcl_23] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneralTri_CodeEnd: -; iCount=591 -; register stats: -; 12 VU User integer -; 24 VU User floating point diff --git a/vu1/general_vcl.vsm 
b/vu1/general_vcl.vsm deleted file mode 100644 index 3a970928..00000000 --- a/vu1/general_vcl.vsm +++ /dev/null @@ -1,771 +0,0 @@ -; === __LP__ EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === ldumb : optimal=22 clid=0 mlid=2 size=(22) -; === normal1 : optimal=22 clid=0 mlid=2 size=(22) -; === vuta : optimal=22 clid=0 mlid=2 size=(22) -; === dUp : optimal=22 clid=0 mlid=2 size=(22) -; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) -; === __LP__ EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=10 mlid=10 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=3 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) -; === __LP__ EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) -; === __LP__ EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP: -; === dumb : optimal=4 clid=3 mlid=3 size=(5) -; === ldumb : optimal=4 clid=0 mlid=2 size=(5) -; === normal1 : optimal=4 clid=0 mlid=2 size=(5) -; === hDown : optimal=4 clid=0 mlid=2 size=(5) -; === vuta : optimal=4 clid=0 mlid=2 size=(5) -; === dUp : optimal=4 clid=0 mlid=2 size=(5) -; === normal : optimal=4 clid=0 mlid=2 size=(5) -; === another : optimal=4 clid=0 mlid=2 size=(5) -; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) -; === normal2 : optimal=4 clid=0 mlid=2 size=(5) -; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global 
vsmGeneral_CodeStart - .global vsmGeneral_CodeEnd -vsmGeneral_CodeStart: -__v_vu1_general_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 26 [26 0] 26 [main_loop_lid] - NOP loi 0x44fff000 - NOP lq.w VF06,57(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP xtop VI01 - NOP iaddiu VI03,VI01,0x00000005 - NOP mr32.z VF05,VF06 - NOP iaddiu VI04,VI01,0 - NOP ilw.x VI05,0(VI01) - NOP fcset 0 - NOP lq VF08,75(VI00) - NOP lq.xyz VF06,76(VI00) - NOP iadd VI06,VI03,VI05 - NOP iadd VI06,VI06,VI05 - NOP mtir VI07,VF08x - NOP ior VI07,VI07,VI05 - NOP mfir.x VF08,VI07 - NOP iaddiu VI07,VI01,0x00000001 - NOP ilw.w VI02,76(VI00) - NOP iadd VI06,VI06,VI05 - NOP sq VF08,236(VI04) - NOP iaddiu VI01,VI01,0x00000005 - NOP iaddiu VI08,VI07,0x00000004 - NOP iaddiu VI09,VI00,0x000003ff - NOP iaddiu VI10,VI00,0x00000800 - NOP iaddiu VI11,VI00,0x00000400 - maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 -adcLoop_lid: -; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] - NOP lq VF08,0(VI07) - ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 - NOP mtir VI13,VF08x ; STALL_LATENCY ?3 - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_8] - NOP iand VI14,VI13,VI10 - NOP iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,3(VI13) - NOP mtir VI14,VF08y - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_9] - NOP iand VI13,VI14,VI10 - NOP iand VI14,VI14,VI09 - NOP iadd VI14,VI14,VI01 - NOP isw.w VI12,0(VI14) - NOP isw.w VI13,3(VI14) - NOP mtir VI13,VF08z - NOP iand VI14,VI13,VI11 - NOP NOP - NOP ibeq VI14,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_10] - NOP iand VI14,VI13,VI10 - NOP 
iand VI13,VI13,VI09 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP isw.w VI14,3(VI13) - NOP mtir VI14,VF08w - NOP iand VI13,VI14,VI11 - NOP NOP - NOP ibeq VI13,VI11,adcLoop_done_lid - NOP NOP -; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_pp4_vcl_11] - NOP iand VI13,VI14,VI09 - NOP iaddiu VI07,VI07,0x00000001 - NOP iand VI14,VI14,VI10 - NOP iadd VI13,VI13,VI01 - NOP isw.w VI12,0(VI13) - NOP ibne VI07,VI08,adcLoop_lid - NOP isw.w VI14,3(VI13) -adcLoop_done_lid: -; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] - NOP lq.xyz VF12,0(VI03) - mulax ACC,VF01,VF12x iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 - madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) - maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) - maddw VF12,VF04,VF00w lq.w VF05,0(VI00) - mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF12w - add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 - NOP iaddiu VI03,VI03,0x00000003 - NOP lq.xyz VF11,-1(VI03) - mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 - max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1 - max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 -; _LNOPT_w=[ ] 21 [19 0] 22 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__PRO1] - NOP NOP - add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) - sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 - max.xyz VF09,VF12,VF12 NOP - mul.xyz VF14,VF12,VF06 NOP - mulax ACC,VF01,VF15x NOP - madday ACC,VF02,VF15y NOP - maddaz ACC,VF03,VF15z ilw.w VI09,-3(VI03) - maddw VF12,VF04,VF00w isub VI01,VI08,VI07 - mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000003 - opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 - NOP div Q,VF00w,VF12w - NOP sq.xyz VF11,-6(VI04) - abs.xyz VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 - NOP lq.xyz VF11,-1(VI03) - clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibeq 
VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0 - mulw.xyz VF08,VF13,VF05w fcand VI01,262143 -EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP] - ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 - add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) - sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 - max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 - mul.xyz VF15,VF12,VF06 ilw.w VI09,-3(VI03) - mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff - madday ACC,VF02,VF16y mfir.w VF14,VI10 - maddaz ACC,VF03,VF16z isub VI01,VI08,VI07 - maddw VF12,VF04,VF00w iand VI10,VI09,VI08 - mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) - opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) - opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000003 - mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w - clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) - NOP iaddiu VI04,VI04,0x00000003 - NOP fmand VI11,VI08 - NOP lq.xyz VF11,-1(VI03) - NOP isub VI11,VI11,VI07 - NOP ior VI07,VI01,VI10 - mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 - NOP ibne VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP - NOP fcand VI01,262143 -EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0] - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 - opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 - opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 - NOP ilw.w VI09,-3(VI03) - NOP iaddiu VI10,VI10,0x00007fff - clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 - abs.xyz VF00,VF14 fmand VI10,VI08 - NOP isub VI07,VI10,VI07 - ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 - add.xyz VF10,VF12,VF05 fcand VI01,262143 - NOP iand VI01,VI01,VI02 - mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 - NOP ior VI10,VI10,VI09 - ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff - NOP mfir.w VF10,VI10 - NOP sq.xyz VF11,-3(VI04) - NOP sq VF13,-4(VI04) - NOP b 
EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF10,-1(VI04) -EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1] - NOP NOP - NOP NOP - sub.xyz VF09,VF09,VF12 NOP - mul.xyz VF06,VF12,VF06 NOP - opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 - opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP - clipw.xyz VF06xyz,VF07w ilw.w VI03,-3(VI03) - abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 - NOP isub VI07,VI01,VI07 - NOP iand VI08,VI07,VI08 - add.xyz VF07,VF12,VF05 fcand VI01,262143 - NOP iand VI02,VI01,VI02 - NOP ior VI02,VI02,VI08 - mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 - ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF07,VI03 - NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 - NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 -EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 8 [8 0] 8 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP lq.xyz VF07,61(VI00) - NOP lq.xyz VF08,66(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pp4_vcl_16] - maxw.z VF09,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,67(VI00) - NOP lq.xyz VF13,3(VI03) - NOP lq.xyz VF14,68(VI00) - NOP lq.xyz VF10,69(VI00) - mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) - maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 - NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 - add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) - NOP esadd P,VF14 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 - mul.xyz 
VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) - NOP lq.xyz VF10,0(VI03) - NOP mr32.xyw VF09,VF15 - NOP iaddiu VI04,VI04,0x00000003 - mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 - NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF18,VF14,VF17 NOP - mul.w VF05,VF05,VF05 NOP - NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF15z NOP - addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1 - mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF14,VF17 NOP - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 -EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 - adday.z ACC,VF18,VF18y NOP - 
maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF19,VF14,VF17 NOP - add.xyz VF18,VF18,VF15 NOP - maddaw.xyz ACC,VF12,VF06w NOP - mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 - mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) - mul.w VF06,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF17z NOP - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; STALL_LATENCY ?2 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w 
VF05,VF07,VF00x NOP - adday.z ACC,VF18,VF18y NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maxx.z VF18,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF18z NOP - maddaw.xyz ACC,VF12,VF06w NOP - madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) - mul.w VF06,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,241(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 -EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF07,VF00,VF09y NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,238(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 
[pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pp4_vcl_22] - maxw.z VF09,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) - NOP lq.xyz VF14,72(VI00) - mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 - madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI07,VI07,VI05 - NOP sqrt Q,VF16z ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) - NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 - NOP iaddiu VI06,VI06,0 - mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) - sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 - maddx.z VF21,VF09,VF21x NOP - NOP waitp - addw.x VF21,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF21z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 
- NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 NOP - madday.w ACC,VF00,VF21y esadd P,VF18 - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z waitp - maxx.w VF05,VF06,VF00x mfp.w VF06,P - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - addq.y VF21,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 - addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - maddz.w VF05,VF00,VF15z NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 - madday.w ACC,VF00,VF21y NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 esadd P,VF18 - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz 
VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF20x NOP - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - add.xyz VF22,VF22,VF23 NOP - addw.x VF21,VF00,VF00w NOP - mul.w VF06,VF07,VF07 NOP - addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) - mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y - NOP NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF21,VF17,VF20 NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF19,VF20 NOP - maddaw.xyz ACC,VF12,VF07w NOP - madd.xyz VF21,VF10,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - 
add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF17,VF13,Q lq.xyz VF13,-5(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,-5(VI06) - NOP b EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x 
ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - NOP waitp ; STALL_LATENCY ?1 - mul.w VF07,VF06,VF06 mfp.w VF06,P - mul.xyz VF21,VF17,VF20 NOP - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w - mula.xyz ACC,VF20,VF06 NOP - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) - mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 NOP - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 - NOP sq.xyz VF11,238(VI06) - NOP b EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sq.xyz VF10,241(VI06) ; STALL_LATENCY ?1 -EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) - mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF17y NOP - NOP waitp ; 
STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF13,VF13 - NOP mfp.w VF06,P - mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF17z NOP - mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF17y NOP - maddz.w VF07,VF00,VF17z NOP - mulw.xyz VF11,VF11,VF05w NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF11,VF06 NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF16z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 -EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lid: -; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] - NOP lq.w VF05,60(VI00) - NOP xtop VI01 - NOP iaddiu VI02,VI01,0x000000ed - NOP loi 0x43000000 - muli.w VF05,VF05,I iadd VI03,VI02,VI05 - NOP iadd VI03,VI03,VI05 - NOP loi 0x437f0000 - NOP lq.xyz VF05,1(VI02) - minii.w VF05,VF05,I iadd VI03,VI03,VI05 - NOP iaddiu VI02,VI02,0x00000003 - minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 - ftoi0.w VF05,VF05 NOP -; 
_LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pp4_vcl_final_loop_lid__PRO1] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP NOP - ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0 - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP] - NOP lq.xyz VF07,1(VI02) - NOP iaddiu VI02,VI02,0x00000003 - NOP sq VF05,-8(VI02) - ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP - minii.xyz VF06,VF07,I NOP -EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0: -; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0] - NOP NOP - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) - NOP b EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 -EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1: -; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1] - NOP NOP - NOP NOP - ftoi0.xyz VF05,VF06 NOP - NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 -EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI01,0x000000ec - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pp4_vcl_30] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmGeneral_CodeEnd: -; iCount=660 -; register stats: -; 15 VU User integer -; 24 VU User floating point diff --git a/vu1/indexed_vcl.vsm b/vu1/indexed_vcl.vsm deleted file mode 100644 index ba7a6c8d..00000000 --- a/vu1/indexed_vcl.vsm +++ /dev/null @@ -1,718 +0,0 @@ -; === __LP__ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=10 mlid=10 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=3 
size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=5 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) -; === __LP__ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === hDown : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === another : optimal=34 clid=0 mlid=4 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) -; === __LP__ EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === ldumb : optimal=38 clid=0 mlid=2 size=(38) -; === vuta : optimal=38 clid=0 mlid=2 size=(38) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmIndexed_CodeStart - .global vsmIndexed_CodeEnd -vsmIndexed_CodeStart: -__v_vu1_indexed_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [main_loop_lid] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP lq.xyz VF07,61(VI00) - NOP lq.xyz VF08,66(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_7] - maxw.z VF09,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 26 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,67(VI00) - NOP lq.xyz VF13,3(VI03) - NOP lq.xyz VF14,68(VI00) - NOP lq.xyz VF10,69(VI00) - mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) - maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 - add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 - NOP esadd P,VF14 ; 
STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 - mul.xyz VF15,VF14,VF18 iadd VI07,VI04,VI05 ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - NOP iadd VI05,VI07,VI05 - mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) - NOP mr32.xyw VF09,VF15 - NOP iaddiu VI04,VI04,0x00000003 - mul.xyz VF18,VF13,VF18 lq.xyz VF10,0(VI03) - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000ac -; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF18,VF14,VF17 NOP - mul.w VF05,VF05,VF05 NOP - NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF15z NOP - addax.w ACC,VF09,VF09x ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1 - mul.w VF05,VF05,VF05 NOP -; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF14,VF17 NOP - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0 - addax.w ACC,VF09,VF09x NOP 
-EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x lq.xyz VF18,0(VI06) - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF19,VF14,VF17 NOP - add.xyz VF18,VF18,VF15 NOP - maddaw.xyz ACC,VF12,VF06w NOP - mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 - mul.xyz VF18,VF13,VF17 sqi.xyz VF18,(VI06++) - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - maxx.z VF17,VF18,VF00x lq.xyz VF18,0(VI06) - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?1 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) - mul.w VF06,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF17z NOP - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 
-EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - adday.z ACC,VF18,VF18y NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maxx.z VF18,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF18z NOP - maddaw.xyz ACC,VF12,VF06w NOP - madd.xyz VF13,VF10,VF05 lq.xyz VF18,0(VI06) - mul.w VF06,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF07,VF00,VF09y NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 
[EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lighting_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] - maxw.z VF09,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) - NOP lq.xyz VF14,72(VI00) - mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 - madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI05,VI07,VI05 - NOP sqrt Q,VF16z ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) - NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 - NOP NOP - mul.xyz VF12,VF12,VF07 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0x000000ac -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) - sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY 
?3 - maddx.z VF21,VF09,VF21x NOP - NOP waitp - addw.x VF21,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF21z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - NOP NOP - NOP NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 NOP - madday.w ACC,VF00,VF21y esadd P,VF18 - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z waitp - maxx.w VF05,VF06,VF00x mfp.w VF06,P - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - addq.y VF21,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 - addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - maddz.w VF05,VF00,VF15z NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) - mul.w 
VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 esadd P,VF18 - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF20x NOP - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI06) - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - add.xyz VF22,VF22,VF23 NOP - addw.x VF21,VF00,VF00w NOP - mul.w VF06,VF07,VF07 NOP - addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI06++) - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) - mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y - NOP NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF21,VF17,VF20 NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF19,VF20 NOP - maddaw.xyz ACC,VF12,VF07w NOP - madd.xyz VF21,VF10,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 
- mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI06) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI06++) ; STALL_LATENCY ?1 - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 NOP - mula.xyz ACC,VF20,VF06 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulq.xyz VF17,VF13,Q waitq - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - maxx.w VF05,VF06,VF00x waitp ; 
STALL_LATENCY ?3 - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) - NOP waitp ; STALL_LATENCY ?1 - mul.w VF07,VF06,VF06 mfp.w VF06,P - mul.xyz VF21,VF17,VF20 NOP - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 NOP - mula.xyz ACC,VF20,VF06 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulq.xyz VF17,VF13,Q waitq - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) - mul.xyz 
VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF17y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF13,VF13 - NOP mfp.w VF06,P - mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF17z NOP - mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF17y NOP - maddz.w VF07,VF00,VF17z NOP - mulw.xyz VF11,VF11,VF05w NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF11,VF06 NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF16z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?3 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?3 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) -done_lighting_lid: -; _LNOPT_w=[ vuta ] 51 [50 0] 58 [done_lighting_lid] - NOP lq.w VF05,60(VI00) - NOP loi 0x43000000 - muli.w VF10,VF05,I xtop VI04 ; STALL_LATENCY ?2 - NOP ilw.y VI08,0(VI04) - NOP loi 0x437f0000 - maxi.w VF12,VF00,I ilw.z VI03,0(VI04) - minii.w VF10,VF10,I loi 0x437f0000 - NOP lq.xyz VF05,75(VI00) - NOP 
iaddiu VI06,VI04,0x00000005 - maxi.y VF10,VF00,I loi 0x40400000 - NOP mtir VI02,VF05x ; STALL_LATENCY ?1 - NOP ior VI03,VI02,VI03 - NOP mfir.x VF05,VI03 - NOP iaddiu VI03,VI00,0x0000004e - NOP mfir.w VF05,VI03 - maxi.z VF09,VF00,I loi 0x437d0000 - NOP iadd VI08,VI06,VI08 - NOP ilw.w VI09,0(VI06) - NOP sq VF05,77(VI00) - NOP lqi.w VF05,(VI06++) - NOP iaddiu VI05,VI04,0x000000ac - NOP iaddiu VI04,VI04,0x00000005 - NOP iaddiu VI07,VI00,0x000000ff - NOP iand VI09,VI09,VI07 - maxi.w VF08,VF00,I iadd VI01,VI09,VI09 - addy.w VF06,VF05,VF10y iadd VI01,VI01,VI09 - mulz.w VF05,VF05,VF09z iadd VI10,VI01,VI04 - NOP lq.xyz VF11,0(VI10) - add.w VF05,VF05,VF08 lq.w VF09,57(VI00) ; STALL_LATENCY ?2 - mulax ACC,VF01,VF11x loi 0x45000000 - madday ACC,VF02,VF11y NOP - maddaz ACC,VF03,VF11z iadd VI09,VI09,VI05 - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI09) - NOP mtir VI11,VF05w - NOP div Q,VF00w,VF13w ; STALL_LATENCY ?2 - NOP iadd VI09,VI11,VI04 - NOP lq.xyz VF12,0(VI09) - maxi.w VF07,VF00,I mr32.z VF05,VF09 - miniw.xyz VF11,VF07,VF12w loi 0x44fff000 - addi.xy VF05,VF00,I iaddiu VI02,VI00,0x0000004b - mulax ACC,VF01,VF12x xgkick VI02 - mulq.xyz VF08,VF13,Q ilw.w VI02,76(VI00) - madday ACC,VF02,VF12y lq.xyz VF06,76(VI00) - maddaz ACC,VF03,VF12z fcset 0 - maddw VF16,VF04,VF00w mtir VI01,VF06w - add.xyz VF13,VF08,VF05 lq.xyz VF14,2(VI10) - mul.xyz VF08,VF08,VF06 iadd VI01,VI01,VI05 - NOP lq.xyz VF12,0(VI01) - ftoi0.w VF11,VF10 div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI09) -; _LNOPT_w=[ ] 38 [37 0] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI06) - NOP NOP - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) - mulq.xyz VF11,VF16,Q fcand VI01,262143 - NOP iand VI11,VI11,VI07 - NOP iadd VI10,VI11,VI11 - addy.w VF06,VF05,VF10y iadd VI10,VI10,VI11 - mulz.w VF05,VF05,VF09z iadd VI12,VI10,VI04 - mul.xyz VF07,VF11,VF06 iadd VI11,VI11,VI05 - add.xyz 
VF14,VF11,VF05 lq.xyz VF11,0(VI12) - ftoi4.xyz VF16,VF13 iand VI09,VI01,VI02 - add.w VF05,VF05,VF08 ior VI09,VI09,VI00 - clipw.xyz VF07xyz,VF07w iaddiu VI01,VI09,0x00007fff - mulax ACC,VF01,VF11x mfir.w VF16,VI01 - madday ACC,VF02,VF11y mtir VI10,VF06w - maddaz ACC,VF03,VF11z mtir VI09,VF05w - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI11) - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - NOP iadd VI11,VI09,VI04 - NOP sq.xyz VF15,0(VI03) - NOP div Q,VF00w,VF13w - miniw.xyz VF11,VF07,VF12w iaddiu VI09,VI03,0 - miniw.xyz VF07,VF12,VF12w lq.xyz VF12,0(VI11) - NOP fcand VI01,262143 - ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - NOP iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) - NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP ilw.w VI11,0(VI06) - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) - mulq.xyz VF15,VF14,Q fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI11,VI11,VI07 - addy.w VF06,VF05,VF10y iadd VI10,VI11,VI11 - mulz.w VF05,VF05,VF09z iadd VI10,VI10,VI11 - NOP iadd VI12,VI10,VI04 - add.xyz VF14,VF11,VF05 iadd VI11,VI11,VI05 - mul.xyz VF18,VF11,VF06 mtir VI10,VF06w - add.w VF05,VF05,VF08 lq.xyz VF17,0(VI12) - ftoi4.xyz VF16,VF13 iand VI01,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI01,VI01,VI00 - clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff - mulax ACC,VF01,VF17x mtir VI01,VF05w - madday ACC,VF02,VF17y mfir.w VF16,VI13 - maddaz ACC,VF03,VF17z lq.xyz VF07,0(VI11) - maddw VF13,VF04,VF00w iadd VI11,VI01,VI04 - NOP sq.xyz 
VF15,0(VI03) - NOP sq VF16,2(VI03) - miniw.xyz VF11,VF07,VF12w sq VF11,4(VI09) - NOP div Q,VF00w,VF13w - miniw.xyz VF07,VF12,VF12w iaddiu VI09,VI03,0 - NOP lq.xyz VF12,0(VI11) - NOP fcand VI01,262143 - ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - mulq.xyz VF16,VF08,Q iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) - NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibne VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF09,VF14,Q NOP - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF11,VF16,Q sq VF11,1(VI03) - NOP NOP - NOP fcand VI01,262143 - NOP iand VI01,VI01,VI02 - ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 - mul.xyz VF06,VF11,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF14,VF11,VF05 mfir.w VF16,VI01 - ftoi0.xyz VF11,VF07 NOP - clipw.xyz VF06xyz,VF07w sq.xyz VF09,0(VI03) ; STALL_LATENCY ?1 - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - miniw.xyz VF07,VF12,VF12w sq VF11,4(VI09) - NOP iaddiu VI09,VI03,0 - NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI03,VI03,VI00 - ftoi4.xyz VF12,VF14 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI09) - NOP sq VF11,4(VI09) - NOP b EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF12,5(VI09) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] - mulq.xyz VF07,VF14,Q NOP - NOP NOP - NOP sq VF11,1(VI03) - NOP fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI04,VI01,VI02 - NOP ior VI04,VI04,VI00 - ftoi4.xyz VF16,VF13 iaddiu 
VI04,VI04,0x00007fff - NOP mfir.w VF16,VI04 - add.xyz VF14,VF11,VF05 sq.xyz VF07,0(VI03) - mul.xyz VF11,VF11,VF06 NOP - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) ; STALL_LATENCY ?1 - clipw.xyz VF11xyz,VF07w NOP ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - miniw.xyz VF07,VF12,VF12w iaddiu VI04,VI03,0 - NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi4.xyz VF12,VF14 ior VI03,VI03,VI00 - ftoi0.xyz VF11,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI04) - NOP sq VF11,4(VI04) ; STALL_LATENCY ?1 - NOP sq VF12,5(VI04) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI00,0x0000004d - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_21] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmIndexed_CodeEnd: -; iCount=637 -; register stats: -; 14 VU User integer -; 24 VU User floating point From 6a8c7f99baf68e0c1d36465e8ccaa9d316ec13c3 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Tue, 9 Dec 2025 23:39:33 +0100 Subject: [PATCH 02/17] Some fixes --- cmake/preprocess_vu1.cmake | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/preprocess_vu1.cmake b/cmake/preprocess_vu1.cmake index d77b0245..5955f35f 100644 --- a/cmake/preprocess_vu1.cmake +++ b/cmake/preprocess_vu1.cmake @@ -16,12 +16,15 @@ elseif(STEP STREQUAL "pp2") if(NOT DEFINED GASP_TOOL) message(FATAL_ERROR "GASP_TOOL not defined") endif() + # Use wrapper script for better error handling execute_process( - COMMAND ${GASP_TOOL} -c ";" -I${SOURCE_DIR}/vu1 -o ${OUTPUT} ${INPUT} + COMMAND ${SOURCE_DIR}/cmake/run_masp.sh ${GASP_TOOL} ${SOURCE_DIR}/vu1 ${OUTPUT} ${INPUT} RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error ) if(NOT result EQUAL 0) - message(FATAL_ERROR "Step 2 preprocessing (${GASP_TOOL}) failed") + message(FATAL_ERROR "Step 2 preprocessing (${GASP_TOOL}) 
failed\nOutput: ${output}\nError: ${error}\nInput: ${INPUT}\nOutput: ${OUTPUT}") endif() elseif(STEP STREQUAL "pp3") @@ -36,8 +39,11 @@ elseif(STEP STREQUAL "pp3") elseif(STEP STREQUAL "pp4") # Step 4: C preprocessor with memory layout + # Use -w to suppress warnings about unmatched quotes in assembly comments + # Escape backslashes before preprocessing, then restore them after + # This preserves masp/gasp local labels like \xformed_vert while allowing normal C preprocessing execute_process( - COMMAND /bin/bash -c "cat ${INPUT} | ${COMPILER} -E -P -I${SOURCE_DIR}/vu1 -imacros ${MEM_HEADER} -o ${OUTPUT} -" + COMMAND /bin/bash -c "sed 's/\\\\/\\\\\\\\/g' ${INPUT} | ${COMPILER} -E -P -w -I${SOURCE_DIR}/vu1 -imacros ${MEM_HEADER} - | sed 's/\\\\\\\\/\\\\/g' > ${OUTPUT}" RESULT_VARIABLE result ) if(NOT result EQUAL 0) From 396575ef28fbd76d5268d60b0c360e4fad5d528a Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Wed, 10 Dec 2025 21:00:49 +0100 Subject: [PATCH 03/17] Fix CI/CD --- .github/workflows/compilation.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index d4bc2fcc..bcd0add3 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -30,6 +30,24 @@ jobs: make -j $(getconf _NPROCESSORS_ONLN) make install + - name: Compile masp + run: | + git clone https://github.com/fjtrujy/masp.git + cd masp + cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$PS2DEV" + cmake --build build -j $(getconf _NPROCESSORS_ONLN) + cmake --install build + + - name: Install openvcl + run: | + git clone https://github.com/fjtrujy/openvcl.git + cd openvcl + git checkout ps2gl + make -j $(getconf _NPROCESSORS_ONLN) clean + make -j $(getconf _NPROCESSORS_ONLN) + make -j $(getconf _NPROCESSORS_ONLN) install + + - name: Configure with CMake run: | mkdir build From c819f74a564764ae7c9bcd616d73997b3ada6db1 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Wed, 10 Dec 2025 22:53:14 +0100 Subject: [PATCH 04/17] Fix tricked out example --- examples/CMakeLists.txt | 85 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ded2ac9e..d48dbfb3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -90,8 +90,86 @@ endfunction() add_ps2gl_example(box box/box.cpp) add_ps2gl_example(logo logo/logo.cpp) add_ps2gl_example(performance performance/performance.cpp) -# Note: tricked_out has linking issues (missing CBillboardRenderer symbols) -# add_ps2gl_example(tricked_out tricked_out/tricked_out.cpp) + +# tricked_out example requires special handling due to billboard_renderer VU1 code +# First, assemble the billboard VSM to a .vo file +set(BILLBOARD_VSM "${CMAKE_CURRENT_SOURCE_DIR}/tricked_out/billboard_renderer_vcl.vsm") +set(BILLBOARD_VO "${CMAKE_CURRENT_BINARY_DIR}/tricked_out/billboard_renderer.vo") + +# Create directory for output +file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/tricked_out") + +# Assemble the VSM file to .vo (DVP assembler should be available) +add_custom_command( + OUTPUT ${BILLBOARD_VO} + COMMAND dvp-as -o ${BILLBOARD_VO} ${BILLBOARD_VSM} + DEPENDS ${BILLBOARD_VSM} + COMMENT "Assembling billboard_renderer VU1 code" +) + +# Create a custom target for the billboard VU1 object +add_custom_target(billboard_vu1_object DEPENDS 
${BILLBOARD_VO}) + +# Add tricked_out example with billboard_renderer sources +add_executable(tricked_out + tricked_out/tricked_out.cpp + tricked_out/billboard_renderer.cpp +) + +target_include_directories(tricked_out PRIVATE + ${CMAKE_SOURCE_DIR}/glut/include + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/src # For ps2gl internal headers + ${CMAKE_CURRENT_SOURCE_DIR}/shared_code + ${CMAKE_CURRENT_SOURCE_DIR}/tricked_out + ${PS2SDK}/ports/include +) + +target_compile_options(tricked_out PRIVATE + -Wno-strict-aliasing + -Wno-conversion-null +) + +target_compile_definitions(tricked_out PRIVATE + NO_VU0_VECTORS + NO_ASM +) + +target_link_directories(tricked_out PRIVATE + ${PS2SDK}/ports/lib +) + +# Link the billboard VU1 object file +target_link_libraries(tricked_out + ${BILLBOARD_VO} + shared_code + ps2glut + ps2gl + ps2stuff + pad + dma +) + +add_dependencies(tricked_out billboard_vu1_object) + +set_target_properties(tricked_out PROPERTIES + OUTPUT_NAME "tricked_out.elf" + SUFFIX "" +) + +# Strip the executable +add_custom_command(TARGET tricked_out POST_BUILD + COMMAND ${CMAKE_STRIP} --strip-all $ + COMMENT "Stripping tricked_out.elf" +) + +# Copy data files (car.bin texture) +add_custom_command(TARGET tricked_out POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_SOURCE_DIR}/tricked_out/car.bin + ${CMAKE_CURRENT_BINARY_DIR}/car.bin + COMMENT "Copying car.bin texture" +) # NeHe tutorials add_ps2gl_example(nehe_lesson02 nehe/lesson02/lesson2.cpp) @@ -101,7 +179,6 @@ add_ps2gl_example(nehe_lesson05 nehe/lesson05/lesson5.cpp) message(STATUS "") message(STATUS "ps2gl examples configured:") -message(STATUS " box, logo, performance") +message(STATUS " box, logo, performance, tricked_out") message(STATUS " nehe: lesson02, lesson03, lesson04, lesson05") -message(STATUS " Note: tricked_out disabled due to linking issues") message(STATUS "") From 51b5ef4f77351d113f6c3c2922621ce95ce5c434 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo 
Mata Date: Wed, 10 Dec 2025 23:01:22 +0100 Subject: [PATCH 05/17] Fix CI --- .github/workflows/compilation.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index bcd0add3..52d6ab31 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -17,7 +17,9 @@ jobs: - name: Setup dependencies run: | apk update - apk add cmake build-base git make + apk add cmake build-base git make bash + # Create /bin/bash symlink if it doesn't exist (Alpine installs to /usr/bin/bash) + [ -f /bin/bash ] || ln -s /usr/bin/bash /bin/bash - name: Install ps2stuff run: | From 88ee3da11d308435d846d7d8fab5782ea6f86c49 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Wed, 10 Dec 2025 23:20:57 +0100 Subject: [PATCH 06/17] Update preprocess process --- cmake/preprocess_vu1.cmake | 69 +++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/cmake/preprocess_vu1.cmake b/cmake/preprocess_vu1.cmake index 5955f35f..2f47f2ae 100644 --- a/cmake/preprocess_vu1.cmake +++ b/cmake/preprocess_vu1.cmake @@ -1,54 +1,69 @@ -# VU1 preprocessing script -# Usage: cmake -D INPUT= -D OUTPUT= -D STEP= -D SOURCE_DIR= -D COMPILER= -D MEM_HEADER=
-P preprocess_vu1.cmake +# VU1 preprocessing script - Simplified version +# Usage: cmake -D INPUT= -D OUTPUT= -D STEP= -D SOURCE_DIR= -D COMPILER= -D MEM_HEADER=
-D GASP_TOOL= -P preprocess_vu1.cmake if(STEP STREQUAL "pp1") - # Step 1: Remove #include, #define, fix .include paths - execute_process( - COMMAND /bin/bash -c "cat ${INPUT} | sed -E 's/#include[[:space:]]+.+// ; s/#define[[:space:]]+.+// ; s|(\\.include[[:space:]]+)\\\"([^/].+)\\\"|\\1\\\"${SOURCE_DIR}/vu1/\\2\\\"|' > ${OUTPUT}" - RESULT_VARIABLE result - ) - if(NOT result EQUAL 0) - message(FATAL_ERROR "Step 1 preprocessing failed") - endif() + # Step 1: Clean up C preprocessor directives and fix .include paths + # - Remove #include and #define (will use gasp-style includes and C preprocessor later) + # - Fix .include paths to be absolute + file(READ "${INPUT}" content) + # Remove #include lines + string(REGEX REPLACE "#include[^\n]*\n" "" content "${content}") + # Remove #define lines + string(REGEX REPLACE "#define[^\n]*\n" "" content "${content}") + # Fix .include paths to be absolute (only for relative paths) + # Note: CMake regex doesn't support [[:space:]], use [ \t] instead + string(REGEX REPLACE "\\.include[ \t]+\"([^/][^\"]*)\"" ".include \"${SOURCE_DIR}/vu1/\\1\"" content "${content}") + file(WRITE "${OUTPUT}" "${content}") elseif(STEP STREQUAL "pp2") - # Step 2: gasp/masp preprocessor + # Step 2: gasp/masp preprocessor for macro expansion if(NOT DEFINED GASP_TOOL) message(FATAL_ERROR "GASP_TOOL not defined") endif() - # Use wrapper script for better error handling + + # Run masp directly execute_process( - COMMAND ${SOURCE_DIR}/cmake/run_masp.sh ${GASP_TOOL} ${SOURCE_DIR}/vu1 ${OUTPUT} ${INPUT} + COMMAND "${GASP_TOOL}" -c ";" -I"${SOURCE_DIR}/vu1" -o "${OUTPUT}" "${INPUT}" RESULT_VARIABLE result OUTPUT_VARIABLE output ERROR_VARIABLE error ) if(NOT result EQUAL 0) - message(FATAL_ERROR "Step 2 preprocessing (${GASP_TOOL}) failed\nOutput: ${output}\nError: ${error}\nInput: ${INPUT}\nOutput: ${OUTPUT}") + message(FATAL_ERROR "masp failed (exit ${result})\nCommand: ${GASP_TOOL} -c \";\" -I${SOURCE_DIR}/vu1 -o ${OUTPUT} ${INPUT}\nOutput: ${output}\nError: 
${error}") + endif() + if(NOT EXISTS "${OUTPUT}") + message(FATAL_ERROR "masp did not create output file: ${OUTPUT}") endif() elseif(STEP STREQUAL "pp3") # Step 3: Array notation conversion - execute_process( - COMMAND /bin/bash -c "cat ${INPUT} | sed -E 's/\\[([0-9])\\]/_\\1/g ; s/\\[([w-zW-Z])\\]/\\1/g' > ${OUTPUT}" - RESULT_VARIABLE result - ) - if(NOT result EQUAL 0) - message(FATAL_ERROR "Step 3 preprocessing failed") - endif() + # Convert [0] -> _0, [1] -> _1, etc. + # Convert [x] -> x, [y] -> y, etc. (vector component access) + file(READ "${INPUT}" content) + string(REGEX REPLACE "\\[([0-9])\\]" "_\\1" content "${content}") + string(REGEX REPLACE "\\[([w-zW-Z])\\]" "\\1" content "${content}") + file(WRITE "${OUTPUT}" "${content}") elseif(STEP STREQUAL "pp4") - # Step 4: C preprocessor with memory layout - # Use -w to suppress warnings about unmatched quotes in assembly comments - # Escape backslashes before preprocessing, then restore them after - # This preserves masp/gasp local labels like \xformed_vert while allowing normal C preprocessing + # Step 4: C preprocessor for memory layout evaluation + if(NOT DEFINED COMPILER) + message(FATAL_ERROR "COMPILER not defined") + endif() + if(NOT DEFINED MEM_HEADER) + message(FATAL_ERROR "MEM_HEADER not defined") + endif() + + # Use -x assembler-with-cpp to force GCC to preprocess .vcl files as assembly execute_process( - COMMAND /bin/bash -c "sed 's/\\\\/\\\\\\\\/g' ${INPUT} | ${COMPILER} -E -P -w -I${SOURCE_DIR}/vu1 -imacros ${MEM_HEADER} - | sed 's/\\\\\\\\/\\\\/g' > ${OUTPUT}" + COMMAND "${COMPILER}" -E -P -w -x assembler-with-cpp -I"${SOURCE_DIR}/vu1" -imacros "${MEM_HEADER}" "${INPUT}" RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error ) if(NOT result EQUAL 0) - message(FATAL_ERROR "Step 4 preprocessing failed") + message(FATAL_ERROR "C preprocessor failed (exit ${result})\nError: ${error}") endif() + file(WRITE "${OUTPUT}" "${output}") else() message(FATAL_ERROR "Unknown step: ${STEP}") 
From b74d303fe8c1a0397d4c2dc1fc95c78845893ed0 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 18:12:45 +0200 Subject: [PATCH 07/17] TEMP: add Sony reference VSMs for openvcl validation The 12 sce_*_vcl.vsm files were generated by Sony's proprietary vcl tool from the matching vu1/*.vcl sources. They are committed here as the ground truth against which openvcl output is diffed by the upcoming regression harness. These artifacts are temporary. Once openvcl reaches functional parity with the proprietary pipeline (passing the regression harness for all 13 renderers), this commit should be dropped or squashed out via interactive rebase. Co-Authored-By: Claude Opus 4.7 (1M context) --- vu1/sce_fast_nolights_vcl.vsm | 179 ++++++ vu1/sce_fast_vcl.vsm | 257 +++++++++ vu1/sce_general_nospec_quad_vcl.vsm | 593 ++++++++++++++++++++ vu1/sce_general_nospec_tri_vcl.vsm | 490 +++++++++++++++++ vu1/sce_general_nospec_vcl.vsm | 570 +++++++++++++++++++ vu1/sce_general_pv_diff_quad_vcl.vsm | 714 ++++++++++++++++++++++++ vu1/sce_general_pv_diff_tri_vcl.vsm | 688 +++++++++++++++++++++++ vu1/sce_general_pv_diff_vcl.vsm | 769 ++++++++++++++++++++++++++ vu1/sce_general_quad_vcl.vsm | 794 +++++++++++++++++++++++++++ vu1/sce_general_tri_vcl.vsm | 689 +++++++++++++++++++++++ vu1/sce_general_vcl.vsm | 771 ++++++++++++++++++++++++++ vu1/sce_indexed_vcl.vsm | 718 ++++++++++++++++++++++++ 12 files changed, 7232 insertions(+) create mode 100644 vu1/sce_fast_nolights_vcl.vsm create mode 100644 vu1/sce_fast_vcl.vsm create mode 100644 vu1/sce_general_nospec_quad_vcl.vsm create mode 100644 vu1/sce_general_nospec_tri_vcl.vsm create mode 100644 vu1/sce_general_nospec_vcl.vsm create mode 100644 vu1/sce_general_pv_diff_quad_vcl.vsm create mode 100644 vu1/sce_general_pv_diff_tri_vcl.vsm create mode 100644 vu1/sce_general_pv_diff_vcl.vsm create mode 100644 vu1/sce_general_quad_vcl.vsm create mode 100644 vu1/sce_general_tri_vcl.vsm create mode 100644 
vu1/sce_general_vcl.vsm create mode 100644 vu1/sce_indexed_vcl.vsm diff --git a/vu1/sce_fast_nolights_vcl.vsm b/vu1/sce_fast_nolights_vcl.vsm new file mode 100644 index 00000000..80532a0a --- /dev/null +++ b/vu1/sce_fast_nolights_vcl.vsm @@ -0,0 +1,179 @@ +; === __LP__ EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; === ldumb : optimal=12 clid=0 mlid=2 size=(12) +; === normal1 : optimal=12 clid=0 mlid=3 size=(12) +; === vuta : optimal=12 clid=0 mlid=2 size=(12) +; === dUp : optimal=12 clid=0 mlid=3 size=(12) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmFastNoLights_CodeStart + .global vsmFastNoLights_CodeEnd +vsmFastNoLights_CodeStart: +__v_vu1_fast_nolights_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 27 [27 0] 28 [__v_vu1_fast_nolights_pp4_vcl_4] + sub VF07,VF00,VF00 lq.w VF08,60(VI00) + sub VF06,VF00,VF00 lq.w VF01,57(VI00) + sub VF05,VF00,VF00 lq VF02,62(VI00) + maxw.x VF07,VF07,VF00w loi 0x44fff000 ; STALL_LATENCY ?1 + addi.xy VF08,VF00,I loi 0x43000000 + muli.w VF01,VF08,I mr32.z VF08,VF01 + maxw.y VF06,VF06,VF00w NOP + maxw.z VF05,VF05,VF00w lq.xyz VF01,58(VI00) + mulax ACC,VF07,VF02x loi 0x437f0000 + minii.w VF01,VF01,I move.xyz VF08,VF08 + max.w VF08,VF00,VF00 NOP + madday ACC,VF06,VF02y lq VF03,63(VI00) + maddaz ACC,VF05,VF02z NOP + ftoi0 VF01,VF01 NOP + maddw VF02,VF08,VF02w NOP + mulax ACC,VF07,VF03x lq VF04,64(VI00) + madday ACC,VF06,VF03y NOP + maddaz ACC,VF05,VF03z NOP + maddw VF03,VF08,VF03w NOP + mulax ACC,VF07,VF04x lq VF09,65(VI00) + madday ACC,VF06,VF04y NOP + maddaz ACC,VF05,VF04z NOP + maddw VF04,VF08,VF04w NOP + mulax ACC,VF07,VF09x NOP + madday ACC,VF06,VF09y NOP + maddaz[E] ACC,VF05,VF09z NOP + maddw VF05,VF08,VF09w NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 19 [19 0] 19 [main_loop_lid] + NOP xtop VI01 + NOP lq VF06,75(VI00) + NOP ilw.x VI04,0(VI01) + NOP iaddiu VI06,VI01,0x00000001 + NOP iaddiu VI02,VI01,0x00000005 + NOP mtir VI03,VF06x + NOP 
iadd VI05,VI02,VI04 + NOP ior VI03,VI03,VI04 + NOP iadd VI05,VI05,VI04 + NOP mfir.x VF06,VI03 + NOP iaddiu VI03,VI01,0 + NOP iadd VI04,VI05,VI04 + NOP iaddiu VI05,VI01,0x00000005 + NOP sq VF06,236(VI03) + NOP iaddiu VI07,VI06,0x00000004 + NOP iaddiu VI08,VI00,0x000003ff + NOP iaddiu VI09,VI00,0x00000800 + NOP iaddiu VI10,VI00,0x00000400 + NOP iaddiu VI11,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF06,0(VI06) + ftoi0 VF06,VF06 NOP ; STALL_LATENCY ?3 + NOP mtir VI12,VF06x ; STALL_LATENCY ?3 + NOP iand VI13,VI12,VI10 + NOP NOP + NOP ibeq VI13,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP NOP +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_8] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF06y + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,3(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_9] + NOP iand VI14,VI13,VI08 + NOP mtir VI12,VF06z + NOP iand VI13,VI13,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI12,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI13,3(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_nolights_pp4_vcl_10] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF06w + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,3(VI14) +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_fast_nolights_pp4_vcl_11] + NOP iand VI12,VI13,VI08 + NOP iaddiu VI06,VI06,0x00000001 + NOP iand VI13,VI13,VI09 + NOP iadd VI12,VI12,VI05 + NOP isw.w VI11,0(VI12) + NOP ibne VI06,VI07,adcLoop_lid + NOP isw.w VI13,3(VI12) +EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: +; _LNOPT_w=[ ] 8 [12 0] 
12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] + NOP lq.xyz VF06,0(VI02) + mulax ACC,VF02,VF06x ilw.w VI05,0(VI02) ; STALL_LATENCY ?3 + madday ACC,VF03,VF06y NOP + maddaz ACC,VF04,VF06z sq VF01,238(VI03) + maddw VF06,VF05,VF00w iaddiu VI02,VI02,0x00000003 + NOP iaddiu VI03,VI03,0 + NOP ibeq VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1 + NOP div Q,VF00w,VF06w ; STALL_LATENCY ?1 +; _LNOPT_w=[ ] 9 [12 0] 12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__PRO1] + max.xyz VF08,VF06,VF06 lq.xyz VF10,0(VI02) + mulax ACC,VF02,VF10x sq VF01,241(VI03) ; STALL_LATENCY ?3 + madday ACC,VF03,VF10y lq.xyz VF07,-1(VI02) + maddaz ACC,VF04,VF10z iaddiu VI02,VI02,0x00000003 + maddw VF06,VF05,VF00w iaddiu VI06,VI05,0x00007fff + mulq.xyz VF10,VF08,Q iaddiu VI03,VI03,0x000000f3 + NOP ilw.w VI05,-3(VI02) + NOP ibeq VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0 + mulq.xyz VF07,VF07,Q div Q,VF00w,VF06w +EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 12 [12 12] 12 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] + ftoi4.xyz VF10,VF10 lq.xyz VF08,0(VI02) + NOP mfir.w VF10,VI06 + NOP sq VF01,1(VI03) + max.xyz VF09,VF06,VF06 sq.xyz VF07,-6(VI03) + mulax ACC,VF02,VF08x lq.xyz VF07,-1(VI02) + madday ACC,VF03,VF08y sq VF10,-4(VI03) + maddaz ACC,VF04,VF08z iaddiu VI02,VI02,0x00000003 + maddw VF06,VF05,VF00w iaddiu VI06,VI05,0x00007fff + mulq.xyz VF10,VF09,Q iaddiu VI03,VI03,0x00000003 + NOP ilw.w VI05,-3(VI02) + NOP ibne VI02,VI04,EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__MAIN_LOOP + mulq.xyz VF07,VF07,Q div Q,VF00w,VF06w +EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0: +; _LNOPT_w=[ ] 12 [13 0] 15 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI0] + NOP NOP + ftoi4.xyz VF10,VF10 NOP + max.xyz VF06,VF06,VF06 mfir.w VF10,VI06 + NOP sq.xyz VF07,-6(VI03) + NOP lq.xyz VF07,-1(VI02) + mulq.xyz VF10,VF06,Q sq VF10,-4(VI03) ; STALL_LATENCY ?1 + mulq.xyz VF07,VF07,Q iaddiu 
VI06,VI05,0x00007fff ; STALL_LATENCY ?1 + NOP mfir.w VF10,VI06 + ftoi4.xyz VF10,VF10 NOP + NOP sq.xyz VF07,-3(VI03) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT + NOP sq VF10,-1(VI03) +EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1: +; _LNOPT_w=[ ] 8 [13 0] 15 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EPI1] + NOP NOP + NOP NOP + max.xyz VF07,VF06,VF06 lq.xyz VF06,-1(VI02) + mulq.xyz VF08,VF07,Q iaddiu VI05,VI05,0x00007fff ; STALL_LATENCY ?3 + mulq.xyz VF07,VF06,Q mfir.w VF06,VI05 + ftoi4.xyz VF06,VF08 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF07,237(VI03) + NOP sq VF06,239(VI03) ; STALL_LATENCY ?2 +EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_fast_nolights_pp4_vcl_adcLoop_done_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_nolights_pp4_vcl_15] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmFastNoLights_CodeEnd: +; iCount=139 +; register stats: +; 16 VU User integer +; 11 VU User floating point diff --git a/vu1/sce_fast_vcl.vsm b/vu1/sce_fast_vcl.vsm new file mode 100644 index 00000000..da5e9417 --- /dev/null +++ b/vu1/sce_fast_vcl.vsm @@ -0,0 +1,257 @@ +; === __LP__ EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; === ldumb : optimal=16 clid=0 mlid=3 size=(16) +; === normal1 : optimal=16 clid=0 mlid=2 size=(16) +; === vuta : optimal=16 clid=0 mlid=3 size=(16) +; === dUp : optimal=16 clid=0 mlid=2 size=(16) +; === normal : optimal=16 clid=0 mlid=3 size=(16) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmFast_CodeStart + .global vsmFast_CodeEnd +vsmFast_CodeStart: +__v_vu1_fast_pp4_vcl_4: +; _LNOPT_w=[ normal ] 10 [10 0] 10 [__v_vu1_fast_pp4_vcl_4] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0 + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + max.xyz VF06,VF00,VF00 lq 
VF03,64(VI00) + max.xyz VF07,VF00,VF00 lq VF04,65(VI00) + max.xyz VF08,VF00,VF00 lq.xyz VF05,58(VI00) + max.xyz VF09,VF00,VF00 ilw.x VI03,1(VI02) + max.xyz VF10,VF00,VF00 ibeq VI01,VI00,finish_init_lid + max.xyz VF11,VF00,VF00 lq.xyz VF12,59(VI00) +; _LNOPT_w=[ vuta1 ] 12 [9 0] 12 [__v_vu1_fast_pp4_vcl_5] + NOP lq.xyz VF14,67(VI00) + NOP lq.xyz VF06,3(VI03) + NOP lq.xyz VF15,68(VI00) + NOP lq.xyz VF17,0(VI03) + NOP lq.xyz VF09,1(VI03) + NOP lq.xyz VF16,69(VI00) + mulax.xyz ACC,VF14,VF06x lq.xyz VF13,60(VI00) + mul.xyz VF17,VF12,VF17 NOP + madday.xyz ACC,VF15,VF06y isubiu VI04,VI01,0x00000001 + maddz.xyz VF06,VF16,VF06z NOP + mul.xyz VF09,VF09,VF13 ibeq VI04,VI00,finish_init_lid + add.xyz VF05,VF05,VF17 NOP +; _LNOPT_w=[ another ] 10 [14 0] 15 [__v_vu1_fast_pp4_vcl_6] + NOP iaddiu VI01,VI02,0 + NOP ilw.x VI02,2(VI01) + NOP lq.xyz VF07,3(VI02) ; STALL_LATENCY ?3 + NOP lq.xyz VF17,0(VI02) + mulax.xyz ACC,VF14,VF07x lq.xyz VF10,1(VI02) ; STALL_LATENCY ?2 + mul.xyz VF17,VF12,VF17 NOP + madday.xyz ACC,VF15,VF07y isubiu VI04,VI04,0x00000001 + maddz.xyz VF07,VF16,VF07z NOP + mul.xyz VF10,VF10,VF13 ibeq VI04,VI00,finish_init_lid + add.xyz VF05,VF05,VF17 NOP +; _LNOPT_w=[ normal2 ] 10 [14 0] 16 [__v_vu1_fast_pp4_vcl_7] + NOP iaddiu VI01,VI01,0 + NOP ilw.x VI01,3(VI01) + NOP lq.xyz VF08,3(VI01) ; STALL_LATENCY ?3 + NOP lq.xyz VF11,0(VI01) + mulax.xyz ACC,VF14,VF08x NOP ; STALL_LATENCY ?2 + madday.xyz ACC,VF15,VF08y lq.xyz VF14,1(VI01) + mul.xyz VF15,VF12,VF11 NOP + maddz.xyz VF08,VF16,VF08z NOP + mul.xyz VF11,VF14,VF13 NOP ; STALL_LATENCY ?1 + add.xyz VF05,VF05,VF15 NOP +finish_init_lid: +; _LNOPT_w=[ normal2 ] 35 [35 0] 35 [finish_init_lid] + sub VF17,VF00,VF00 lq.xyz VF15,57(VI00) + addy.x VF13,VF00,VF06y NOP + addz.x VF14,VF00,VF06z lq.w VF09,60(VI00) + sub VF18,VF00,VF00 mr32.x VF19,VF08 + mul.xyz VF15,VF12,VF15 lq.w VF11,57(VI00) + sub VF12,VF00,VF00 loi 0x43000000 + muli.w VF10,VF09,I loi 0x44fff000 + addi.xy VF19,VF00,I mr32.w VF09,VF19 + add.xyz VF05,VF05,VF15 mr32.z 
VF19,VF11 + maxw.x VF17,VF17,VF00w NOP + maxw.y VF18,VF18,VF00w NOP + maxw.z VF12,VF12,VF00w NOP + max.xyz VF19,VF19,VF19 NOP + mulax ACC,VF17,VF01x NOP + madday ACC,VF18,VF01y mr32.y VF08,VF07 + maddaz ACC,VF12,VF01z move.w VF19,VF00 + addx.y VF06,VF00,VF07x loi 0x4b4000ff + addx.z VF06,VF00,VF08x mr32.z VF07,VF09 + maxi.w VF09,VF00,I loi 0x437f0000 + minii.w VF10,VF10,I loi 0x4b400000 + addi.xyz VF13,VF05,I move.x VF07,VF13 + maddw VF14,VF19,VF01w move.x VF08,VF14 + mulax ACC,VF17,VF02x NOP + addi.w VF05,VF10,I NOP + madday ACC,VF18,VF02y NOP + maddaz ACC,VF12,VF02z NOP + maddw VF15,VF19,VF02w NOP + mulax ACC,VF17,VF03x NOP + madday ACC,VF18,VF03y NOP + maddaz ACC,VF12,VF03z NOP + maddw VF16,VF19,VF03w NOP + mulax ACC,VF17,VF04x NOP + madday ACC,VF18,VF04y NOP + maddaz[E] ACC,VF12,VF04z NOP + maddw VF17,VF19,VF04w NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 19 [19 0] 19 [main_loop_lid] + NOP xtop VI01 + NOP lq VF01,75(VI00) + NOP ilw.x VI04,0(VI01) + NOP iaddiu VI06,VI01,0x00000001 + NOP iaddiu VI02,VI01,0x00000005 + NOP mtir VI03,VF01x + NOP iadd VI05,VI02,VI04 + NOP ior VI03,VI03,VI04 + NOP iadd VI05,VI05,VI04 + NOP mfir.x VF01,VI03 + NOP iaddiu VI03,VI01,0 + NOP iadd VI04,VI05,VI04 + NOP iaddiu VI05,VI01,0x00000005 + NOP sq VF01,236(VI03) + NOP iaddiu VI07,VI06,0x00000004 + NOP iaddiu VI08,VI00,0x000003ff + NOP iaddiu VI09,VI00,0x00000800 + NOP iaddiu VI10,VI00,0x00000400 + NOP iaddiu VI11,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF01,0(VI06) + ftoi0 VF01,VF01 NOP ; STALL_LATENCY ?3 + NOP mtir VI12,VF01x ; STALL_LATENCY ?3 + NOP iand VI13,VI12,VI10 + NOP NOP + NOP ibeq VI13,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP NOP +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_12] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF01y + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq 
VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,3(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_13] + NOP iand VI14,VI13,VI08 + NOP mtir VI12,VF01z + NOP iand VI13,VI13,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI12,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI13,3(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_pp4_vcl_14] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF01w + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,3(VI14) +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_fast_pp4_vcl_15] + NOP iand VI12,VI13,VI08 + NOP iaddiu VI06,VI06,0x00000001 + NOP iand VI13,VI13,VI09 + NOP iadd VI12,VI12,VI05 + NOP isw.w VI11,0(VI12) + NOP ibne VI06,VI07,adcLoop_lid + NOP isw.w VI13,3(VI12) +EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: +; _LNOPT_w=[ ] 11 [16 0] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] + NOP lq.xyz VF05,1(VI02) + mulax.xyz ACC,VF06,VF05x NOP ; STALL_LATENCY ?3 + madday.xyz ACC,VF07,VF05y lq.xyz VF01,0(VI02) + maddz.xyz VF05,VF08,VF05z NOP + mulax ACC,VF14,VF01x NOP ; STALL_LATENCY ?2 + max.xyz VF05,VF05,VF00 NOP + madday ACC,VF15,VF01y NOP + maddaz ACC,VF16,VF01z iaddiu VI02,VI02,0x00000003 + maddw VF01,VF17,VF00w NOP + mulax.xyz ACC,VF09,VF05x ibeq VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1 + madday.xyz ACC,VF10,VF05y iaddiu VI03,VI03,0 +; _LNOPT_w=[ ] 15 [16 0] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__PRO1] + maddz.xyz VF03,VF11,VF05z lq.xyz VF05,1(VI02) + NOP div Q,VF00w,VF01w + NOP lq.xyz VF12,0(VI02) + mulax.xyz ACC,VF06,VF05x NOP ; STALL_LATENCY ?1 + madday.xyz ACC,VF07,VF05y NOP + maddz.xyz VF05,VF08,VF05z NOP + mulax ACC,VF14,VF12x NOP + madday ACC,VF15,VF12y ilw.w VI05,-3(VI02) + maddaz ACC,VF16,VF12z move.xyz VF12,VF01 + max.xyz VF05,VF05,VF00 
iaddiu VI03,VI03,0x000000f3 + maddw VF01,VF17,VF00w iaddiu VI02,VI02,0x00000003 + add.xyz VF03,VF03,VF13 iaddiu VI05,VI05,0x00007fff + mulq.xyz VF04,VF12,Q mfir.w VF03,VI05 + mulax.xyz ACC,VF09,VF05x ibeq VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0 + madday.xyz ACC,VF10,VF05y lq.xyz VF12,-4(VI02) +EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; _LPOPT_w=[ normal1 ] 16 [16 16] 16 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] + maddz.xyz VF02,VF11,VF05z lq.xyz VF18,1(VI02) + miniw.xyz VF05,VF03,VF09w iaddiu VI03,VI03,0x00000003 + ftoi4.xyz VF03,VF04 NOP + mulq.xyz VF04,VF12,Q lq.xyz VF12,0(VI02) + mulax.xyz ACC,VF06,VF18x div Q,VF00w,VF01w + madday.xyz ACC,VF07,VF18y sq VF05,-8(VI03) + maddz.xyz VF05,VF08,VF18z NOP + mulax ACC,VF14,VF12x ilw.w VI05,-3(VI02) + madday ACC,VF15,VF12y iaddiu VI02,VI02,0x00000003 + maddaz ACC,VF16,VF12z move.xyz VF12,VF01 + max.xyz VF05,VF05,VF00 sq.xyz VF04,-9(VI03) + maddw VF01,VF17,VF00w iaddiu VI05,VI05,0x00007fff + add.xyz VF03,VF02,VF13 sq VF03,-7(VI03) + mulq.xyz VF04,VF12,Q mfir.w VF03,VI05 + mulax.xyz ACC,VF09,VF05x ibne VI02,VI04,EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__MAIN_LOOP + madday.xyz ACC,VF10,VF05y lq.xyz VF12,-4(VI02) +EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0: +; _LNOPT_w=[ ] 16 [16 0] 18 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI0] + maddz.xyz VF02,VF11,VF05z div Q,VF00w,VF01w + miniw.xyz VF05,VF03,VF09w ilw.w VI04,-3(VI02) + ftoi4.xyz VF03,VF04 NOP + mulq.xyz VF04,VF12,Q move.xyz VF12,VF01 + NOP sq VF05,-5(VI03) ; STALL_LATENCY ?1 + add.xyz VF03,VF02,VF13 sq VF03,-4(VI03) + mulq.xyz VF04,VF12,Q sq.xyz VF04,-6(VI03) + NOP iaddiu VI04,VI04,0x00007fff + NOP lq.xyz VF12,-1(VI02) + miniw.xyz VF05,VF03,VF09w mfir.w VF03,VI04 + ftoi4.xyz VF03,VF04 NOP + mulq.xyz VF04,VF12,Q NOP ; STALL_LATENCY ?1 + NOP sq VF05,-2(VI03) + NOP sq VF03,-1(VI03) + NOP b EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT + NOP sq.xyz VF04,-3(VI03) +EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1: +; _LNOPT_w=[ ] 13 [16 
0] 18 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EPI1] + NOP NOP + maddz.xyz VF05,VF11,VF05z div Q,VF00w,VF01w + NOP move.xyz VF01,VF01 + add.xyz VF05,VF05,VF13 lq.xyz VF03,-1(VI02) ; STALL_LATENCY ?2 + NOP ilw.w VI04,-3(VI02) + mulq.xyz VF02,VF01,Q waitq ; STALL_LATENCY ?1 + NOP iaddiu VI04,VI04,0x00007fff ; STALL_LATENCY ?1 + miniw.xyz VF05,VF05,VF09w mfir.w VF01,VI04 + ftoi4.xyz VF01,VF02 NOP + mulq.xyz VF02,VF03,Q NOP + NOP sq VF05,238(VI03) ; STALL_LATENCY ?1 + NOP sq VF01,239(VI03) + NOP sq.xyz VF02,237(VI03) +EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_fast_pp4_vcl_adcLoop_done_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_pp4_vcl_19] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmFast_CodeEnd: +; iCount=211 +; register stats: +; 16 VU User integer +; 20 VU User floating point diff --git a/vu1/sce_general_nospec_quad_vcl.vsm b/vu1/sce_general_nospec_quad_vcl.vsm new file mode 100644 index 00000000..8519718f --- /dev/null +++ b/vu1/sce_general_nospec_quad_vcl.vsm @@ -0,0 +1,593 @@ +; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === normal1 : optimal=46 clid=1 mlid=3 size=(47) +; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=8 clid=5 mlid=5 size=(8) +; === ldumb : optimal=8 clid=0 mlid=4 size=(8) +; === normal1 : optimal=8 clid=0 mlid=4 size=(8) +; === hDown : optimal=8 clid=0 mlid=4 size=(8) +; === vuta : optimal=8 clid=0 mlid=4 size=(8) +; === dUp : optimal=8 clid=0 mlid=4 size=(8) +; === normal : optimal=8 clid=0 mlid=4 size=(8) +; === another : optimal=8 clid=0 mlid=4 size=(8) +; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) +; === normal2 : optimal=8 clid=0 mlid=6 size=(8) +; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) +; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=21 
clid=3 mlid=3 size=(26) +; === ldumb : optimal=21 clid=0 mlid=3 size=(26) +; === normal1 : optimal=21 clid=0 mlid=3 size=(26) +; === hDown : optimal=21 clid=0 mlid=3 size=(26) +; === vuta : optimal=21 clid=0 mlid=2 size=(26) +; === dUp : optimal=21 clid=0 mlid=3 size=(26) +; === normal : optimal=21 clid=0 mlid=3 size=(26) +; === another : optimal=21 clid=0 mlid=3 size=(26) +; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) +; === normal2 : optimal=21 clid=0 mlid=3 size=(26) +; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) +; === __LP__ EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralNoSpecQuad_CodeStart + .global vsmGeneralNoSpecQuad_CodeEnd +vsmGeneralNoSpecQuad_CodeStart: +__v_vu1_general_nospec_quad_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_quad_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 45 [45 0] 45 [main_loop_lid] + NOP xtop VI05 + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + NOP iaddiu VI03,VI05,0x00000005 + NOP lq.xyz VF20,0(VI03) + NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + mul.xyz VF09,VF09,VF08 iaddiu VI04,VI05,0x000000ed + mulax ACC,VF01,VF20x ilw.x VI05,0(VI05) + madday ACC,VF02,VF20y lq.xyz VF08,6(VI03) + maddaz ACC,VF03,VF20z 
mr32.z VF05,VF05 + maddw VF20,VF04,VF00w lq VF06,75(VI00) + addi.xy VF05,VF00,I lq.xyz VF14,9(VI03) + mulax ACC,VF01,VF08x loi 0x45000000 + madday ACC,VF02,VF08y ilw.w VI07,0(VI00) + maddaz ACC,VF03,VF08z div Q,VF00w,VF20w + maddw VF16,VF04,VF00w iadd VI06,VI03,VI05 + mulax ACC,VF01,VF14x lq.xyz VF13,3(VI03) + madday ACC,VF02,VF14y mtir VI08,VF06x + maddaz ACC,VF03,VF14z ior VI08,VI08,VI05 + maddw VF14,VF04,VF00w mfir.x VF06,VI08 + mulax ACC,VF01,VF13x iadd VI06,VI06,VI05 + madday ACC,VF02,VF13y iadd VI06,VI06,VI05 + maddaz ACC,VF03,VF13z lq.xyz VF07,58(VI00) + maddw VF13,VF04,VF00w sq VF06,-1(VI04) + NOP iaddiu VI08,VI00,0x00007fff + NOP iaddiu VI08,VI08,0x00000001 + NOP ilw.w VI02,76(VI00) + NOP div Q,VF00w,VF13w + add.xyz VF09,VF07,VF09 lq.xyz VF07,7(VI03) + NOP lq.xyz VF15,2(VI03) + mulq.xyz VF08,VF20,Q lq.xyz VF06,76(VI00) + NOP fcset 0 + maxi.w VF07,VF00,I lq.xyz VF17,10(VI03) + mulq.xyz VF15,VF15,Q sq.xyz VF07,10(VI03) + mul.xyz VF10,VF08,VF06 div Q,VF00w,VF16w + NOP lq.xyz VF07,5(VI03) + mulq.xyz VF13,VF13,Q iaddiu VI01,VI03,0 + add.xyz VF11,VF08,VF05 sq.xyz VF17,7(VI03) + clipw.xyz VF10xyz,VF07w lq.xyz VF17,11(VI03) + mulq.xyz VF10,VF07,Q lq.xyz VF20,8(VI03) + sub.xyz VF12,VF08,VF13 iaddiu VI03,VI01,0x0000000c + mul.xyz VF21,VF13,VF06 div Q,VF00w,VF14w + add.xyz VF08,VF13,VF05 ibeq VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1 + mulq.xyz VF16,VF16,Q lq.w VF08,0(VI00) +; _LNOPT_w=[ ] 47 [45 0] 47 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__PRO1] + mulq.xyz VF19,VF20,Q NOP + NOP NOP + NOP NOP + clipw.xyz VF21xyz,VF07w NOP + mulq.xyz VF17,VF17,Q lq.xyz VF20,0(VI03) + mulq.xyz VF18,VF14,Q NOP + sub.xyz VF13,VF16,VF13 NOP + add.xyz VF07,VF16,VF05 NOP + mulax ACC,VF01,VF20x NOP + madday ACC,VF02,VF20y lq.xyz VF22,6(VI03) + maddaz ACC,VF03,VF20z NOP + maddw VF20,VF04,VF00w iaddiu VI01,VI03,0 + mul.xyz VF21,VF16,VF06 iaddiu VI09,VI04,0 + mulax ACC,VF01,VF22x lq.xyz VF14,9(VI03) + madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) + maddaz 
ACC,VF03,VF22z div Q,VF00w,VF20w + maddw VF16,VF04,VF00w lq.xyz VF23,7(VI03) + mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) + madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) + mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) + maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) + maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) + mulq.xyz VF22,VF20,Q lq.xyz VF17,11(VI03) + mulax ACC,VF01,VF24x lq.xyz VF23,5(VI03) + madday ACC,VF02,VF24y lq.xyz VF25,2(VI03) + maddaz ACC,VF03,VF24z sq.xyz VF19,9(VI04) + maddw VF24,VF04,VF00w sq.xyz VF10,3(VI04) + ftoi4.xyz VF11,VF11 sq.xyz VF09,4(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,7(VI04) + mul.xyz VF19,VF22,VF06 sq.xyz VF09,1(VI04) + clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w + mulw.xyz VF10,VF12,VF08w mfir.w VF11,VI08 + add.xyz VF12,VF18,VF05 lq.xyz VF20,8(VI03) + clipw.xyz VF19xyz,VF07w iaddiu VI03,VI01,0x0000000c + mulq.xyz VF15,VF25,Q fcand VI01,16777215 + opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,10(VI04) + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq VF11,2(VI04) + ftoi4.xyz VF18,VF08 div Q,VF00w,VF16w + mulq.xyz VF13,VF24,Q mfir.w VF18,VI08 + ftoi4.xyz VF19,VF12 iand VI01,VI01,VI02 + mulq.xyz VF10,VF23,Q fmand VI10,VI07 + add.xyz VF11,VF22,VF05 ior VI01,VI01,VI10 + sub.xyz VF12,VF22,VF13 iaddiu VI01,VI01,0x00007fff + mul.xyz VF21,VF13,VF06 mfir.w VF19,VI01 + add.xyz VF08,VF13,VF05 div Q,VF00w,VF14w + ftoi4.xyz VF22,VF07 ibeq VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0 + mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 +EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ normal1 ] 47 [45 45] 47 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP sq VF19,8(VI04) + mulq.xyz VF19,VF20,Q sq VF18,5(VI04) + ftoi4.xyz VF11,VF11 NOP + sub.xyz VF13,VF16,VF13 sq VF22,11(VI04) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI09,0x0000000c + add.xyz VF07,VF16,VF05 lq.xyz VF20,0(VI03) + mulq.xyz VF18,VF14,Q NOP + clipw.xyz VF21xyz,VF07w lq.xyz VF22,6(VI03) + mul.xyz VF21,VF16,VF06 iaddiu VI01,VI03,0 + mulax 
ACC,VF01,VF20x iaddiu VI09,VI04,0 + madday ACC,VF02,VF20y NOP + maddaz ACC,VF03,VF20z lq.xyz VF14,9(VI03) + maddw VF20,VF04,VF00w NOP + mulax ACC,VF01,VF22x lq.xyz VF23,7(VI03) + madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) + maddaz ACC,VF03,VF22z NOP + maddw VF16,VF04,VF00w div Q,VF00w,VF20w + mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) + madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) + mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) + maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) + maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) + mulax ACC,VF01,VF24x lq.xyz VF17,11(VI03) + mulq.xyz VF22,VF20,Q lq.xyz VF23,5(VI03) + madday ACC,VF02,VF24y lq.xyz VF20,8(VI03) + maddaz ACC,VF03,VF24z lq.xyz VF25,2(VI03) + maddw VF24,VF04,VF00w iaddiu VI03,VI01,0x0000000c + mul.xyz VF19,VF22,VF06 sq.xyz VF19,9(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF10,3(VI04) + mulw.xyz VF10,VF12,VF08w sq.xyz VF09,4(VI04) + clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w + clipw.xyz VF19xyz,VF07w sq.xyz VF09,7(VI04) + add.xyz VF12,VF18,VF05 sq.xyz VF09,1(VI04) + mulq.xyz VF15,VF25,Q mfir.w VF11,VI08 + opmula.xyz ACCxyz,VF10xyz,VF13xyz fcand VI01,16777215 + ftoi4.xyz VF18,VF08 iand VI01,VI01,VI02 + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,10(VI04) + mulq.xyz VF13,VF24,Q div Q,VF00w,VF16w + mulq.xyz VF10,VF23,Q sq VF11,2(VI04) + add.xyz VF11,VF22,VF05 mfir.w VF18,VI08 + ftoi4.xyz VF19,VF12 fmand VI10,VI07 + sub.xyz VF12,VF22,VF13 ior VI01,VI01,VI10 + mul.xyz VF21,VF13,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF08,VF13,VF05 mfir.w VF19,VI01 + NOP div Q,VF00w,VF14w + ftoi4.xyz VF22,VF07 ibne VI03,VI06,EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP + mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 +EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 32 [26 0] 32 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI0] + mulq.xyz VF19,VF20,Q sq VF19,8(VI04) + NOP NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF17,Q mfir.w VF11,VI08 + NOP NOP + mulq.xyz VF18,VF14,Q sq VF18,5(VI04) + NOP NOP + NOP NOP 
+ clipw.xyz VF21xyz,VF07w sq VF22,11(VI04) + sub.xyz VF13,VF16,VF13 iaddiu VI04,VI09,0 + mul.xyz VF15,VF18,VF06 sq.xyz VF15,12(VI04) + mul.xyz VF21,VF16,VF06 sq.xyz VF19,21(VI04) + mulw.xyz VF10,VF12,VF08w sq.xyz VF10,15(VI04) + NOP sq.xyz VF17,18(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,16(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF09,19(VI04) + opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,13(VI04) + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,22(VI04) + NOP mfir.w VF18,VI08 + add.xyz VF12,VF18,VF05 fcand VI01,16777215 + ftoi4.xyz VF11,VF11 iand VI02,VI01,VI02 + add.xyz VF07,VF16,VF05 fmand VI07,VI07 + ftoi4.xyz VF18,VF08 ior VI02,VI02,VI07 + ftoi4.xyz VF19,VF12 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF19,VI02 + ftoi4.xyz VF22,VF07 sq VF11,14(VI04) + NOP mfir.w VF22,VI02 + NOP sq VF18,17(VI04) + NOP sq VF19,20(VI04) + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF22,23(VI04) +EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 29 [22 0] 29 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EPI1] + mulq.xyz VF20,VF20,Q NOP + NOP NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF17,Q NOP + mulq.xyz VF14,VF14,Q NOP + ftoi4.xyz VF11,VF11 NOP + sub.xyz VF13,VF16,VF13 NOP + mulw.xyz VF10,VF12,VF08w sq.xyz VF10,3(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF15,0(VI04) + mul.xyz VF15,VF14,VF06 sq.xyz VF09,4(VI04) + mul.xyz VF21,VF16,VF06 sq.xyz VF09,7(VI04) + opmula.xyz ACCxyz,VF10xyz,VF13xyz mfir.w VF11,VI08 + opmsub.xyz VF18xyz,VF13xyz,VF10xyz sq.xyz VF20,9(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,1(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF09,10(VI04) + NOP sq VF11,2(VI04) + abs.xyz VF00,VF18 mfir.w VF08,VI08 + add.xyz VF12,VF14,VF05 fmand VI07,VI07 + add.xyz VF07,VF16,VF05 fcand VI01,16777215 + NOP iand VI02,VI01,VI02 + ftoi4.xyz VF08,VF08 ior VI02,VI02,VI07 + ftoi4.xyz VF11,VF12 iaddiu VI02,VI02,0x00007fff + ftoi4.xyz VF07,VF07 mfir.w VF11,VI02 + NOP mfir.w VF07,VI02 + NOP sq.xyz VF17,6(VI04) + NOP sq 
VF08,5(VI04) + NOP sq VF11,8(VI04) + NOP sq VF07,11(VI04) +EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 6 [6 0] 6 [EXPL_vu1_general_nospec_quad_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_quad_pp4_vcl_9] + maxw.z VF07,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] + NOP xtop VI06 + NOP ilw.x VI05,0(VI06) + NOP lq.xyz VF10,67(VI00) + NOP lq.xyz VF08,3(VI03) + NOP lq.xyz VF11,68(VI00) + NOP lq.xyz VF09,69(VI00) + NOP iaddiu VI04,VI06,0x00000005 + mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 + madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 + maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) + NOP iaddiu VI06,VI06,0 + NOP iadd VI07,VI07,VI05 + NOP lq.xyz VF08,0(VI03) + mul.xyz VF17,VF10,VF17 iaddiu VI04,VI04,0x00000003 + NOP lq.xyz VF09,1(VI03) + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3 + NOP iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] + adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) + maddx.z VF11,VF07,VF17x NOP + mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 + maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] + adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 7 [7 0] 8 
[EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__PRO3] + adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) + madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 +EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) + maddx.z VF12,VF07,VF17x NOP + add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF18 iaddiu VI04,VI04,0x00000003 + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) + mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) +EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + add.xyz VF10,VF15,VF14 NOP + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP + add.xyz VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) + mulz.xyz VF13,VF09,VF11z NOP + NOP sq.xyz VF17,-8(VI06) + add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) + NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 + add.xyz VF08,VF15,VF14 sq.xyz VF10,-11(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 
+EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 + add.xyz VF12,VF10,VF12 NOP + madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) + NOP sq.xyz VF12,238(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?1 + NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,244(VI06) +EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + NOP NOP + mulz.xyz VF10,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3: +; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EPI3] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 + mulz.xyz VF09,VF09,VF17z NOP ; 
STALL_LATENCY ?3 + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_quad_pp4_vcl_15] + maxw.z VF07,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,71(VI00) + NOP lq.xyz VF09,3(VI03) + NOP lq.xyz VF08,72(VI00) + NOP lq.xyz VF11,73(VI00) + NOP lq.xyz VF10,74(VI00) + mulax.xyz ACC,VF12,VF09x NOP + madday.xyz ACC,VF08,VF09y xtop VI06 + maddaz.xyz ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) + sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 + NOP iaddiu VI06,VI06,0 + adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 + maddx.z VF13,VF07,VF13x lq.xyz VF09,1(VI03) + NOP sqrt Q,VF13z ; STALL_LATENCY ?3 + NOP NOP + NOP iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 + NOP NOP +; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] + NOP lq.xyz VF15,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + sub.xyz 
VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 + NOP waitq + mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) + adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF15,VF15,VF16 NOP + madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z NOP + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + maddz.w VF06,VF00,VF15z NOP + NOP NOP + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 + maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 +EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 26 [25 21] 26 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + NOP lq.xyz VF16,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 + sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 + NOP NOP + NOP NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF13,VF12,VF12 NOP + mulq.xyz VF16,VF16,Q div Q,VF00w,VF05w + madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) + mulax.w ACC,VF00,VF14x NOP + adday.z ACC,VF13,VF13y NOP + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF16,VF16,VF17 NOP + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) + mulq.xyz VF15,VF15,Q sqrt Q,VF13z + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + maddz.w VF06,VF00,VF16z NOP + add.xyz VF14,VF14,VF15 NOP + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) +EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 27 [43 0] 45 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] + NOP NOP + NOP NOP + mul.xyz 
VF10,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 + mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF10y NOP + NOP NOP + mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) + mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 + maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF13,VF08,VF05 NOP + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 + mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) + maxx.w VF06,VF06,VF00x NOP + add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 + mulw.xyz VF13,VF09,VF06w NOP + NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] + NOP NOP + NOP NOP + mul.xyz VF13,VF13,VF10 div Q,VF00w,VF13y + NOP move.xyz VF12,VF12 + mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF13y NOP + maddz.w VF05,VF00,VF13z NOP + mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 + mul.xyz VF12,VF12,VF10 NOP + mulax.w ACC,VF00,VF12x NOP ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 + mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 
[4 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 
[2 0] 4 [EXPL_vu1_general_nospec_quad_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_quad_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralNoSpecQuad_CodeEnd: +; iCount=485 +; register stats: +; 11 VU User integer +; 26 VU User floating point diff --git a/vu1/sce_general_nospec_tri_vcl.vsm b/vu1/sce_general_nospec_tri_vcl.vsm new file mode 100644 index 00000000..e95d85f9 --- /dev/null +++ b/vu1/sce_general_nospec_tri_vcl.vsm @@ -0,0 +1,490 @@ +; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === hDown : optimal=35 clid=0 mlid=2 size=(36) +; === dUp : optimal=35 clid=0 mlid=1 size=(36) +; === normal : optimal=35 clid=0 mlid=1 size=(36) +; === another : optimal=35 clid=0 mlid=2 size=(36) +; === normal2 : optimal=35 clid=0 mlid=2 size=(36) +; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=8 clid=5 mlid=5 size=(8) +; === ldumb : optimal=8 clid=0 mlid=4 size=(8) +; === normal1 : optimal=8 clid=0 mlid=4 size=(8) +; === hDown : optimal=8 clid=0 mlid=4 size=(8) +; === vuta : optimal=8 clid=0 mlid=4 size=(8) +; === dUp : optimal=8 clid=0 mlid=4 size=(8) +; === normal : optimal=8 clid=0 mlid=4 size=(8) +; === another : optimal=8 clid=0 mlid=4 size=(8) +; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) +; === normal2 : optimal=8 clid=0 mlid=6 size=(8) +; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) +; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=21 clid=3 mlid=3 size=(26) +; === ldumb : optimal=21 clid=0 mlid=3 size=(26) +; === normal1 : optimal=21 clid=0 mlid=3 size=(26) +; === hDown : optimal=21 clid=0 mlid=3 size=(26) +; === vuta : optimal=21 clid=0 mlid=2 size=(26) +; === dUp : optimal=21 clid=0 mlid=3 size=(26) +; === normal : optimal=21 clid=0 mlid=3 size=(26) +; === another : optimal=21 
clid=0 mlid=3 size=(26) +; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) +; === normal2 : optimal=21 clid=0 mlid=3 size=(26) +; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) +; === __LP__ EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralNoSpecTri_CodeStart + .global vsmGeneralNoSpecTri_CodeEnd +vsmGeneralNoSpecTri_CodeStart: +__v_vu1_general_nospec_tri_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_tri_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 48 [48 0] 48 [main_loop_lid] + NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + NOP xtop VI05 + NOP iaddiu VI03,VI05,0x00000005 + NOP mr32.z VF05,VF05 + NOP iaddiu VI04,VI05,0x000000ed + NOP ilw.x VI05,0(VI05) + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + NOP lq VF06,75(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP ilw.w VI07,0(VI00) + mul.xyz VF09,VF09,VF08 lq.xyz VF08,3(VI03) + NOP iadd VI06,VI03,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI08,VF06x + mulax ACC,VF01,VF08x ior VI08,VI08,VI05 + madday ACC,VF02,VF08y mfir.x VF06,VI08 + maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 + maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) + NOP lq.xyz VF07,58(VI00) + NOP sq VF06,-1(VI04) + NOP iaddiu VI08,VI00,0x00007fff + mulax 
ACC,VF01,VF14x div Q,VF00w,VF15w + madday ACC,VF02,VF14y lq.xyz VF08,6(VI03) + maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 + maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) + add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) + mulax ACC,VF01,VF08x lq.xyz VF07,5(VI03) + madday ACC,VF02,VF08y lq.w VF08,0(VI00) + maddaz ACC,VF03,VF08z div Q,VF00w,VF14w + mulq.xyz VF08,VF15,Q fcset 0 + mulq.xyz VF07,VF07,Q iaddiu VI09,VI03,0 + maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 + NOP sq.xyz VF09,1(VI04) + add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) + maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mul.xyz VF13,VF08,VF06 mfir.w VF10,VI08 + ftoi4.xyz VF10,VF10 lq.xyz VF11,8(VI03) + mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) + sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) + mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 + mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) + clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF12,VF10 lq.xyz VF11,3(VI03) + sub.xyz VF10,VF07,VF08 NOP + mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 + mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) + mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) + madday ACC,VF02,VF11y mfir.w VF12,VI08 + maddaz ACC,VF03,VF11z NOP + maddw VF15,VF04,VF00w NOP + mulax ACC,VF01,VF14x lq.xyz VF11,6(VI03) + madday ACC,VF02,VF14y sq VF12,2(VI09) + maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 + maddw VF14,VF04,VF00w div Q,VF00w,VF15w + mulax ACC,VF01,VF11x lq.xyz VF16,5(VI03) + madday ACC,VF02,VF11y iaddiu VI10,VI04,0 + maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) + maddw VF12,VF04,VF00w lq.xyz VF11,8(VI03) + clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) + opmula.xyz 
ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 + mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w + mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) + opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 + NOP iand VI03,VI01,VI02 + mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) + add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) + add.xyz VF17,VF07,VF05 fmand VI01,VI07 + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 + ftoi4.xyz VF10,VF10 iaddiu VI03,VI11,0 + ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff + sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 + mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 + mulq.xyz VF15,VF11,Q sq VF15,8(VI09) + clipw.xyz VF16xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 20 [17 0] 21 [EXPL_vu1_general_nospec_tri_pp4_vcl_xform_loop_lid__EPI0] + ftoi4.xyz VF09,VF10 NOP + sub.xyz VF10,VF07,VF08 NOP + mul.xyz VF08,VF07,VF06 NOP + mulw.xyz VF14,VF14,VF08w NOP + add.xyz VF07,VF07,VF05 mfir.w VF09,VI08 + clipw.xyz VF08xyz,VF07w sq.xyz VF15,6(VI09) ; STALL_LATENCY ?1 + opmula.xyz ACCxyz,VF14xyz,VF10xyz lq.xyz VF06,60(VI00) + opmsub.xyz VF11xyz,VF10xyz,VF14xyz lq.xyz VF05,59(VI00) + NOP sq VF09,2(VI09) + NOP fcand VI01,262143 + NOP iand VI02,VI01,VI02 + abs.xyz VF00,VF11 fmand VI07,VI07 + NOP ior VI02,VI02,VI07 + NOP iaddiu VI02,VI02,0x00007fff + ftoi4.xyz VF15,VF07 ilw.x VI01,0(VI00) + NOP mfir.w VF15,VI02 + NOP iaddiu VI02,VI00,0x00000001 + NOP ilw.x VI03,0(VI02) + NOP ibeq VI01,VI00,pt_lights_lid + NOP sq VF15,8(VI09) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_tri_pp4_vcl_9] + maxw.z VF07,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] + NOP xtop VI06 + NOP ilw.x VI05,0(VI06) + NOP lq.xyz VF10,67(VI00) + NOP lq.xyz VF08,3(VI03) + NOP lq.xyz 
VF11,68(VI00) + NOP lq.xyz VF09,69(VI00) + NOP iaddiu VI04,VI06,0x00000005 + mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 + madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 + maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) + NOP iaddiu VI06,VI06,0 + NOP iadd VI07,VI07,VI05 + NOP lq.xyz VF08,0(VI03) + mul.xyz VF17,VF10,VF17 iaddiu VI04,VI04,0x00000003 + NOP lq.xyz VF09,1(VI03) + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3 + NOP iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] + adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) + maddx.z VF11,VF07,VF17x NOP + mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 + maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] + adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 7 [7 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__PRO3] + adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) + madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) + maddx.z VF12,VF07,VF17x 
NOP + add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF18 iaddiu VI04,VI04,0x00000003 + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) + mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + add.xyz VF10,VF15,VF14 NOP + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP + add.xyz VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) + mulz.xyz VF13,VF09,VF11z NOP + NOP sq.xyz VF17,-8(VI06) + add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) + NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 + add.xyz VF08,VF15,VF14 sq.xyz VF10,-11(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 + add.xyz VF12,VF10,VF12 NOP + madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) + NOP sq.xyz VF12,238(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; 
STALL_LATENCY ?1 + NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,244(VI06) +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + NOP NOP + mulz.xyz VF10,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3: +; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EPI3] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 + mulz.xyz VF09,VF09,VF17z NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 
] 1 [4 0] 1 [__v_vu1_general_nospec_tri_pp4_vcl_15] + maxw.z VF07,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,71(VI00) + NOP lq.xyz VF09,3(VI03) + NOP lq.xyz VF08,72(VI00) + NOP lq.xyz VF11,73(VI00) + NOP lq.xyz VF10,74(VI00) + mulax.xyz ACC,VF12,VF09x NOP + madday.xyz ACC,VF08,VF09y xtop VI06 + maddaz.xyz ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) + sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 + NOP iaddiu VI06,VI06,0 + adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 + maddx.z VF13,VF07,VF13x lq.xyz VF09,1(VI03) + NOP sqrt Q,VF13z ; STALL_LATENCY ?3 + NOP NOP + NOP iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 + NOP NOP +; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] + NOP lq.xyz VF15,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + sub.xyz VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 + NOP waitq + mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) + adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF15,VF15,VF16 NOP + madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z NOP + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + maddz.w VF06,VF00,VF15z NOP + NOP NOP + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 + maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 
+EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 26 [25 21] 26 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + NOP lq.xyz VF16,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 + sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 + NOP NOP + NOP NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF13,VF12,VF12 NOP + mulq.xyz VF16,VF16,Q div Q,VF00w,VF05w + madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) + mulax.w ACC,VF00,VF14x NOP + adday.z ACC,VF13,VF13y NOP + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF16,VF16,VF17 NOP + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) + mulq.xyz VF15,VF15,Q sqrt Q,VF13z + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + maddz.w VF06,VF00,VF16z NOP + add.xyz VF14,VF14,VF15 NOP + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) +EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 27 [43 0] 45 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] + NOP NOP + NOP NOP + mul.xyz VF10,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 + mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF10y NOP + NOP NOP + mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) + mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 + maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF13,VF08,VF05 NOP + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 + mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) + maxx.w VF06,VF06,VF00x NOP + add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 + mulw.xyz VF13,VF09,VF06w NOP + NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 + mula.xyz ACC,VF13,VF06 NOP 
+ madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] + NOP NOP + NOP NOP + mul.xyz VF13,VF13,VF10 div Q,VF00w,VF13y + NOP move.xyz VF12,VF12 + mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF13y NOP + maddz.w VF05,VF00,VF13z NOP + mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 + mul.xyz VF12,VF12,VF10 NOP + mulax.w ACC,VF00,VF12x NOP ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 + mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 
[EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_nospec_tri_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_tri_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralNoSpecTri_CodeEnd: +; iCount=383 +; register stats: +; 12 VU User integer +; 19 VU User floating point diff --git a/vu1/sce_general_nospec_vcl.vsm b/vu1/sce_general_nospec_vcl.vsm new file mode 100644 index 00000000..e6d086fc --- /dev/null +++ b/vu1/sce_general_nospec_vcl.vsm @@ -0,0 +1,570 @@ +; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=22 clid=0 mlid=2 size=(22) +; === normal1 : optimal=22 clid=0 mlid=2 
size=(22) +; === vuta : optimal=22 clid=0 mlid=2 size=(22) +; === dUp : optimal=22 clid=0 mlid=2 size=(22) +; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) +; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=8 clid=5 mlid=5 size=(8) +; === ldumb : optimal=8 clid=0 mlid=4 size=(8) +; === normal1 : optimal=8 clid=0 mlid=4 size=(8) +; === hDown : optimal=8 clid=0 mlid=4 size=(8) +; === vuta : optimal=8 clid=0 mlid=4 size=(8) +; === dUp : optimal=8 clid=0 mlid=4 size=(8) +; === normal : optimal=8 clid=0 mlid=4 size=(8) +; === another : optimal=8 clid=0 mlid=4 size=(8) +; === vuta1 : optimal=8 clid=0 mlid=6 size=(8) +; === normal2 : optimal=8 clid=0 mlid=6 size=(8) +; === dumb2 : optimal=8 clid=4 mlid=6 size=(8) +; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=21 clid=3 mlid=3 size=(26) +; === ldumb : optimal=21 clid=0 mlid=3 size=(26) +; === normal1 : optimal=21 clid=0 mlid=3 size=(26) +; === hDown : optimal=21 clid=0 mlid=3 size=(26) +; === vuta : optimal=21 clid=0 mlid=2 size=(26) +; === dUp : optimal=21 clid=0 mlid=3 size=(26) +; === normal : optimal=21 clid=0 mlid=3 size=(26) +; === another : optimal=21 clid=0 mlid=3 size=(26) +; === vuta1 : optimal=21 clid=0 mlid=2 size=(26) +; === normal2 : optimal=21 clid=0 mlid=3 size=(26) +; === dumb2 : optimal=21 clid=3 mlid=3 size=(26) +; === __LP__ EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 
size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralNoSpec_CodeStart + .global vsmGeneralNoSpec_CodeEnd +vsmGeneralNoSpec_CodeStart: +__v_vu1_general_nospec_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_nospec_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 26 [26 0] 26 [main_loop_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI01 + NOP iaddiu VI03,VI01,0x00000005 + NOP mr32.z VF05,VF06 + NOP iaddiu VI04,VI01,0 + NOP ilw.x VI05,0(VI01) + NOP fcset 0 + NOP lq VF08,75(VI00) + NOP lq.xyz VF06,76(VI00) + NOP iadd VI06,VI03,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI07,VF08x + NOP ior VI07,VI07,VI05 + NOP mfir.x VF08,VI07 + NOP iaddiu VI07,VI01,0x00000001 + NOP ilw.w VI02,76(VI00) + NOP iadd VI06,VI06,VI05 + NOP sq VF08,236(VI04) + NOP iaddiu VI01,VI01,0x00000005 + NOP iaddiu VI08,VI07,0x00000004 + NOP iaddiu VI09,VI00,0x000003ff + NOP iaddiu VI10,VI00,0x00000800 + NOP iaddiu VI11,VI00,0x00000400 + maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF08,0(VI07) + ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 + NOP mtir VI13,VF08x ; STALL_LATENCY ?3 + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_8] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,3(VI13) + NOP mtir VI14,VF08y + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_9] + NOP iand VI13,VI14,VI10 + NOP iand VI14,VI14,VI09 + NOP iadd VI14,VI14,VI01 + NOP isw.w VI12,0(VI14) + NOP isw.w VI13,3(VI14) + NOP mtir 
VI13,VF08z + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_nospec_pp4_vcl_10] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,3(VI13) + NOP mtir VI14,VF08w + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_nospec_pp4_vcl_11] + NOP iand VI13,VI14,VI09 + NOP iaddiu VI07,VI07,0x00000001 + NOP iand VI14,VI14,VI10 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP ibne VI07,VI08,adcLoop_lid + NOP isw.w VI14,3(VI13) +adcLoop_done_lid: +; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] + NOP lq.xyz VF12,0(VI03) + mulax ACC,VF01,VF12x iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 + madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) + maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) + maddw VF12,VF04,VF00w lq.w VF05,0(VI00) + mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF12w + add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 + NOP iaddiu VI03,VI03,0x00000003 + NOP lq.xyz VF11,-1(VI03) + mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 + max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1 + max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 +; _LNOPT_w=[ ] 21 [19 0] 22 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__PRO1] + NOP NOP + add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) + sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 + max.xyz VF09,VF12,VF12 NOP + mul.xyz VF14,VF12,VF06 NOP + mulax ACC,VF01,VF15x NOP + madday ACC,VF02,VF15y NOP + maddaz ACC,VF03,VF15z ilw.w VI09,-3(VI03) + maddw VF12,VF04,VF00w isub VI01,VI08,VI07 + mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000003 + opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 + NOP div Q,VF00w,VF12w + NOP sq.xyz VF11,-6(VI04) + abs.xyz 
VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 + NOP lq.xyz VF11,-1(VI03) + clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibeq VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0 + mulw.xyz VF08,VF13,VF05w fcand VI01,262143 +EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 + add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) + sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 + max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 + mul.xyz VF15,VF12,VF06 ilw.w VI09,-3(VI03) + mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff + madday ACC,VF02,VF16y mfir.w VF14,VI10 + maddaz ACC,VF03,VF16z isub VI01,VI08,VI07 + maddw VF12,VF04,VF00w iand VI10,VI09,VI08 + mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) + opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000003 + mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w + clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) + NOP iaddiu VI04,VI04,0x00000003 + NOP fmand VI11,VI08 + NOP lq.xyz VF11,-1(VI03) + NOP isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibne VI03,VI06,EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP fcand VI01,262143 +EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 + opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 + opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 + NOP ilw.w VI09,-3(VI03) + NOP iaddiu VI10,VI10,0x00007fff + clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 + abs.xyz VF00,VF14 fmand VI10,VI08 + NOP isub VI07,VI10,VI07 + ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 + add.xyz VF10,VF12,VF05 fcand VI01,262143 + 
NOP iand VI01,VI01,VI02 + mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 + NOP ior VI10,VI10,VI09 + ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff + NOP mfir.w VF10,VI10 + NOP sq.xyz VF11,-3(VI04) + NOP sq VF13,-4(VI04) + NOP b EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF10,-1(VI04) +EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EPI1] + NOP NOP + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF06,VF12,VF06 NOP + opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 + opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP + clipw.xyz VF06xyz,VF07w ilw.w VI03,-3(VI03) + abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 + NOP isub VI07,VI01,VI07 + NOP iand VI08,VI07,VI08 + add.xyz VF07,VF12,VF05 fcand VI01,262143 + NOP iand VI02,VI01,VI02 + NOP ior VI02,VI02,VI08 + mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 + ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF07,VI03 + NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 + NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 +EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 6 [6 0] 6 [EXPL_vu1_general_nospec_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_pp4_vcl_16] + maxw.z VF07,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ normal2 ] 17 [17 0] 17 [dir_light_loop_lid] + NOP xtop VI06 + NOP ilw.x VI05,0(VI06) + NOP lq.xyz VF10,67(VI00) + NOP lq.xyz VF08,3(VI03) + NOP lq.xyz VF11,68(VI00) + NOP lq.xyz VF09,69(VI00) + NOP iaddiu VI04,VI06,0x00000005 + mulax.xyz ACC,VF10,VF08x iadd VI07,VI04,VI05 + madday.xyz ACC,VF11,VF08y iadd VI07,VI07,VI05 + maddz.xyz VF10,VF09,VF08z lq.xyz VF17,1(VI04) + NOP iaddiu VI06,VI06,0 + NOP iadd VI07,VI07,VI05 + NOP lq.xyz VF08,0(VI03) + mul.xyz VF17,VF10,VF17 
iaddiu VI04,VI04,0x00000003 + NOP lq.xyz VF09,1(VI03) + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3 + NOP iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO1] + adday.z ACC,VF17,VF17y lq.xyz VF12,1(VI04) + maddx.z VF11,VF07,VF17x NOP + mul.xyz VF17,VF10,VF12 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + NOP ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2 + maxx.z VF11,VF11,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 6 [6 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO2] + adday.z ACC,VF17,VF17y lq.xyz VF13,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mul.xyz VF17,VF10,VF13 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?2 + NOP NOP + mulz.xyz VF13,VF09,VF11z ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 7 [7 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__PRO3] + adday.z ACC,VF17,VF17y lq.xyz VF14,1(VI04) + maddx.z VF12,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF14 iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mulz.xyz VF13,VF09,VF11z lq.xyz VF15,238(VI06) + madd.xyz VF14,VF08,VF05 ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0 + maxx.z VF11,VF12,VF00x iaddiu VI06,VI06,0x000000f9 +EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 8 [8 8] 8 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + adday.z ACC,VF17,VF17y lq.xyz VF18,1(VI04) + maddx.z VF12,VF07,VF17x NOP + add.xyz VF16,VF15,VF14 iaddiu VI06,VI06,0x00000003 + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF17,VF10,VF18 iaddiu VI04,VI04,0x00000003 + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-11(VI06) + mulz.xyz VF13,VF09,VF11z ibne VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + maxx.z VF11,VF12,VF00x sq.xyz VF16,-14(VI06) 
+EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 19 [23 0] 25 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI0] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + add.xyz VF10,VF15,VF14 NOP + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-8(VI06) + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP + add.xyz VF17,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-5(VI06) + mulz.xyz VF13,VF09,VF11z NOP + NOP sq.xyz VF17,-8(VI06) + add.xyz VF09,VF15,VF14 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF14,VF08,VF05 lq.xyz VF15,-2(VI06) + NOP sq.xyz VF09,-5(VI06) ; STALL_LATENCY ?1 + add.xyz VF08,VF15,VF14 sq.xyz VF10,-11(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI1] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + madd.xyz VF12,VF08,VF05 lq.xyz VF10,238(VI06) + mula.xyz ACC,VF13,VF06 NOP + mulz.xyz VF13,VF09,VF11z NOP ; STALL_LATENCY ?1 + add.xyz VF12,VF10,VF12 NOP + madd.xyz VF10,VF08,VF05 lq.xyz VF17,241(VI06) + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,244(VI06) + NOP sq.xyz VF12,238(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?1 + NOP sq.xyz VF11,241(VI06) ; STALL_LATENCY ?1 + NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,244(VI06) +EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI2] + adday.z ACC,VF17,VF17y NOP + maddx.z 
VF17,VF07,VF17x NOP + NOP NOP + mulz.xyz VF10,VF09,VF11z NOP + maxx.z VF11,VF17,VF00x NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF09,VF09,VF11z NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF08,VF05 lq.xyz VF17,238(VI06) + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,241(VI06) + add.xyz VF11,VF17,VF10 NOP + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + NOP b EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3: +; _LNOPT_w=[ ] 8 [23 0] 23 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EPI3] + adday.z ACC,VF17,VF17y NOP + maddx.z VF17,VF07,VF17x NOP + maxx.z VF17,VF17,VF00x NOP ; STALL_LATENCY ?3 + mulz.xyz VF09,VF09,VF17z NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF09,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF09,VF08,VF05 lq.xyz VF08,238(VI06) + add.xyz VF08,VF08,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_nospec_pp4_vcl_22] + maxw.z VF07,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ normal2 ] 25 [32 0] 36 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,71(VI00) + NOP lq.xyz VF09,3(VI03) + NOP lq.xyz VF08,72(VI00) + NOP lq.xyz VF11,73(VI00) + NOP lq.xyz VF10,74(VI00) + mulax.xyz ACC,VF12,VF09x NOP + madday.xyz ACC,VF08,VF09y xtop VI06 + maddaz.xyz 
ACC,VF11,VF09z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF11,VF10,VF00w lq.xyz VF12,0(VI04) + sub.xyz VF12,VF11,VF12 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF13,VF12,VF12 iaddiu VI06,VI06,0 ; STALL_LATENCY ?3 + NOP iaddiu VI06,VI06,0 + adday.z ACC,VF13,VF13y lq.xyz VF08,0(VI03) ; STALL_LATENCY ?2 + maddx.z VF13,VF07,VF13x lq.xyz VF09,1(VI03) + NOP sqrt Q,VF13z ; STALL_LATENCY ?3 + NOP NOP + NOP iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w lq.xyz VF10,5(VI03) + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1 + NOP NOP +; _LNOPT_w=[ ] 20 [25 0] 26 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__PRO1] + NOP lq.xyz VF15,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + sub.xyz VF12,VF11,VF15 move.xyz VF15,VF12 ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x iaddiu VI04,VI04,0x00000003 ; STALL_LATENCY ?1 + mul.xyz VF13,VF12,VF12 NOP ; STALL_LATENCY ?1 + NOP waitq + mulq.xyz VF15,VF15,Q lq.xyz VF16,-5(VI04) + adday.z ACC,VF13,VF13y NOP ; STALL_LATENCY ?1 + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF15,VF15,VF16 NOP + madday.w ACC,VF00,VF14y sqrt Q,VF13z ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z NOP + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + maddz.w VF06,VF00,VF15z NOP + NOP NOP + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibeq VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0 + maxx.w VF06,VF06,VF00x iaddiu VI06,VI06,0x000000f3 +EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 26 [25 21] 26 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + NOP lq.xyz VF16,0(VI04) + NOP NOP + mul.xyz VF14,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w iaddiu VI06,VI06,0x00000003 + sub.xyz VF12,VF11,VF16 move.xyz VF16,VF12 + NOP NOP + NOP NOP + mula.xyz ACC,VF13,VF06 NOP + mul.xyz VF13,VF12,VF12 NOP + mulq.xyz VF16,VF16,Q 
div Q,VF00w,VF05w + madd.xyz VF15,VF08,VF05 lq.xyz VF17,-2(VI04) + mulax.w ACC,VF00,VF14x NOP + adday.z ACC,VF13,VF13y NOP + maddx.z VF13,VF07,VF13x NOP + mul.xyz VF16,VF16,VF17 NOP + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z lq.xyz VF14,-8(VI06) + mulq.xyz VF15,VF15,Q sqrt Q,VF13z + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + maddz.w VF06,VF00,VF16z NOP + add.xyz VF14,VF14,VF15 NOP + NOP iaddiu VI04,VI04,0x00000003 + addw.x VF13,VF00,VF00w NOP + addq.y VF13,VF00,Q ibne VI04,VI07,EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + maxx.w VF06,VF06,VF00x sq.xyz VF14,-8(VI06) +EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 27 [43 0] 45 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI0] + NOP NOP + NOP NOP + mul.xyz VF10,VF13,VF10 div Q,VF00w,VF13y + mulw.xyz VF13,VF09,VF06w move.xyz VF12,VF12 + mulax.w ACC,VF00,VF10x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF10y NOP + NOP NOP + mulq.xyz VF12,VF12,Q lq.xyz VF11,-2(VI04) + mul.xyz VF12,VF12,VF11 NOP ; STALL_LATENCY ?3 + maddz.w VF05,VF00,VF10z div Q,VF00w,VF05w + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF13,VF08,VF05 NOP + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + NOP div Q,VF00w,VF05w ; STALL_THRUPUT ?1 + mulq.xyz VF13,VF13,Q lq.xyz VF10,-5(VI06) + maxx.w VF06,VF06,VF00x NOP + add.xyz VF10,VF10,VF13 NOP ; STALL_LATENCY ?2 + mulw.xyz VF13,VF09,VF06w NOP + NOP sq.xyz VF10,-5(VI06) ; STALL_LATENCY ?2 + mula.xyz ACC,VF13,VF06 NOP + madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF08,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 20 [43 0] 45 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EPI1] + NOP NOP + NOP NOP + mul.xyz VF13,VF13,VF10 div 
Q,VF00w,VF13y + NOP move.xyz VF12,VF12 + mulax.w ACC,VF00,VF13x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF13y NOP + maddz.w VF05,VF00,VF13z NOP + mulq.xyz VF12,VF12,Q lq.xyz VF10,-2(VI04) + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?2 + mul.xyz VF12,VF12,VF10 NOP + mulax.w ACC,VF00,VF12x NOP ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF12y NOP + maddz.w VF06,VF00,VF12z NOP + maxx.w VF06,VF06,VF00x NOP ; STALL_LATENCY ?3 + mulw.xyz VF13,VF09,VF06w NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF13,VF06 NOP ; STALL_LATENCY ?3 + madd.xyz VF08,VF08,VF05 NOP + mulq.xyz VF08,VF08,Q lq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF08,VF09,VF08 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF08,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz 
VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_nospec_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_nospec_pp4_vcl_30] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralNoSpec_CodeEnd: +; iCount=450 +; register stats: +; 15 VU User integer +; 19 VU User floating point diff --git a/vu1/sce_general_pv_diff_quad_vcl.vsm b/vu1/sce_general_pv_diff_quad_vcl.vsm new file mode 100644 index 00000000..22351c35 --- /dev/null +++ b/vu1/sce_general_pv_diff_quad_vcl.vsm @@ -0,0 +1,714 @@ +; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === normal1 : optimal=50 clid=0 mlid=1 size=(51) +; === hDown : optimal=50 clid=0 mlid=1 size=(51) +; === another : optimal=50 clid=0 mlid=1 size=(51) +; === normal2 : optimal=50 clid=0 mlid=2 size=(51) +; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=4 mlid=4 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=4 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : 
optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) +; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === hDown : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === normal : optimal=34 clid=0 mlid=4 size=(39) +; === another : optimal=34 clid=0 mlid=4 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) +; === normal2 : optimal=34 clid=0 mlid=5 size=(39) +; === __LP__ EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralPVDiffQuad_CodeStart + .global vsmGeneralPVDiffQuad_CodeEnd +vsmGeneralPVDiffQuad_CodeStart: +__v_vu1_general_pv_diff_quad_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_quad_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 53 [53 0] 57 [main_loop_lid] + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + mul.xyz VF09,VF09,VF08 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 + NOP xtop VI05 + NOP iaddiu VI03,VI05,0x00000005 + add.xyz VF09,VF07,VF09 lq.xyz VF07,0(VI03) ; STALL_LATENCY ?1 + 
NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + NOP iaddiu VI04,VI05,0x000000ed + mulax ACC,VF01,VF07x lq VF06,75(VI00) + madday ACC,VF02,VF07y ilw.x VI05,0(VI05) + maddaz ACC,VF03,VF07z lq.xyz VF20,8(VI03) + maddw VF22,VF04,VF00w mr32.z VF05,VF05 + NOP mtir VI08,VF06x + NOP ior VI08,VI08,VI05 + mulax ACC,VF01,VF20x lq.xyz VF15,12(VI03) + madday ACC,VF02,VF20y div Q,VF00w,VF22w + maddaz ACC,VF03,VF20z lq.xyz VF12,15(VI03) + maddw VF20,VF04,VF00w mfir.x VF06,VI08 + mulax ACC,VF01,VF15x lq.xyz VF14,4(VI03) + madday ACC,VF02,VF15y lq.xyz VF07,11(VI03) + maddaz ACC,VF03,VF15z sq.xyz VF12,11(VI03) + maddw VF15,VF04,VF00w lq.xyz VF12,9(VI03) + mulax ACC,VF01,VF14x div Q,VF00w,VF20w + madday ACC,VF02,VF14y lq.xyz VF18,13(VI03) + maddaz ACC,VF03,VF14z lq.xyz VF10,2(VI03) + maddw VF12,VF04,VF00w sq.xyz VF12,13(VI03) + addi.xy VF05,VF00,I loi 0x45000000 + mulq.xyz VF13,VF22,Q sq VF06,-1(VI04) + mulq.xyz VF10,VF10,Q lq.xyz VF06,76(VI00) + NOP div Q,VF00w,VF12w + NOP ilw.w VI07,0(VI00) + NOP lq.xyz VF11,10(VI03) + maxi.w VF07,VF00,I sq.xyz VF18,9(VI03) + mul.xyz VF08,VF13,VF06 lq.xyz VF18,6(VI03) + mulq.xyz VF20,VF20,Q iadd VI06,VI03,VI05 + mulq.xyz VF11,VF11,Q iadd VI06,VI06,VI05 + add.xyz VF14,VF13,VF05 div Q,VF00w,VF15w + mulq.xyz VF16,VF12,Q fcset 0 + clipw.xyz VF08xyz,VF07w iadd VI06,VI06,VI05 + add.xyz VF07,VF20,VF05 sq.xyz VF07,15(VI03) + mulq.xyz VF12,VF18,Q iadd VI06,VI06,VI05 + sub.xyz VF22,VF13,VF16 iaddiu VI08,VI00,0x00007fff + mul.xyz VF17,VF16,VF06 lq.w VF08,0(VI00) + mulq.xyz VF08,VF15,Q iaddiu VI08,VI08,0x00000001 + add.xyz VF13,VF16,VF05 iaddiu VI01,VI03,0 + mul.xyz VF15,VF20,VF06 iaddiu VI01,VI01,0 + clipw.xyz VF17xyz,VF07w lq.xyz VF18,14(VI03) + mul.xyz VF17,VF08,VF06 iaddiu VI03,VI01,0 + mulw.xyz VF22,VF22,VF08w iaddiu VI03,VI03,0x00000010 + sub.xyz VF20,VF20,VF16 ilw.w VI02,76(VI00) + add.xyz VF08,VF08,VF05 ibeq VI03,VI06,EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF17xyz,VF07w mfir.w VF10,VI08 
+EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ normal1 ] 51 [50 50] 51 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF19,VF14 lq.xyz VF16,0(VI03) + opmula.xyz ACCxyz,VF22xyz,VF20xyz lq.xyz VF17,8(VI03) + opmsub.xyz VF00xyz,VF20xyz,VF22xyz lq.xyz VF21,12(VI03) + mulq.xyz VF20,VF18,Q iaddiu VI01,VI03,0 + mulax ACC,VF01,VF16x iaddiu VI10,VI04,0 + madday ACC,VF02,VF16y lq.xyz VF14,4(VI03) + maddaz ACC,VF03,VF16z fmand VI09,VI07 + maddw VF22,VF04,VF00w lq.xyz VF16,11(VI03) + mulax ACC,VF01,VF17x lq.xyz VF23,15(VI03) + madday ACC,VF02,VF17y lq.xyz VF18,13(VI03) + maddaz ACC,VF03,VF17z mfir.w VF19,VI08 + maddw VF17,VF04,VF00w div Q,VF00w,VF22w + mulax ACC,VF01,VF21x sq.xyz VF23,11(VI03) + madday ACC,VF02,VF21y lq.xyz VF23,9(VI03) + maddaz ACC,VF03,VF21z sq VF19,2(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF20,6(VI04) + ftoi4.xyz VF10,VF13 sq.xyz VF10,0(VI04) + maddw VF15,VF04,VF00w sq.xyz VF11,9(VI04) + mulq.xyz VF13,VF22,Q sq.xyz VF12,3(VI04) + mulax ACC,VF01,VF14x div Q,VF00w,VF17w + madday ACC,VF02,VF14y sq VF10,5(VI04) + maddaz ACC,VF03,VF14z lq.xyz VF10,2(VI03) + mul.xyz VF19,VF13,VF06 iaddiu VI11,VI10,0 + maddw VF12,VF04,VF00w sq.xyz VF23,13(VI03) + add.xyz VF14,VF13,VF05 sq.xyz VF18,9(VI03) + mulq.xyz VF10,VF10,Q lq.xyz VF11,10(VI03) + clipw.xyz VF19xyz,VF07w sq.xyz VF16,15(VI03) + mulq.xyz VF16,VF17,Q div Q,VF00w,VF12w + NOP iaddiu VI10,VI01,0 + NOP fcand VI01,16777215 + ftoi4.xyz VF19,VF07 iand VI12,VI01,VI02 + add.xyz VF07,VF16,VF05 lq.xyz VF18,6(VI03) + NOP iaddiu VI01,VI11,0 + mulq.xyz VF11,VF11,Q ior VI09,VI12,VI09 + mulq.xyz VF17,VF12,Q div Q,VF00w,VF15w + NOP iaddiu VI09,VI09,0x00007fff + mulq.xyz VF12,VF18,Q mfir.w VF20,VI09 + ftoi4.xyz VF20,VF08 mfir.w VF19,VI09 + sub.xyz VF22,VF13,VF17 sq.xyz VF09,10(VI04) + mul.xyz VF21,VF17,VF06 sq.xyz VF09,4(VI04) + add.xyz VF13,VF17,VF05 lq.xyz VF18,14(VI03) + mulq.xyz VF08,VF15,Q sq VF19,11(VI04) + NOP sq VF20,8(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz 
VF09,1(VI04) + mul.xyz VF15,VF16,VF06 sq.xyz VF09,7(VI04) + mul.xyz VF19,VF08,VF06 iaddiu VI04,VI01,0x0000000c + add.xyz VF08,VF08,VF05 iaddiu VI03,VI10,0 + mulw.xyz VF22,VF22,VF08w iaddiu VI03,VI03,0x00000010 + sub.xyz VF20,VF16,VF17 NOP + clipw.xyz VF19xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP mfir.w VF10,VI08 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 28 [27 0] 28 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + opmula.xyz ACCxyz,VF22xyz,VF20xyz sq.xyz VF10,0(VI04) + opmsub.xyz VF16xyz,VF20xyz,VF22xyz mfir.w VF05,VI08 + ftoi4.xyz VF05,VF14 sq.xyz VF11,9(VI04) + mulq.xyz VF20,VF18,Q sq.xyz VF12,3(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,10(VI04) + abs.xyz VF00,VF16 fmand VI07,VI07 + NOP sq VF05,2(VI04) + NOP sq.xyz VF20,6(VI04) + NOP fcand VI01,16777215 + NOP iand VI02,VI01,VI02 + NOP ior VI02,VI02,VI07 + ftoi4.xyz VF20,VF08 iaddiu VI02,VI02,0x00007fff + ftoi4.xyz VF07,VF07 mfir.w VF20,VI02 + NOP mfir.w VF07,VI02 + NOP sq.xyz VF09,4(VI04) + NOP sq.xyz VF09,1(VI04) + ftoi4.xyz VF10,VF13 sq VF20,8(VI04) + NOP sq VF07,11(VI04) + NOP sq.xyz VF09,7(VI04) + NOP ilw.x VI01,0(VI00) + NOP sq VF10,5(VI04) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,61(VI00) + NOP lq.xyz VF07,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_quad_pp4_vcl_9] + maxw.z VF08,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF11,67(VI00) + NOP lq.xyz VF12,3(VI03) + NOP lq.xyz VF13,68(VI00) + NOP lq.xyz VF09,69(VI00) + mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) + maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) + NOP esadd P,VF13 ; STALL_LATENCY 
?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 + mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 + mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) + NOP lq.xyz VF09,0(VI03) + NOP mr32.xyw VF16,VF14 + adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF09,VF00,VF16y NOP + maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + mul.xyz VF14,VF13,VF16 NOP + mul.xyz VF17,VF12,VF16 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 + adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF17x NOP + mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF09,VF00,VF16y NOP + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + mul.xyz VF18,VF12,VF16 NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w ACC,VF16,VF16x NOP + mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 + mula.xyz 
ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maddaw.xyz ACC,VF11,VF05w NOP + maxx.w VF06,VF09,VF00x NOP + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF18,VF12,VF16 NOP + mul.w VF05,VF05,VF05 NOP + add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w ACC,VF16,VF16x sq.xyz VF20,-11(VI06) + mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + mula.xyz ACC,VF17,VF19 NOP +EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF09,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP + maxx.z VF16,VF16,VF00x NOP + add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + NOP sq.xyz VF14,-8(VI06) + mul.w VF07,VF06,VF06 NOP + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 + madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) + mul.w VF05,VF07,VF07 NOP + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 
lq.xyz VF09,-2(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 23 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) + NOP NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + maxx.z VF16,VF16,VF00x NOP + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) + mul.w VF05,VF07,VF07 move.xyz VF15,VF15 + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [38 0] 38 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF09,VF00,VF16y NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + maxx.z VF16,VF16,VF00x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP move.xyz VF10,VF10 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + maddaw.xyz ACC,VF11,VF05w NOP ; 
STALL_LATENCY ?2 + madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_quad_pp4_vcl_15] + maxw.z VF08,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF09,71(VI00) + NOP lq.xyz VF10,3(VI03) + NOP lq.xyz VF12,72(VI00) + NOP lq.xyz VF13,73(VI00) + mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 + madday.xyz ACC,VF12,VF10y xtop VI06 + maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 + sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) + NOP sqrt Q,VF15z ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF15,VF00,Q iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP div Q,VF00w,VF15y + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) + mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 + NOP iaddiu VI06,VI06,0 + mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF15,VF15,VF12 iaddiu 
VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) + sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 + mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 + maddx.z VF20,VF08,VF20x NOP + NOP waitp + addw.x VF20,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF20z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0 + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + NOP move.xyz VF18,VF17 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF20x mfp.w VF06,P +; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 NOP + madday.w ACC,VF00,VF20y esadd P,VF17 + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF19,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF19x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z waitp + mulw.xyz VF21,VF10,VF05w mfp.w VF06,P + mulax.w ACC,VF00,VF14x sqrt Q,VF20z + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + addw.x VF20,VF00,VF00w ersqrt P,VF06w + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + addq.y VF20,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) + maddz.w VF05,VF00,VF14z NOP + mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 + mulq.xyz 
VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz VF18,VF17 + mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF20y NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 esadd P,VF17 + maddaw.xyz ACC,VF11,VF07w NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF21,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF21x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF07,VF00,VF21z mfp.w VF06,P + NOP lq.xyz VF22,-11(VI06) + mulq.xyz VF19,VF19,Q sqrt Q,VF20z + mulw.xyz VF21,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + add.xyz VF19,VF22,VF19 NOP + addw.x VF20,VF00,VF00w NOP + mul.w VF06,VF05,VF05 NOP + addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) + madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) + mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y + mula.xyz ACC,VF21,VF22 NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 NOP + NOP NOP + NOP NOP + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz VF18,VF17 + NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd 
P,VF12 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + madday.w ACC,VF00,VF20y NOP + mul.w VF07,VF07,VF07 NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF20,VF18,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF10,VF05w NOP + madd.xyz VF19,VF09,VF05 ersqrt P,VF06w + maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 + mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 + mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 + mul.w VF07,VF06,VF06 mfp.w VF06,P + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP + mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz 
VF19,-5(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 + mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF19x mfp.w VF06,P + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) + mulw.xyz VF20,VF10,VF05w NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + madday.w ACC,VF00,VF14y NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF20,VF18 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP + mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + 
maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz VF19,238(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF12,VF07,VF16 lq.xyz VF13,-3(VI04) + mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF16y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF12,VF12 + NOP mfp.w VF06,P + mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF16z NOP + mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF16y NOP + maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF15y NOP + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF09,VF05 NOP + mulq.xyz 
VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1: +; 
_LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pv_diff_quad_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_quad_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralPVDiffQuad_CodeEnd: +; iCount=613 +; register stats: +; 13 VU User integer +; 24 VU User floating point diff --git a/vu1/sce_general_pv_diff_tri_vcl.vsm b/vu1/sce_general_pv_diff_tri_vcl.vsm new file mode 100644 index 00000000..7dab5c3c --- /dev/null +++ b/vu1/sce_general_pv_diff_tri_vcl.vsm @@ -0,0 +1,688 @@ +; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === hDown : optimal=35 clid=0 mlid=2 size=(36) +; === dUp : optimal=35 clid=0 mlid=1 size=(36) +; === normal : optimal=35 clid=0 mlid=1 size=(36) +; === another : optimal=35 clid=0 mlid=2 size=(36) +; === normal2 : optimal=35 clid=0 mlid=2 size=(36) +; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=4 mlid=4 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=4 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) +; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === hDown : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 
size=(39) +; === normal : optimal=34 clid=0 mlid=4 size=(39) +; === another : optimal=34 clid=0 mlid=4 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) +; === normal2 : optimal=34 clid=0 mlid=5 size=(39) +; === __LP__ EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralPVDiffTri_CodeStart + .global vsmGeneralPVDiffTri_CodeEnd +vsmGeneralPVDiffTri_CodeStart: +__v_vu1_general_pv_diff_tri_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_tri_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 49 [49 0] 49 [main_loop_lid] + NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + NOP xtop VI05 + NOP iaddiu VI03,VI05,0x00000005 + NOP mr32.z VF05,VF05 + NOP iaddiu VI04,VI05,0x000000ed + NOP ilw.x VI05,0(VI05) + NOP lq VF06,75(VI00) + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP ilw.w VI07,0(VI00) + NOP iadd VI06,VI03,VI05 + mul.xyz VF09,VF09,VF08 lq.xyz VF08,4(VI03) + NOP iadd VI06,VI06,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI08,VF06x + mulax ACC,VF01,VF08x ior VI08,VI08,VI05 + madday ACC,VF02,VF08y mfir.x VF06,VI08 + maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 + maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) + NOP lq.xyz 
VF07,58(VI00) + NOP sq VF06,-1(VI04) + NOP iaddiu VI08,VI00,0x00007fff + mulax ACC,VF01,VF14x div Q,VF00w,VF15w + madday ACC,VF02,VF14y lq.xyz VF08,8(VI03) + maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 + maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) + add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) + mulax ACC,VF01,VF08x lq.xyz VF07,6(VI03) + madday ACC,VF02,VF08y lq.w VF08,0(VI00) + maddaz ACC,VF03,VF08z div Q,VF00w,VF14w + mulq.xyz VF08,VF15,Q fcset 0 + mulq.xyz VF07,VF07,Q iaddiu VI09,VI03,0 + maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 + NOP sq.xyz VF09,1(VI04) + add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) + maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mul.xyz VF13,VF08,VF06 mfir.w VF10,VI08 + ftoi4.xyz VF10,VF10 lq.xyz VF11,10(VI03) + mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) + sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) + mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x0000000c + mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) + clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF12,VF10 lq.xyz VF11,4(VI03) + sub.xyz VF10,VF07,VF08 NOP + mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 + mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) + mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) + madday ACC,VF02,VF11y mfir.w VF12,VI08 + maddaz ACC,VF03,VF11z NOP + maddw VF15,VF04,VF00w NOP + mulax ACC,VF01,VF14x lq.xyz VF11,8(VI03) + madday ACC,VF02,VF14y sq VF12,2(VI09) + maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 + maddw VF14,VF04,VF00w div Q,VF00w,VF15w + mulax ACC,VF01,VF11x lq.xyz VF16,6(VI03) + madday ACC,VF02,VF11y iaddiu VI10,VI04,0 + maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) + maddw VF12,VF04,VF00w lq.xyz 
VF11,10(VI03) + clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) + opmula.xyz ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 + mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w + mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) + opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 + NOP iand VI03,VI01,VI02 + mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) + add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) + add.xyz VF17,VF07,VF05 fmand VI01,VI07 + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 + ftoi4.xyz VF10,VF10 iaddiu VI03,VI11,0 + ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff + sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 + mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x0000000c + mulq.xyz VF15,VF11,Q sq VF15,8(VI09) + clipw.xyz VF16xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 20 [17 0] 21 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_xform_loop_lid__EPI0] + ftoi4.xyz VF09,VF10 NOP + sub.xyz VF10,VF07,VF08 mfir.w VF09,VI08 + mul.xyz VF08,VF07,VF06 NOP + mulw.xyz VF14,VF14,VF08w NOP + add.xyz VF07,VF07,VF05 sq.xyz VF15,6(VI09) + clipw.xyz VF08xyz,VF07w lq.xyz VF06,61(VI00) ; STALL_LATENCY ?1 + opmula.xyz ACCxyz,VF14xyz,VF10xyz lq.xyz VF05,59(VI00) + opmsub.xyz VF11xyz,VF10xyz,VF14xyz sq VF09,2(VI09) + ftoi4.xyz VF15,VF07 lq.xyz VF07,66(VI00) + NOP fcand VI01,262143 + NOP iand VI02,VI01,VI02 + abs.xyz VF00,VF11 fmand VI07,VI07 + NOP ior VI02,VI02,VI07 + NOP iaddiu VI02,VI02,0x00007fff + NOP ilw.x VI01,0(VI00) + NOP mfir.w VF15,VI02 + NOP iaddiu VI02,VI00,0x00000001 + NOP ilw.x VI03,0(VI02) + NOP ibeq VI01,VI00,pt_lights_lid + NOP sq VF15,8(VI09) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_tri_pp4_vcl_9] + maxw.z VF08,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP 
NOP + NOP lq.xyz VF11,67(VI00) + NOP lq.xyz VF12,3(VI03) + NOP lq.xyz VF13,68(VI00) + NOP lq.xyz VF09,69(VI00) + mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) + maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) + NOP esadd P,VF13 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 + mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 + mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) + NOP lq.xyz VF09,0(VI03) + NOP mr32.xyw VF16,VF14 + adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF09,VF00,VF16y NOP + maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + mul.xyz VF14,VF13,VF16 NOP + mul.xyz VF17,VF12,VF16 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 + adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF17x NOP + mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF09,VF00,VF16y NOP + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + mul.xyz VF18,VF12,VF16 
NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w ACC,VF16,VF16x NOP + mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 + mula.xyz ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maddaw.xyz ACC,VF11,VF05w NOP + maxx.w VF06,VF09,VF00x NOP + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF18,VF12,VF16 NOP + mul.w VF05,VF05,VF05 NOP + add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w ACC,VF16,VF16x sq.xyz VF20,-11(VI06) + mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + mula.xyz ACC,VF17,VF19 NOP +EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF09,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP + maxx.z VF16,VF16,VF00x NOP + add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + NOP sq.xyz VF14,-8(VI06) + mul.w 
VF07,VF06,VF06 NOP + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 + madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) + mul.w VF05,VF07,VF07 NOP + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 lq.xyz VF09,-2(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 23 [38 0] 38 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) + NOP NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + maxx.z VF16,VF16,VF00x NOP + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) + mul.w VF05,VF07,VF07 move.xyz VF15,VF15 + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [38 0] 38 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF09,VF00,VF16y NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 
NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + maxx.z VF16,VF16,VF00x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP move.xyz VF10,VF10 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + maddaw.xyz ACC,VF11,VF05w NOP ; STALL_LATENCY ?2 + madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_tri_pp4_vcl_15] + maxw.z VF08,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF09,71(VI00) + NOP lq.xyz VF10,3(VI03) + NOP lq.xyz VF12,72(VI00) + NOP lq.xyz VF13,73(VI00) + mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 + madday.xyz ACC,VF12,VF10y xtop VI06 + maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 + sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) + NOP sqrt Q,VF15z ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF15,VF00,Q iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd 
VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP div Q,VF00w,VF15y + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) + mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 + NOP iaddiu VI06,VI06,0 + mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF15,VF15,VF12 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) + sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 + mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 + maddx.z VF20,VF08,VF20x NOP + NOP waitp + addw.x VF20,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF20z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0 + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + NOP move.xyz VF18,VF17 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF20x mfp.w VF06,P +; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 NOP + madday.w ACC,VF00,VF20y esadd P,VF17 + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF19,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF19x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z waitp + mulw.xyz VF21,VF10,VF05w mfp.w VF06,P + mulax.w ACC,VF00,VF14x sqrt Q,VF20z + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + addw.x VF20,VF00,VF00w ersqrt P,VF06w + mul.w 
VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + addq.y VF20,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) + maddz.w VF05,VF00,VF14z NOP + mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz VF18,VF17 + mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF20y NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 esadd P,VF17 + maddaw.xyz ACC,VF11,VF07w NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF21,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF21x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF07,VF00,VF21z mfp.w VF06,P + NOP lq.xyz VF22,-11(VI06) + mulq.xyz VF19,VF19,Q sqrt Q,VF20z + mulw.xyz VF21,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + add.xyz VF19,VF22,VF19 NOP + addw.x VF20,VF00,VF00w NOP + mul.w VF06,VF05,VF05 NOP + addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) + madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) + mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y + mula.xyz ACC,VF21,VF22 NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 NOP + NOP NOP + NOP NOP + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz 
VF18,VF17 + NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + madday.w ACC,VF00,VF20y NOP + mul.w VF07,VF07,VF07 NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF20,VF18,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF10,VF05w NOP + madd.xyz VF19,VF09,VF05 ersqrt P,VF06w + maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 + mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 + mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 + mul.w VF07,VF06,VF06 mfp.w VF06,P + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP 
+ mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz VF19,-5(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 + mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF19x mfp.w VF06,P + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) + mulw.xyz VF20,VF10,VF05w NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + madday.w ACC,VF00,VF14y NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF20,VF18 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz 
VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP + mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz VF19,238(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF12,VF07,VF16 lq.xyz VF13,-3(VI04) + mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF16y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF12,VF12 + NOP mfp.w VF06,P + mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF16z NOP + mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF16y NOP + maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w 
VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF15y NOP + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF09,VF05 NOP + mulq.xyz VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP 
+EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pv_diff_tri_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_tri_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralPVDiffTri_CodeEnd: +; iCount=586 +; register stats: +; 12 VU User integer +; 23 VU User floating point diff --git a/vu1/sce_general_pv_diff_vcl.vsm b/vu1/sce_general_pv_diff_vcl.vsm new file mode 100644 index 00000000..cf088de1 --- /dev/null +++ b/vu1/sce_general_pv_diff_vcl.vsm @@ -0,0 +1,769 @@ +; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=22 clid=0 mlid=2 size=(22) +; === normal1 : optimal=22 clid=0 mlid=2 size=(22) +; === vuta : optimal=22 clid=0 mlid=2 size=(22) +; === dUp : optimal=22 clid=0 mlid=2 size=(22) +; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) +; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=4 mlid=4 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=4 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : 
optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=6 mlid=6 size=(18) +; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === hDown : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === normal : optimal=34 clid=0 mlid=4 size=(39) +; === another : optimal=34 clid=0 mlid=4 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=4 size=(39) +; === normal2 : optimal=34 clid=0 mlid=5 size=(39) +; === __LP__ EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralPVDiff_CodeStart + .global vsmGeneralPVDiff_CodeEnd +vsmGeneralPVDiff_CodeStart: +__v_vu1_general_pv_diff_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pv_diff_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 27 [27 0] 27 [main_loop_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI01 + NOP iaddiu VI03,VI01,0x00000005 + NOP mr32.z VF05,VF06 + NOP iaddiu VI04,VI01,0 + NOP ilw.x VI05,0(VI01) + NOP fcset 0 + NOP lq VF08,75(VI00) + NOP lq.xyz VF06,76(VI00) + NOP iadd VI06,VI03,VI05 + NOP iadd 
VI06,VI06,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI07,VF08x + NOP ior VI07,VI07,VI05 + NOP mfir.x VF08,VI07 + NOP iaddiu VI07,VI01,0x00000001 + NOP ilw.w VI02,76(VI00) + NOP iadd VI06,VI06,VI05 + NOP sq VF08,236(VI04) + NOP iaddiu VI01,VI01,0x00000005 + NOP iaddiu VI08,VI07,0x00000004 + NOP iaddiu VI09,VI00,0x000003ff + NOP iaddiu VI10,VI00,0x00000800 + NOP iaddiu VI11,VI00,0x00000400 + maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF08,0(VI07) + ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 + NOP mtir VI13,VF08x ; STALL_LATENCY ?3 + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_8] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,4(VI13) + NOP mtir VI14,VF08y + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_9] + NOP iand VI13,VI14,VI10 + NOP iand VI14,VI14,VI09 + NOP iadd VI14,VI14,VI01 + NOP isw.w VI12,0(VI14) + NOP isw.w VI13,4(VI14) + NOP mtir VI13,VF08z + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pv_diff_pp4_vcl_10] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,4(VI13) + NOP mtir VI14,VF08w + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_pv_diff_pp4_vcl_11] + NOP iand VI13,VI14,VI09 + NOP iaddiu VI07,VI07,0x00000001 + NOP iand VI14,VI14,VI10 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP ibne VI07,VI08,adcLoop_lid + NOP isw.w VI14,4(VI13) +adcLoop_done_lid: +; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] + NOP lq.xyz VF12,0(VI03) + mulax ACC,VF01,VF12x 
iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 + madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) + maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) + maddw VF12,VF04,VF00w lq.w VF05,0(VI00) + mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF12w + add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 + NOP iaddiu VI03,VI03,0x00000004 + NOP lq.xyz VF11,-2(VI03) + mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 + max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1 + max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 +; _LNOPT_w=[ ] 21 [19 0] 22 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__PRO1] + NOP NOP + add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) + sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 + max.xyz VF09,VF12,VF12 NOP + mul.xyz VF14,VF12,VF06 NOP + mulax ACC,VF01,VF15x NOP + madday ACC,VF02,VF15y NOP + maddaz ACC,VF03,VF15z ilw.w VI09,-4(VI03) + maddw VF12,VF04,VF00w isub VI01,VI08,VI07 + mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000004 + opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 + NOP div Q,VF00w,VF12w + NOP sq.xyz VF11,-6(VI04) + abs.xyz VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 + NOP lq.xyz VF11,-2(VI03) + clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibeq VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0 + mulw.xyz VF08,VF13,VF05w fcand VI01,262143 +EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 + add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) + sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 + max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 + mul.xyz VF15,VF12,VF06 ilw.w VI09,-4(VI03) + mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff + madday ACC,VF02,VF16y mfir.w VF14,VI10 + maddaz 
ACC,VF03,VF16z isub VI01,VI08,VI07 + maddw VF12,VF04,VF00w iand VI10,VI09,VI08 + mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) + opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000004 + mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w + clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) + NOP iaddiu VI04,VI04,0x00000003 + NOP fmand VI11,VI08 + NOP lq.xyz VF11,-2(VI03) + NOP isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibne VI03,VI06,EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP fcand VI01,262143 +EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 + opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 + opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 + NOP ilw.w VI09,-4(VI03) + NOP iaddiu VI10,VI10,0x00007fff + clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 + abs.xyz VF00,VF14 fmand VI10,VI08 + NOP isub VI07,VI10,VI07 + ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 + add.xyz VF10,VF12,VF05 fcand VI01,262143 + NOP iand VI01,VI01,VI02 + mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 + NOP ior VI10,VI10,VI09 + ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff + NOP mfir.w VF10,VI10 + NOP sq.xyz VF11,-3(VI04) + NOP sq VF13,-4(VI04) + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF10,-1(VI04) +EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EPI1] + NOP NOP + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF06,VF12,VF06 NOP + opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 + opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP + clipw.xyz VF06xyz,VF07w ilw.w VI03,-4(VI03) + abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 + NOP isub VI07,VI01,VI07 + NOP iand VI08,VI07,VI08 + 
add.xyz VF07,VF12,VF05 fcand VI01,262143 + NOP iand VI02,VI01,VI02 + NOP ior VI02,VI02,VI08 + mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 + ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF07,VI03 + NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 + NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 +EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 7 [7 0] 7 [EXPL_vu1_general_pv_diff_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,61(VI00) + NOP lq.xyz VF07,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_pp4_vcl_16] + maxw.z VF08,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 28 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF11,67(VI00) + NOP lq.xyz VF12,3(VI03) + NOP lq.xyz VF13,68(VI00) + NOP lq.xyz VF09,69(VI00) + mulax.xyz ACC,VF11,VF12x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF13,VF12y ilw.x VI05,0(VI06) + maddz.xyz VF12,VF09,VF12z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF13,VF07,VF12 lq.xyz VF10,2(VI03) + NOP esadd P,VF13 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF13,VF13,VF05w lq.xyz VF16,1(VI04) ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF14,VF13,VF16 iadd VI07,VI07,VI05 ; STALL_LATENCY ?2 + mul.xyz VF15,VF12,VF16 iadd VI07,VI07,VI05 + mul.xyz VF11,VF10,VF06 lq.xyz VF10,1(VI03) + NOP lq.xyz VF09,0(VI03) + NOP mr32.xyw VF16,VF14 + adday.z ACC,VF15,VF15y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF15x iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 11 [13 0] 14 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__PRO1] + 
maddy.w VF09,VF00,VF16y NOP + maxx.z VF15,VF16,VF00x lq.xyz VF16,1(VI04) + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + mul.xyz VF14,VF13,VF16 NOP + mul.xyz VF17,VF12,VF16 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulz.xyz VF15,VF10,VF15z mr32.xyw VF16,VF14 + adday.z ACC,VF17,VF17y iaddiu VI04,VI04,0x00000004 + maddx.z VF16,VF08,VF17x NOP + mul.w VF06,VF06,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1 + addax.w ACC,VF16,VF16x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 15 [15 0] 17 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF09,VF00,VF16y NOP + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + mul.xyz VF18,VF12,VF16 NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 mr32.xyw VF16,VF17 ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w ACC,VF16,VF16x NOP + mulz.xyz VF15,VF10,VF14z ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0 + mula.xyz ACC,VF17,VF19 iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF09,VF00,VF16y iaddiu VI06,VI06,0x00000003 + maxx.z VF14,VF16,VF00x lq.xyz VF16,1(VI04) + mul.w VF07,VF06,VF06 NOP + maddaw.xyz ACC,VF11,VF05w NOP + maxx.w VF06,VF09,VF00x NOP + mul.xyz VF17,VF13,VF16 NOP + mul.w VF05,VF07,VF07 NOP + madd.xyz VF20,VF09,VF05 lq.xyz VF19,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF18,VF12,VF16 NOP + mul.w VF05,VF05,VF05 NOP + add.xyz VF20,VF19,VF20 mr32.xyw VF16,VF17 + mul.w VF06,VF06,VF06 move.xyz VF17,VF15 + adday.z ACC,VF18,VF18y lq.xyz VF19,-5(VI04) + maddx.z VF16,VF08,VF18x iaddiu VI04,VI04,0x00000004 + addax.w 
ACC,VF16,VF16x sq.xyz VF20,-11(VI06) + mulz.xyz VF15,VF10,VF14z ibne VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + mula.xyz ACC,VF17,VF19 NOP +EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 26 [38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF09,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF13,VF09,VF05 lq.xyz VF12,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP + maxx.z VF16,VF16,VF00x NOP + add.xyz VF14,VF12,VF13 lq.xyz VF13,-5(VI04) + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + NOP sq.xyz VF14,-8(VI06) + mul.w VF07,VF06,VF06 NOP + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz ACC,VF11,VF05w move.xyz VF15,VF15 + madd.xyz VF16,VF09,VF05 lq.xyz VF10,-5(VI06) + mul.w VF05,VF07,VF07 NOP + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,-5(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 lq.xyz VF09,-2(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 23 [38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF09,VF00,VF16y lq.xyz VF13,-5(VI04) + NOP NOP + mul.w VF07,VF06,VF06 NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + maxx.z VF16,VF16,VF00x NOP + mul.w VF06,VF06,VF06 move.xyz VF12,VF15 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF15,VF10,VF16z NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF12,VF13 NOP + maddaw.xyz 
ACC,VF11,VF05w NOP + madd.xyz VF16,VF09,VF05 lq.xyz VF10,238(VI06) + mul.w VF05,VF07,VF07 move.xyz VF15,VF15 + add.xyz VF16,VF10,VF16 lq.xyz VF10,-1(VI04) ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF15,VF10 sq.xyz VF16,238(VI06) ; STALL_LATENCY ?2 + maddaw.xyz ACC,VF11,VF05w NOP + madd.xyz VF11,VF09,VF05 lq.xyz VF09,241(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF09,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 15 [38 0] 38 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF09,VF00,VF16y NOP + maxx.w VF06,VF09,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + maxx.z VF16,VF16,VF00x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + mulz.xyz VF10,VF10,VF16z lq.xyz VF16,-1(VI04) + mul.w VF05,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP move.xyz VF10,VF10 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + maddaw.xyz ACC,VF11,VF05w NOP ; STALL_LATENCY ?2 + madd.xyz VF11,VF09,VF05 lq.xyz VF09,238(VI06) + add.xyz VF09,VF09,VF11 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pv_diff_pp4_vcl_22] + maxw.z VF08,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 36 [43 0] 50 [pt_light_loop_lid] + NOP NOP + 
NOP NOP + NOP lq.xyz VF09,71(VI00) + NOP lq.xyz VF10,3(VI03) + NOP lq.xyz VF12,72(VI00) + NOP lq.xyz VF13,73(VI00) + mulax.xyz ACC,VF09,VF10x lq.xyz VF09,74(VI00) ; STALL_LATENCY ?1 + madday.xyz ACC,VF12,VF10y xtop VI06 + maddaz.xyz ACC,VF13,VF10z iaddiu VI04,VI06,0x00000005 + maddw.xyz VF13,VF09,VF00w lq.xyz VF16,0(VI04) ; STALL_LATENCY ?1 + sub.xyz VF16,VF13,VF16 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF15,VF16,VF16 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF15,VF15y lq.xyz VF10,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF15,VF08,VF15x lq.xyz VF09,0(VI03) + NOP sqrt Q,VF15z ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF15,VF00,Q iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP div Q,VF00w,VF15y + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF15,VF00,VF00w lq.xyz VF12,5(VI03) + mulq.xyz VF16,VF16,Q iaddiu VI04,VI04,0x00000004 + NOP iaddiu VI06,VI06,0 + mul.xyz VF11,VF11,VF06 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF15,VF15,VF12 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF17,VF07,VF16 lq.xyz VF18,0(VI04) + sub.xyz VF18,VF13,VF18 esadd P,VF17 ; STALL_LATENCY ?3 + mul.xyz VF20,VF18,VF18 lq.xyz VF19,-3(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF20,VF20y NOP ; STALL_LATENCY ?3 + maddx.z VF20,VF08,VF20x NOP + NOP waitp + addw.x VF20,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF20z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF20,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF20y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0 + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + NOP move.xyz VF18,VF17 + NOP ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w 
ACC,VF00,VF20x mfp.w VF06,P +; _LNOPT_w=[ ] 31 [38 0] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 NOP + madday.w ACC,VF00,VF20y esadd P,VF17 + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF19,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + maxx.w VF05,VF06,VF00x NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF19x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z waitp + mulw.xyz VF21,VF10,VF05w mfp.w VF06,P + mulax.w ACC,VF00,VF14x sqrt Q,VF20z + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + addw.x VF20,VF00,VF00w ersqrt P,VF06w + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + addq.y VF20,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF20y ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF14y lq.xyz VF19,-3(VI04) + maddz.w VF05,VF00,VF14z NOP + mul.w VF06,VF06,VF06 lq.xyz VF22,-5(VI04) ; STALL_LATENCY ?1 + mul.xyz VF15,VF20,VF12 move.xyz VF14,VF15 + mul.xyz VF20,VF16,VF19 iaddiu VI06,VI06,0x000000f6 + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz VF18,VF17 + mula.xyz ACC,VF21,VF22 ibeq VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF17,VF07,VF16 lq.xyz VF22,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF20y NOP + mulw.xyz VF21,VF18,VF06w NOP + sub.xyz VF18,VF13,VF22 esadd P,VF17 + maddaw.xyz ACC,VF11,VF07w NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF21,VF21,VF19 NOP + mul.xyz VF20,VF18,VF18 NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF21x NOP + adday.z ACC,VF20,VF20y NOP + maddx.z VF20,VF08,VF20x NOP + madday.w 
ACC,VF00,VF21y NOP + maddz.w VF07,VF00,VF21z mfp.w VF06,P + NOP lq.xyz VF22,-11(VI06) + mulq.xyz VF19,VF19,Q sqrt Q,VF20z + mulw.xyz VF21,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + add.xyz VF19,VF22,VF19 NOP + addw.x VF20,VF00,VF00w NOP + mul.w VF06,VF05,VF05 NOP + addq.y VF20,VF00,Q lq.xyz VF22,-5(VI04) + madday.w ACC,VF00,VF14y sq.xyz VF19,-11(VI06) + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mul.w VF06,VF06,VF06 lq.xyz VF19,-3(VI04) + mul.xyz VF15,VF20,VF12 div Q,VF00w,VF20y + mula.xyz ACC,VF21,VF22 NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 NOP + NOP NOP + NOP NOP + mulq.xyz VF16,VF18,Q iaddiu VI04,VI04,0x00000004 + mul.w VF07,VF06,VF06 move.xyz VF18,VF17 + NOP ibne VI04,VI07,EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF20x mfp.w VF06,P +EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + madday.w ACC,VF00,VF20y NOP + mul.w VF07,VF07,VF07 NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF20,VF18,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF10,VF05w NOP + madd.xyz VF19,VF09,VF05 ersqrt P,VF06w + maxx.w VF05,VF07,VF00x waitq ; STALL_LATENCY ?1 + mulq.xyz VF19,VF19,Q lq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulax.w ACC,VF00,VF14x sq.xyz VF19,-8(VI06) ; STALL_LATENCY ?1 + mula.xyz ACC,VF20,VF18 lq.xyz VF19,-3(VI04) + mul.w VF06,VF06,VF06 NOP + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 ; STALL_LATENCY ?2 + mul.w 
VF07,VF06,VF06 mfp.w VF06,P + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP + mulax.w ACC,VF00,VF14x lq.xyz VF12,-5(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz VF19,-5(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,-2(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 52 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF12,VF07,VF16 NOP + mulw.xyz VF18,VF18,VF06w esadd P,VF12 ; STALL_LATENCY ?3 + mul.xyz VF19,VF18,VF19 lq.xyz VF18,-5(VI04) ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF19x mfp.w VF06,P + madday.w ACC,VF00,VF19y NOP + maddz.w VF07,VF00,VF19z lq.xyz VF19,-3(VI04) + mulw.xyz VF20,VF10,VF05w NOP + mulax.w ACC,VF00,VF14x ersqrt P,VF06w + madday.w ACC,VF00,VF14y NOP + maxx.w VF05,VF07,VF00x NOP + 
mula.xyz ACC,VF20,VF18 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.xyz VF20,VF16,VF19 move.xyz VF18,VF12 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF14z move.xyz VF14,VF15 + mulax.w ACC,VF00,VF20x NOP + mulw.xyz VF12,VF18,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF06,VF00,VF20z NOP + mul.xyz VF12,VF12,VF19 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF12x NOP + madday.w ACC,VF00,VF12y NOP + maddz.w VF07,VF00,VF12z NOP + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF19,Q NOP + mulax.w ACC,VF00,VF14x lq.xyz VF12,238(VI06) + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + madday.w ACC,VF00,VF14y NOP + maddz.w VF05,VF00,VF14z NOP + add.xyz VF19,VF12,VF19 lq.xyz VF12,-1(VI04) + mul.w VF07,VF07,VF07 NOP + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + mula.xyz ACC,VF10,VF12 NOP + maddaw.xyz ACC,VF11,VF07w NOP + madd.xyz VF19,VF09,VF05 sq.xyz VF19,238(VI06) + mulq.xyz VF19,VF19,Q lq.xyz VF09,241(VI06) ; STALL_LATENCY ?3 + add.xyz VF19,VF09,VF19 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF19,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF12,VF07,VF16 lq.xyz VF13,-3(VI04) + mul.xyz VF16,VF16,VF13 esadd P,VF12 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x move.xyz VF15,VF15 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF16y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + 
NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF12,VF12 + NOP mfp.w VF06,P + mulw.xyz VF12,VF12,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF16z NOP + mul.xyz VF16,VF12,VF13 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF16x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF16y NOP + maddz.w VF07,VF00,VF16z lq.xyz VF16,-1(VI04) + mulw.xyz VF10,VF10,VF05w NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF10,VF16 NOP + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF15y NOP + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?1 + NOP div Q,VF00w,VF05w ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF11,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF09,VF09,VF05 NOP + mulq.xyz VF09,VF09,Q lq.xyz VF11,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF09,VF11,VF09 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF09,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 
[EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pv_diff_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pv_diff_pp4_vcl_30] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralPVDiff_CodeEnd: +; iCount=654 +; register stats: +; 15 VU User integer +; 23 VU User floating point diff --git a/vu1/sce_general_quad_vcl.vsm b/vu1/sce_general_quad_vcl.vsm new file mode 100644 index 00000000..0963cd46 --- /dev/null +++ b/vu1/sce_general_quad_vcl.vsm @@ -0,0 +1,794 @@ +; === __LP__ EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === normal1 : optimal=46 clid=1 mlid=3 size=(47) +; === __LP__ EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb 
: optimal=18 clid=10 mlid=10 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=3 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) +; === __LP__ EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) +; === __LP__ EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralQuad_CodeStart + .global vsmGeneralQuad_CodeEnd +vsmGeneralQuad_CodeStart: +__v_vu1_general_quad_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_quad_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 45 [45 0] 45 [main_loop_lid] + NOP xtop VI05 + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + NOP iaddiu VI03,VI05,0x00000005 + NOP lq.xyz VF20,0(VI03) + NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + 
mul.xyz VF09,VF09,VF08 iaddiu VI04,VI05,0x000000ed + mulax ACC,VF01,VF20x ilw.x VI05,0(VI05) + madday ACC,VF02,VF20y lq.xyz VF08,6(VI03) + maddaz ACC,VF03,VF20z mr32.z VF05,VF05 + maddw VF20,VF04,VF00w lq VF06,75(VI00) + addi.xy VF05,VF00,I lq.xyz VF14,9(VI03) + mulax ACC,VF01,VF08x loi 0x45000000 + madday ACC,VF02,VF08y ilw.w VI07,0(VI00) + maddaz ACC,VF03,VF08z div Q,VF00w,VF20w + maddw VF16,VF04,VF00w iadd VI06,VI03,VI05 + mulax ACC,VF01,VF14x lq.xyz VF13,3(VI03) + madday ACC,VF02,VF14y mtir VI08,VF06x + maddaz ACC,VF03,VF14z ior VI08,VI08,VI05 + maddw VF14,VF04,VF00w mfir.x VF06,VI08 + mulax ACC,VF01,VF13x iadd VI06,VI06,VI05 + madday ACC,VF02,VF13y iadd VI06,VI06,VI05 + maddaz ACC,VF03,VF13z lq.xyz VF07,58(VI00) + maddw VF13,VF04,VF00w sq VF06,-1(VI04) + NOP iaddiu VI08,VI00,0x00007fff + NOP iaddiu VI08,VI08,0x00000001 + NOP ilw.w VI02,76(VI00) + NOP div Q,VF00w,VF13w + add.xyz VF09,VF07,VF09 lq.xyz VF07,7(VI03) + NOP lq.xyz VF15,2(VI03) + mulq.xyz VF08,VF20,Q lq.xyz VF06,76(VI00) + NOP fcset 0 + maxi.w VF07,VF00,I lq.xyz VF17,10(VI03) + mulq.xyz VF15,VF15,Q sq.xyz VF07,10(VI03) + mul.xyz VF10,VF08,VF06 div Q,VF00w,VF16w + NOP lq.xyz VF07,5(VI03) + mulq.xyz VF13,VF13,Q iaddiu VI01,VI03,0 + add.xyz VF11,VF08,VF05 sq.xyz VF17,7(VI03) + clipw.xyz VF10xyz,VF07w lq.xyz VF17,11(VI03) + mulq.xyz VF10,VF07,Q lq.xyz VF20,8(VI03) + sub.xyz VF12,VF08,VF13 iaddiu VI03,VI01,0x0000000c + mul.xyz VF21,VF13,VF06 div Q,VF00w,VF14w + add.xyz VF08,VF13,VF05 ibeq VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1 + mulq.xyz VF16,VF16,Q lq.w VF08,0(VI00) +; _LNOPT_w=[ ] 47 [45 0] 47 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__PRO1] + mulq.xyz VF19,VF20,Q NOP + NOP NOP + NOP NOP + clipw.xyz VF21xyz,VF07w NOP + mulq.xyz VF17,VF17,Q lq.xyz VF20,0(VI03) + mulq.xyz VF18,VF14,Q NOP + sub.xyz VF13,VF16,VF13 NOP + add.xyz VF07,VF16,VF05 NOP + mulax ACC,VF01,VF20x NOP + madday ACC,VF02,VF20y lq.xyz VF22,6(VI03) + maddaz ACC,VF03,VF20z NOP + maddw VF20,VF04,VF00w iaddiu 
VI01,VI03,0 + mul.xyz VF21,VF16,VF06 iaddiu VI09,VI04,0 + mulax ACC,VF01,VF22x lq.xyz VF14,9(VI03) + madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) + maddaz ACC,VF03,VF22z div Q,VF00w,VF20w + maddw VF16,VF04,VF00w lq.xyz VF23,7(VI03) + mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) + madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) + mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) + maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) + maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) + mulq.xyz VF22,VF20,Q lq.xyz VF17,11(VI03) + mulax ACC,VF01,VF24x lq.xyz VF23,5(VI03) + madday ACC,VF02,VF24y lq.xyz VF25,2(VI03) + maddaz ACC,VF03,VF24z sq.xyz VF19,9(VI04) + maddw VF24,VF04,VF00w sq.xyz VF10,3(VI04) + ftoi4.xyz VF11,VF11 sq.xyz VF09,4(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,7(VI04) + mul.xyz VF19,VF22,VF06 sq.xyz VF09,1(VI04) + clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w + mulw.xyz VF10,VF12,VF08w mfir.w VF11,VI08 + add.xyz VF12,VF18,VF05 lq.xyz VF20,8(VI03) + clipw.xyz VF19xyz,VF07w iaddiu VI03,VI01,0x0000000c + mulq.xyz VF15,VF25,Q fcand VI01,16777215 + opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,10(VI04) + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq VF11,2(VI04) + ftoi4.xyz VF18,VF08 div Q,VF00w,VF16w + mulq.xyz VF13,VF24,Q mfir.w VF18,VI08 + ftoi4.xyz VF19,VF12 iand VI01,VI01,VI02 + mulq.xyz VF10,VF23,Q fmand VI10,VI07 + add.xyz VF11,VF22,VF05 ior VI01,VI01,VI10 + sub.xyz VF12,VF22,VF13 iaddiu VI01,VI01,0x00007fff + mul.xyz VF21,VF13,VF06 mfir.w VF19,VI01 + add.xyz VF08,VF13,VF05 div Q,VF00w,VF14w + ftoi4.xyz VF22,VF07 ibeq VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0 + mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 +EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ normal1 ] 47 [45 45] 47 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP sq VF19,8(VI04) + mulq.xyz VF19,VF20,Q sq VF18,5(VI04) + ftoi4.xyz VF11,VF11 NOP + sub.xyz VF13,VF16,VF13 sq VF22,11(VI04) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI09,0x0000000c + add.xyz VF07,VF16,VF05 lq.xyz VF20,0(VI03) + 
mulq.xyz VF18,VF14,Q NOP + clipw.xyz VF21xyz,VF07w lq.xyz VF22,6(VI03) + mul.xyz VF21,VF16,VF06 iaddiu VI01,VI03,0 + mulax ACC,VF01,VF20x iaddiu VI09,VI04,0 + madday ACC,VF02,VF20y NOP + maddaz ACC,VF03,VF20z lq.xyz VF14,9(VI03) + maddw VF20,VF04,VF00w NOP + mulax ACC,VF01,VF22x lq.xyz VF23,7(VI03) + madday ACC,VF02,VF22y lq.xyz VF25,10(VI03) + maddaz ACC,VF03,VF22z NOP + maddw VF16,VF04,VF00w div Q,VF00w,VF20w + mulax ACC,VF01,VF14x sq.xyz VF15,0(VI04) + madday ACC,VF02,VF14y lq.xyz VF24,3(VI03) + mul.xyz VF15,VF18,VF06 sq.xyz VF25,7(VI03) + maddaz ACC,VF03,VF14z sq.xyz VF23,10(VI03) + maddw VF14,VF04,VF00w sq.xyz VF17,6(VI04) + mulax ACC,VF01,VF24x lq.xyz VF17,11(VI03) + mulq.xyz VF22,VF20,Q lq.xyz VF23,5(VI03) + madday ACC,VF02,VF24y lq.xyz VF20,8(VI03) + maddaz ACC,VF03,VF24z lq.xyz VF25,2(VI03) + maddw VF24,VF04,VF00w iaddiu VI03,VI01,0x0000000c + mul.xyz VF19,VF22,VF06 sq.xyz VF19,9(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF10,3(VI04) + mulw.xyz VF10,VF12,VF08w sq.xyz VF09,4(VI04) + clipw.xyz VF21xyz,VF07w div Q,VF00w,VF24w + clipw.xyz VF19xyz,VF07w sq.xyz VF09,7(VI04) + add.xyz VF12,VF18,VF05 sq.xyz VF09,1(VI04) + mulq.xyz VF15,VF25,Q mfir.w VF11,VI08 + opmula.xyz ACCxyz,VF10xyz,VF13xyz fcand VI01,16777215 + ftoi4.xyz VF18,VF08 iand VI01,VI01,VI02 + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,10(VI04) + mulq.xyz VF13,VF24,Q div Q,VF00w,VF16w + mulq.xyz VF10,VF23,Q sq VF11,2(VI04) + add.xyz VF11,VF22,VF05 mfir.w VF18,VI08 + ftoi4.xyz VF19,VF12 fmand VI10,VI07 + sub.xyz VF12,VF22,VF13 ior VI01,VI01,VI10 + mul.xyz VF21,VF13,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF08,VF13,VF05 mfir.w VF19,VI01 + NOP div Q,VF00w,VF14w + ftoi4.xyz VF22,VF07 ibne VI03,VI06,EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__MAIN_LOOP + mulq.xyz VF16,VF16,Q mfir.w VF22,VI01 +EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 32 [26 0] 32 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI0] + mulq.xyz VF19,VF20,Q sq VF19,8(VI04) + NOP NOP + NOP NOP + NOP NOP + 
mulq.xyz VF17,VF17,Q mfir.w VF11,VI08 + NOP NOP + mulq.xyz VF18,VF14,Q sq VF18,5(VI04) + NOP NOP + NOP NOP + clipw.xyz VF21xyz,VF07w sq VF22,11(VI04) + sub.xyz VF13,VF16,VF13 iaddiu VI04,VI09,0 + mul.xyz VF15,VF18,VF06 sq.xyz VF15,12(VI04) + mul.xyz VF21,VF16,VF06 sq.xyz VF19,21(VI04) + mulw.xyz VF10,VF12,VF08w sq.xyz VF10,15(VI04) + NOP sq.xyz VF17,18(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,16(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF09,19(VI04) + opmula.xyz ACCxyz,VF10xyz,VF13xyz sq.xyz VF09,13(VI04) + opmsub.xyz VF00xyz,VF13xyz,VF10xyz sq.xyz VF09,22(VI04) + NOP mfir.w VF18,VI08 + add.xyz VF12,VF18,VF05 fcand VI01,16777215 + ftoi4.xyz VF11,VF11 iand VI02,VI01,VI02 + add.xyz VF07,VF16,VF05 fmand VI07,VI07 + ftoi4.xyz VF18,VF08 ior VI02,VI02,VI07 + ftoi4.xyz VF19,VF12 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF19,VI02 + ftoi4.xyz VF22,VF07 sq VF11,14(VI04) + NOP mfir.w VF22,VI02 + NOP sq VF18,17(VI04) + NOP sq VF19,20(VI04) + NOP b EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF22,23(VI04) +EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 29 [22 0] 29 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EPI1] + mulq.xyz VF20,VF20,Q NOP + NOP NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF17,Q NOP + mulq.xyz VF14,VF14,Q NOP + ftoi4.xyz VF11,VF11 NOP + sub.xyz VF13,VF16,VF13 NOP + mulw.xyz VF10,VF12,VF08w sq.xyz VF10,3(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF15,0(VI04) + mul.xyz VF15,VF14,VF06 sq.xyz VF09,4(VI04) + mul.xyz VF21,VF16,VF06 sq.xyz VF09,7(VI04) + opmula.xyz ACCxyz,VF10xyz,VF13xyz mfir.w VF11,VI08 + opmsub.xyz VF18xyz,VF13xyz,VF10xyz sq.xyz VF20,9(VI04) + clipw.xyz VF15xyz,VF07w sq.xyz VF09,1(VI04) + clipw.xyz VF21xyz,VF07w sq.xyz VF09,10(VI04) + NOP sq VF11,2(VI04) + abs.xyz VF00,VF18 mfir.w VF08,VI08 + add.xyz VF12,VF14,VF05 fmand VI07,VI07 + add.xyz VF07,VF16,VF05 fcand VI01,16777215 + NOP iand VI02,VI01,VI02 + ftoi4.xyz VF08,VF08 ior VI02,VI02,VI07 + ftoi4.xyz VF11,VF12 iaddiu VI02,VI02,0x00007fff + ftoi4.xyz 
VF07,VF07 mfir.w VF11,VI02 + NOP mfir.w VF07,VI02 + NOP sq.xyz VF17,6(VI04) + NOP sq VF08,5(VI04) + NOP sq VF11,8(VI04) + NOP sq VF07,11(VI04) +EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 8 [8 0] 8 [EXPL_vu1_general_quad_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP lq.xyz VF07,61(VI00) + NOP lq.xyz VF08,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_quad_pp4_vcl_9] + maxw.z VF09,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,67(VI00) + NOP lq.xyz VF13,3(VI03) + NOP lq.xyz VF14,68(VI00) + NOP lq.xyz VF10,69(VI00) + mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) + maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) + NOP esadd P,VF14 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 + mul.xyz VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) + NOP lq.xyz VF10,0(VI03) + NOP mr32.xyw VF09,VF15 + NOP iaddiu VI04,VI04,0x00000003 + mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 + 
mul.xyz VF18,VF14,VF17 NOP + mul.w VF05,VF05,VF05 NOP + NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF15z NOP + addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1 + mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF14,VF17 NOP + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF19,VF14,VF17 NOP + add.xyz VF18,VF18,VF15 NOP + maddaw.xyz ACC,VF12,VF06w NOP + mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 + mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + addax.w ACC,VF09,VF09x NOP 
+EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) + mul.w VF06,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF17z NOP + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; STALL_LATENCY ?2 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + adday.z ACC,VF18,VF18y NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maxx.z VF18,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF18z NOP + maddaw.xyz ACC,VF12,VF06w NOP + madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) + mul.w VF06,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 + madd.xyz 
VF10,VF10,VF05 lq.xyz VF18,241(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF07,VF00,VF09y NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,238(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_quad_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_quad_pp4_vcl_15] + maxw.z VF09,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF10,71(VI00) + NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF14,72(VI00) + mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 + madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) + NOP xtop VI06 + maddaz.xyz ACC,VF13,VF11z 
iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) + sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP sqrt Q,VF16z ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + NOP iaddiu VI06,VI06,0 + mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) + sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 + maddx.z VF21,VF09,VF21x NOP + NOP waitp + addw.x VF21,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF21z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + NOP move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 NOP + madday.w ACC,VF00,VF21y 
esadd P,VF18 + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z waitp + maxx.w VF05,VF06,VF00x mfp.w VF06,P + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + addq.y VF21,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 + addw.x VF21,VF00,VF00w NOP + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + maddz.w VF05,VF00,VF15z NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF21y NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 esadd P,VF18 + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF20x NOP + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z mfp.w VF06,P + mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + add.xyz VF22,VF22,VF23 NOP + addw.x VF21,VF00,VF00w NOP + mul.w VF06,VF07,VF07 
NOP + addq.y VF21,VF00,Q NOP + mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y + NOP NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF21,VF17,VF20 NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibne VI04,VI07,EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF19,VF20 NOP + maddaw.xyz ACC,VF12,VF07w NOP + madd.xyz VF21,VF10,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w 
VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q lq.xyz VF13,-5(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,-5(VI06) + NOP b EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + NOP waitp ; STALL_LATENCY ?1 + mul.w VF07,VF06,VF06 mfp.w VF06,P + mul.xyz VF21,VF17,VF20 NOP + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz 
VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + NOP b EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) + mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF17y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF13,VF13 + NOP mfp.w VF06,P + mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF17z NOP + mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 + madday.w 
ACC,VF00,VF17y NOP + maddz.w VF07,VF00,VF17z NOP + mulw.xyz VF11,VF11,VF05w NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF11,VF06 NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF16z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF12,VF10,Q lq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_quad_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq 
VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_quad_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_quad_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralQuad_CodeEnd: +; iCount=695 +; register stats: +; 11 VU User integer +; 26 VU User floating point diff --git a/vu1/sce_general_tri_vcl.vsm b/vu1/sce_general_tri_vcl.vsm new file mode 100644 index 00000000..58404d13 --- /dev/null +++ b/vu1/sce_general_tri_vcl.vsm @@ -0,0 +1,689 @@ +; === __LP__ EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === hDown : optimal=35 clid=0 mlid=2 size=(36) +; === dUp : optimal=35 clid=0 mlid=1 size=(36) +; === normal : optimal=35 clid=0 mlid=1 size=(36) +; === another : optimal=35 clid=0 mlid=2 size=(36) +; === normal2 : optimal=35 clid=0 mlid=2 size=(36) +; === __LP__ EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=10 mlid=10 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=3 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === 
another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) +; === __LP__ EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) +; === __LP__ EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneralTri_CodeStart + .global vsmGeneralTri_CodeEnd +vsmGeneralTri_CodeStart: +__v_vu1_general_tri_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_tri_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ vuta1 ] 48 [48 0] 48 [main_loop_lid] + NOP lq.w VF05,57(VI00) + NOP loi 0x44fff000 + NOP xtop VI05 + NOP iaddiu VI03,VI05,0x00000005 + NOP mr32.z VF05,VF05 + NOP iaddiu VI04,VI05,0x000000ed + NOP ilw.x VI05,0(VI05) + NOP lq.xyz VF09,57(VI00) + NOP lq.xyz VF08,59(VI00) + NOP lq VF06,75(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP ilw.w VI07,0(VI00) + mul.xyz VF09,VF09,VF08 lq.xyz VF08,3(VI03) + NOP iadd VI06,VI03,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI08,VF06x + mulax ACC,VF01,VF08x ior VI08,VI08,VI05 + madday 
ACC,VF02,VF08y mfir.x VF06,VI08 + maddaz ACC,VF03,VF08z iadd VI06,VI06,VI05 + maddw VF15,VF04,VF00w lq.xyz VF14,0(VI03) + NOP lq.xyz VF07,58(VI00) + NOP sq VF06,-1(VI04) + NOP iaddiu VI08,VI00,0x00007fff + mulax ACC,VF01,VF14x div Q,VF00w,VF15w + madday ACC,VF02,VF14y lq.xyz VF08,6(VI03) + maddaz ACC,VF03,VF14z iaddiu VI08,VI08,0x00000001 + maddw VF14,VF04,VF00w ilw.w VI02,76(VI00) + add.xyz VF09,VF07,VF09 lq.xyz VF06,76(VI00) + mulax ACC,VF01,VF08x lq.xyz VF07,5(VI03) + madday ACC,VF02,VF08y lq.w VF08,0(VI00) + maddaz ACC,VF03,VF08z div Q,VF00w,VF14w + mulq.xyz VF08,VF15,Q fcset 0 + mulq.xyz VF07,VF07,Q iaddiu VI09,VI03,0 + maddw VF12,VF04,VF00w iaddiu VI10,VI04,0 + NOP sq.xyz VF09,1(VI04) + add.xyz VF10,VF08,VF05 lq.xyz VF15,2(VI03) + maxi.w VF07,VF00,I sq.xyz VF07,3(VI04) + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mul.xyz VF13,VF08,VF06 mfir.w VF10,VI08 + ftoi4.xyz VF10,VF10 lq.xyz VF11,8(VI03) + mulq.xyz VF15,VF15,Q sq.xyz VF09,4(VI04) + sub.xyz VF14,VF07,VF08 sq.xyz VF09,7(VI04) + mul.xyz VF16,VF07,VF06 iaddiu VI03,VI09,0 + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 + mulq.xyz VF15,VF11,Q sq.xyz VF15,0(VI04) + clipw.xyz VF16xyz,VF07w ibeq VI03,VI06,EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ dUp ] 36 [35 35] 36 [EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF12,VF10 lq.xyz VF11,3(VI03) + sub.xyz VF10,VF07,VF08 NOP + mul.xyz VF08,VF07,VF06 iaddiu VI04,VI10,0x00000009 + mulw.xyz VF13,VF14,VF08w lq.xyz VF14,0(VI03) + mulax ACC,VF01,VF11x sq.xyz VF15,6(VI09) + madday ACC,VF02,VF11y mfir.w VF12,VI08 + maddaz ACC,VF03,VF11z NOP + maddw VF15,VF04,VF00w NOP + mulax ACC,VF01,VF14x lq.xyz VF11,6(VI03) + madday ACC,VF02,VF14y sq VF12,2(VI09) + maddaz ACC,VF03,VF14z iaddiu VI11,VI03,0 + maddw VF14,VF04,VF00w div Q,VF00w,VF15w + mulax ACC,VF01,VF11x lq.xyz VF16,5(VI03) + madday 
ACC,VF02,VF11y iaddiu VI10,VI04,0 + maddaz ACC,VF03,VF11z sq.xyz VF09,1(VI04) + maddw VF12,VF04,VF00w lq.xyz VF11,8(VI03) + clipw.xyz VF08xyz,VF07w sq.xyz VF09,4(VI04) + opmula.xyz ACCxyz,VF13xyz,VF10xyz mfir.w VF10,VI08 + mulq.xyz VF08,VF15,Q div Q,VF00w,VF14w + mulq.xyz VF16,VF16,Q lq.xyz VF15,2(VI03) + opmsub.xyz VF00xyz,VF10xyz,VF13xyz fcand VI01,262143 + NOP iand VI03,VI01,VI02 + mul.xyz VF13,VF08,VF06 sq.xyz VF09,7(VI04) + add.xyz VF10,VF08,VF05 sq.xyz VF16,3(VI04) + add.xyz VF17,VF07,VF05 fmand VI01,VI07 + mulq.xyz VF07,VF14,Q div Q,VF00w,VF12w + mulq.xyz VF16,VF15,Q ior VI01,VI03,VI01 + ftoi4.xyz VF10,VF10 iaddiu VI03,VI11,0 + ftoi4.xyz VF15,VF17 iaddiu VI11,VI01,0x00007fff + sub.xyz VF14,VF07,VF08 mfir.w VF15,VI11 + mul.xyz VF16,VF07,VF06 sq.xyz VF16,0(VI04) + add.xyz VF10,VF07,VF05 sq VF10,5(VI04) + mulq.xyz VF07,VF12,Q iaddiu VI03,VI03,0x00000009 + mulq.xyz VF15,VF11,Q sq VF15,8(VI09) + clipw.xyz VF16xyz,VF07w ibne VI03,VI06,EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF13xyz,VF07w iaddiu VI09,VI04,0 +EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 20 [18 0] 21 [EXPL_vu1_general_tri_pp4_vcl_xform_loop_lid__EPI0] + ftoi4.xyz VF09,VF10 NOP + sub.xyz VF10,VF07,VF08 sq.xyz VF15,6(VI09) + mul.xyz VF08,VF07,VF06 NOP + mulw.xyz VF14,VF14,VF08w mfir.w VF09,VI08 + add.xyz VF07,VF07,VF05 lq.xyz VF06,60(VI00) + clipw.xyz VF08xyz,VF07w lq.xyz VF05,59(VI00) ; STALL_LATENCY ?1 + opmula.xyz ACCxyz,VF14xyz,VF10xyz sq VF09,2(VI09) + opmsub.xyz VF11xyz,VF10xyz,VF14xyz lq.xyz VF08,66(VI00) + ftoi4.xyz VF15,VF07 lq.xyz VF07,61(VI00) + NOP fcand VI01,262143 + NOP iand VI02,VI01,VI02 + abs.xyz VF00,VF11 fmand VI07,VI07 + NOP ior VI02,VI02,VI07 + NOP iaddiu VI02,VI02,0x00007fff + NOP ilw.x VI01,0(VI00) + NOP mfir.w VF15,VI02 + NOP iaddiu VI02,VI00,0x00000001 + NOP ilw.x VI03,0(VI02) + NOP ibeq VI01,VI00,pt_lights_lid + NOP sq VF15,8(VI09) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_tri_pp4_vcl_9] + maxw.z VF09,VF00,VF00w NOP 
+dir_light_loop_lid: +; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,67(VI00) + NOP lq.xyz VF13,3(VI03) + NOP lq.xyz VF14,68(VI00) + NOP lq.xyz VF10,69(VI00) + mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) + maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) + NOP esadd P,VF14 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 + mul.xyz VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) + NOP lq.xyz VF10,0(VI03) + NOP mr32.xyw VF09,VF15 + NOP iaddiu VI04,VI04,0x00000003 + mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF18,VF14,VF17 NOP + mul.w VF05,VF05,VF05 NOP + NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF15z NOP + addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1 + mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz 
VF17,1(VI04) + maxx.z VF16,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF14,VF17 NOP + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF19,VF14,VF17 NOP + add.xyz VF18,VF18,VF15 NOP + maddaw.xyz ACC,VF12,VF06w NOP + mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 + mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w sq.xyz 
VF18,-8(VI06) ; STALL_LATENCY ?1 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) + mul.w VF06,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF17z NOP + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; STALL_LATENCY ?2 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + adday.z ACC,VF18,VF18y NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maxx.z VF18,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF18z NOP + maddaw.xyz ACC,VF12,VF06w NOP + madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) + mul.w VF06,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,241(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF07,VF00,VF09y NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + 
maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,238(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_tri_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_tri_pp4_vcl_15] + maxw.z VF09,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF10,71(VI00) + NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF14,72(VI00) + mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 + madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) + NOP xtop VI06 + maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) + sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP sqrt Q,VF16z ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF16,VF00,VF00w 
lq.xyz VF13,5(VI03) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + NOP iaddiu VI06,VI06,0 + mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) + sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 + maddx.z VF21,VF09,VF21x NOP + NOP waitp + addw.x VF21,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF21z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + NOP move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 NOP + madday.w ACC,VF00,VF21y esadd P,VF18 + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z waitp + maxx.w VF05,VF06,VF00x mfp.w VF06,P + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + addq.y VF21,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 + addw.x VF21,VF00,VF00w NOP + mula.xyz ACC,VF20,VF06 lq.xyz 
VF20,-2(VI04) + maddz.w VF05,VF00,VF15z NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF21y NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 esadd P,VF18 + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF20x NOP + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z mfp.w VF06,P + mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + add.xyz VF22,VF22,VF23 NOP + addw.x VF21,VF00,VF00w NOP + mul.w VF06,VF07,VF07 NOP + addq.y VF21,VF00,Q NOP + mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y + NOP NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF21,VF17,VF20 NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibne VI04,VI07,EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 
[EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF19,VF20 NOP + maddaw.xyz ACC,VF12,VF07w NOP + madd.xyz VF21,VF10,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q 
lq.xyz VF13,-5(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,-5(VI06) + NOP b EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + NOP waitp ; STALL_LATENCY ?1 + mul.w VF07,VF06,VF06 mfp.w VF06,P + mul.xyz VF21,VF17,VF20 NOP + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w 
VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + NOP b EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) + mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF17y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF13,VF13 + NOP mfp.w VF06,P + mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF17z NOP + mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF17y NOP + maddz.w VF07,VF00,VF17z NOP + mulw.xyz VF11,VF11,VF05w NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF11,VF06 NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF16z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF12,VF10,Q lq.xyz 
VF10,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_tri_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; _LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq 
VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_tri_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_tri_pp4_vcl_23] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneralTri_CodeEnd: +; iCount=591 +; register stats: +; 12 VU User integer +; 24 VU User floating point diff --git a/vu1/sce_general_vcl.vsm b/vu1/sce_general_vcl.vsm new file mode 100644 index 00000000..3a970928 --- /dev/null +++ b/vu1/sce_general_vcl.vsm @@ -0,0 +1,771 @@ +; === __LP__ EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=22 clid=0 mlid=2 size=(22) +; === normal1 : optimal=22 clid=0 mlid=2 size=(22) +; === vuta : optimal=22 clid=0 mlid=2 size=(22) +; === dUp : optimal=22 clid=0 mlid=2 size=(22) +; === vuta1 : optimal=22 clid=0 mlid=2 size=(22) +; === __LP__ EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=10 mlid=10 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=3 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=4 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) +; === __LP__ EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) +; === __LP__ EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP: +; === dumb : optimal=4 clid=3 mlid=3 size=(5) +; === ldumb : optimal=4 clid=0 mlid=2 size=(5) +; === normal1 : optimal=4 clid=0 mlid=2 size=(5) +; === hDown : optimal=4 
clid=0 mlid=2 size=(5) +; === vuta : optimal=4 clid=0 mlid=2 size=(5) +; === dUp : optimal=4 clid=0 mlid=2 size=(5) +; === normal : optimal=4 clid=0 mlid=2 size=(5) +; === another : optimal=4 clid=0 mlid=2 size=(5) +; === vuta1 : optimal=4 clid=0 mlid=2 size=(5) +; === normal2 : optimal=4 clid=0 mlid=2 size=(5) +; === dumb2 : optimal=4 clid=3 mlid=3 size=(5) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmGeneral_CodeStart + .global vsmGeneral_CodeEnd +vsmGeneral_CodeStart: +__v_vu1_general_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_general_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 26 [26 0] 26 [main_loop_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI01 + NOP iaddiu VI03,VI01,0x00000005 + NOP mr32.z VF05,VF06 + NOP iaddiu VI04,VI01,0 + NOP ilw.x VI05,0(VI01) + NOP fcset 0 + NOP lq VF08,75(VI00) + NOP lq.xyz VF06,76(VI00) + NOP iadd VI06,VI03,VI05 + NOP iadd VI06,VI06,VI05 + NOP mtir VI07,VF08x + NOP ior VI07,VI07,VI05 + NOP mfir.x VF08,VI07 + NOP iaddiu VI07,VI01,0x00000001 + NOP ilw.w VI02,76(VI00) + NOP iadd VI06,VI06,VI05 + NOP sq VF08,236(VI04) + NOP iaddiu VI01,VI01,0x00000005 + NOP iaddiu VI08,VI07,0x00000004 + NOP iaddiu VI09,VI00,0x000003ff + NOP iaddiu VI10,VI00,0x00000800 + NOP iaddiu VI11,VI00,0x00000400 + maxi.w VF07,VF00,I iaddiu VI12,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF08,0(VI07) + ftoi0 VF08,VF08 NOP ; STALL_LATENCY ?3 + NOP mtir VI13,VF08x ; STALL_LATENCY ?3 + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_8] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,3(VI13) + NOP 
mtir VI14,VF08y + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_9] + NOP iand VI13,VI14,VI10 + NOP iand VI14,VI14,VI09 + NOP iadd VI14,VI14,VI01 + NOP isw.w VI12,0(VI14) + NOP isw.w VI13,3(VI14) + NOP mtir VI13,VF08z + NOP iand VI14,VI13,VI11 + NOP NOP + NOP ibeq VI14,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ ldumb ] 10 [8 0] 10 [__v_vu1_general_pp4_vcl_10] + NOP iand VI14,VI13,VI10 + NOP iand VI13,VI13,VI09 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP isw.w VI14,3(VI13) + NOP mtir VI14,VF08w + NOP iand VI13,VI14,VI11 + NOP NOP + NOP ibeq VI13,VI11,adcLoop_done_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_general_pp4_vcl_11] + NOP iand VI13,VI14,VI09 + NOP iaddiu VI07,VI07,0x00000001 + NOP iand VI14,VI14,VI10 + NOP iadd VI13,VI13,VI01 + NOP isw.w VI12,0(VI13) + NOP ibne VI07,VI08,adcLoop_lid + NOP isw.w VI14,3(VI13) +adcLoop_done_lid: +; _LNOPT_w=[ normal2 ] 13 [19 0] 21 [adcLoop_done_lid] + NOP lq.xyz VF12,0(VI03) + mulax ACC,VF01,VF12x iaddiu VI07,VI00,0x00000020 ; STALL_LATENCY ?3 + madday ACC,VF02,VF12y lq.xyz VF10,57(VI00) + maddaz ACC,VF03,VF12z lq.xyz VF11,59(VI00) + maddw VF12,VF04,VF00w lq.w VF05,0(VI00) + mul.xyz VF10,VF10,VF11 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF12w + add.xyz VF07,VF07,VF10 ilw.w VI08,0(VI00) ; STALL_LATENCY ?2 + NOP iaddiu VI03,VI03,0x00000003 + NOP lq.xyz VF11,-1(VI03) + mulq.xyz VF12,VF12,Q sq.xyz VF07,238(VI04) ; STALL_LATENCY ?1 + max.xyz VF09,VF00,VF00 ibeq VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1 + max.xyz VF08,VF00,VF00 iaddiu VI04,VI04,0 +; _LNOPT_w=[ ] 21 [19 0] 22 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__PRO1] + NOP NOP + add.xyz VF10,VF12,VF05 lq.xyz VF15,0(VI03) + sub.xyz VF13,VF09,VF12 iaddiu VI04,VI04,0x000000f3 + max.xyz VF09,VF12,VF12 NOP + mul.xyz VF14,VF12,VF06 NOP + mulax ACC,VF01,VF15x NOP + madday ACC,VF02,VF15y NOP + maddaz ACC,VF03,VF15z ilw.w 
VI09,-3(VI03) + maddw VF12,VF04,VF00w isub VI01,VI08,VI07 + mulq.xyz VF11,VF11,Q sq.xyz VF07,-2(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz iaddiu VI03,VI03,0x00000003 + opmsub.xyz VF16xyz,VF08xyz,VF13xyz iand VI10,VI09,VI08 + NOP div Q,VF00w,VF12w + NOP sq.xyz VF11,-6(VI04) + abs.xyz VF00,VF16 fmand VI11,VI08 ; STALL_LATENCY ?1 + NOP lq.xyz VF11,-1(VI03) + clipw.xyz VF14xyz,VF07w isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibeq VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0 + mulw.xyz VF08,VF13,VF05w fcand VI01,262143 +EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 22 [22 22] 22 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP] + ftoi4.xyz VF14,VF10 iand VI01,VI01,VI02 + add.xyz VF10,VF12,VF05 lq.xyz VF16,0(VI03) + sub.xyz VF13,VF09,VF12 ior VI10,VI01,VI10 + max.xyz VF09,VF12,VF12 ior VI10,VI10,VI09 + mul.xyz VF15,VF12,VF06 ilw.w VI09,-3(VI03) + mulax ACC,VF01,VF16x iaddiu VI10,VI10,0x00007fff + madday ACC,VF02,VF16y mfir.w VF14,VI10 + maddaz ACC,VF03,VF16z isub VI01,VI08,VI07 + maddw VF12,VF04,VF00w iand VI10,VI09,VI08 + mulq.xyz VF11,VF11,Q sq.xyz VF07,1(VI04) + opmula.xyz ACCxyz,VF13xyz,VF08xyz sq VF14,-4(VI04) + opmsub.xyz VF00xyz,VF08xyz,VF13xyz iaddiu VI03,VI03,0x00000003 + mulw.xyz VF08,VF13,VF05w div Q,VF00w,VF12w + clipw.xyz VF15xyz,VF07w sq.xyz VF11,-3(VI04) + NOP iaddiu VI04,VI04,0x00000003 + NOP fmand VI11,VI08 + NOP lq.xyz VF11,-1(VI03) + NOP isub VI11,VI11,VI07 + NOP ior VI07,VI01,VI10 + mulq.xyz VF12,VF12,Q iand VI10,VI11,VI08 + NOP ibne VI03,VI06,EXPL_vu1_general_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP fcand VI01,262143 +EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 21 [20 0] 23 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF07,VF12,VF06 iand VI01,VI01,VI02 + opmula.xyz ACCxyz,VF09xyz,VF08xyz ior VI10,VI01,VI10 ; STALL_LATENCY ?2 + opmsub.xyz VF14xyz,VF08xyz,VF09xyz ior VI10,VI10,VI09 + NOP 
ilw.w VI09,-3(VI03) + NOP iaddiu VI10,VI10,0x00007fff + clipw.xyz VF07xyz,VF07w mfir.w VF13,VI10 + abs.xyz VF00,VF14 fmand VI10,VI08 + NOP isub VI07,VI10,VI07 + ftoi4.xyz VF13,VF10 iand VI10,VI07,VI08 + add.xyz VF10,VF12,VF05 fcand VI01,262143 + NOP iand VI01,VI01,VI02 + mulq.xyz VF11,VF11,Q ior VI10,VI01,VI10 + NOP ior VI10,VI10,VI09 + ftoi4.xyz VF10,VF10 iaddiu VI10,VI10,0x00007fff + NOP mfir.w VF10,VI10 + NOP sq.xyz VF11,-3(VI04) + NOP sq VF13,-4(VI04) + NOP b EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF10,-1(VI04) +EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 18 [20 0] 24 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EPI1] + NOP NOP + NOP NOP + sub.xyz VF09,VF09,VF12 NOP + mul.xyz VF06,VF12,VF06 NOP + opmula.xyz ACCxyz,VF09xyz,VF08xyz NOP ; STALL_LATENCY ?2 + opmsub.xyz VF10xyz,VF08xyz,VF09xyz NOP + clipw.xyz VF06xyz,VF07w ilw.w VI03,-3(VI03) + abs.xyz VF00,VF10 fmand VI01,VI08 ; STALL_LATENCY ?2 + NOP isub VI07,VI01,VI07 + NOP iand VI08,VI07,VI08 + add.xyz VF07,VF12,VF05 fcand VI01,262143 + NOP iand VI02,VI01,VI02 + NOP ior VI02,VI02,VI08 + mulq.xyz VF11,VF11,Q ior VI03,VI02,VI03 + ftoi4.xyz VF07,VF07 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF07,VI03 + NOP sq.xyz VF11,237(VI04) ; STALL_LATENCY ?1 + NOP sq VF07,239(VI04) ; STALL_LATENCY ?1 +EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 8 [8 0] 8 [EXPL_vu1_general_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP lq.xyz VF07,61(VI00) + NOP lq.xyz VF08,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pp4_vcl_16] + maxw.z VF09,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 27 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,67(VI00) + NOP lq.xyz VF13,3(VI03) + NOP lq.xyz VF14,68(VI00) + NOP lq.xyz VF10,69(VI00) + mulax.xyz ACC,VF12,VF13x xtop VI06 ; 
STALL_LATENCY ?1 + madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) + maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 + NOP iadd VI07,VI04,VI05 ; STALL_LATENCY ?2 + add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) + NOP esadd P,VF14 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 + mul.xyz VF15,VF14,VF18 iadd VI07,VI07,VI05 ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) + NOP lq.xyz VF10,0(VI03) + NOP mr32.xyw VF09,VF15 + NOP iaddiu VI04,VI04,0x00000003 + mul.xyz VF18,VF13,VF18 iaddiu VI06,VI06,0 + NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF18,VF14,VF17 NOP + mul.w VF05,VF05,VF05 NOP + NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF15z NOP + addax.w ACC,VF09,VF09x ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1 + mul.w VF05,VF05,VF05 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF14,VF17 NOP + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 + mul.xyz 
VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000f6 +EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF07,VF00,VF09y iaddiu VI06,VI06,0x00000003 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x lq.xyz VF18,-11(VI06) + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF19,VF14,VF17 NOP + add.xyz VF18,VF18,VF15 NOP + maddaw.xyz ACC,VF12,VF06w NOP + mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 + mul.xyz VF18,VF13,VF17 sq.xyz VF18,-11(VI06) + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibne VI04,VI07,EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + maxx.z VF17,VF18,VF00x lq.xyz VF18,-8(VI06) + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-8(VI06) ; STALL_LATENCY ?1 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-5(VI06) + mul.w VF06,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF17z NOP + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,-5(VI06) ; 
STALL_LATENCY ?2 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,-2(VI06) + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,-2(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + adday.z ACC,VF18,VF18y NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maxx.z VF18,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF18z NOP + maddaw.xyz ACC,VF12,VF06w NOP + madd.xyz VF13,VF10,VF05 lq.xyz VF18,238(VI06) + mul.w VF06,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,241(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF18,241(VI06) ; STALL_LATENCY ?2 +EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF07,VF00,VF09y NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 
lq.xyz VF18,238(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF18,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_general_pp4_vcl_22] + maxw.z VF09,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF10,71(VI00) + NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF14,72(VI00) + mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 + madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) + NOP xtop VI06 + maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) + sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI07,VI07,VI05 + NOP sqrt Q,VF16z ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + NOP iaddiu VI06,VI06,0 + mul.xyz VF12,VF12,VF07 ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0 +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz 
VF18,VF08,VF17 lq.xyz VF19,0(VI04) + sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 + maddx.z VF21,VF09,VF21x NOP + NOP waitp + addw.x VF21,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF21z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + NOP move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 NOP + madday.w ACC,VF00,VF21y esadd P,VF18 + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z waitp + maxx.w VF05,VF06,VF00x mfp.w VF06,P + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + addq.y VF21,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 + addw.x VF21,VF00,VF00w NOP + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + maddz.w VF05,VF00,VF15z NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 iaddiu VI06,VI06,0x000000f6 + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibeq VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w 
ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + mul.w VF07,VF07,VF07 iaddiu VI06,VI06,0x00000003 + madday.w ACC,VF00,VF21y NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 esadd P,VF18 + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF20x NOP + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z mfp.w VF06,P + mulq.xyz VF23,VF22,Q lq.xyz VF22,-11(VI06) + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + add.xyz VF22,VF22,VF23 NOP + addw.x VF21,VF00,VF00w NOP + mul.w VF06,VF07,VF07 NOP + addq.y VF21,VF00,Q NOP + mula.xyz ACC,VF20,VF06 sq.xyz VF22,-11(VI06) + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y + NOP NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF21,VF17,VF20 NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibne VI04,VI07,EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF19,VF20 NOP + maddaw.xyz ACC,VF12,VF07w NOP + madd.xyz VF21,VF10,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w 
ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF21,Q lq.xyz VF21,-8(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sq.xyz VF21,-8(VI06) ; STALL_LATENCY ?1 + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q lq.xyz VF13,-5(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,-2(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,-5(VI06) + NOP b EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,-2(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 53 [93 0] 93 
[EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + NOP waitp ; STALL_LATENCY ?1 + mul.w VF07,VF06,VF06 mfp.w VF06,P + mul.xyz VF21,VF17,VF20 NOP + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 div Q,VF00w,VF05w + mula.xyz ACC,VF20,VF06 NOP + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF17,VF13,Q lq.xyz VF13,238(VI06) + mulq.xyz VF12,VF10,Q lq.xyz VF10,241(VI06) ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 NOP + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?2 + NOP sq.xyz VF11,238(VI06) + 
NOP b EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sq.xyz VF10,241(VI06) ; STALL_LATENCY ?1 +EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) + mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF17y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF13,VF13 + NOP mfp.w VF06,P + mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF17z NOP + mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF17y NOP + maddz.w VF07,VF00,VF17z NOP + mulw.xyz VF11,VF11,VF05w NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF11,VF06 NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF16z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF12,VF10,Q lq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP sq.xyz VF10,238(VI06) ; STALL_LATENCY ?3 +EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_general_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lid: +; 
_LNOPT_w=[ another ] 12 [13 0] 13 [done_lid] + NOP lq.w VF05,60(VI00) + NOP xtop VI01 + NOP iaddiu VI02,VI01,0x000000ed + NOP loi 0x43000000 + muli.w VF05,VF05,I iadd VI03,VI02,VI05 + NOP iadd VI03,VI03,VI05 + NOP loi 0x437f0000 + NOP lq.xyz VF05,1(VI02) + minii.w VF05,VF05,I iadd VI03,VI03,VI05 + NOP iaddiu VI02,VI02,0x00000003 + minii.xyz VF06,VF05,I ibeq VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1 ; STALL_LATENCY ?1 + ftoi0.w VF05,VF05 NOP +; _LNOPT_w=[ ] 5 [5 0] 5 [EXPL_vu1_general_pp4_vcl_final_loop_lid__PRO1] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP NOP + ftoi0.xyz VF05,VF06 ibeq VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0 + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 5 [5 4] 5 [EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP] + NOP lq.xyz VF07,1(VI02) + NOP iaddiu VI02,VI02,0x00000003 + NOP sq VF05,-8(VI02) + ftoi0.xyz VF05,VF06 ibne VI02,VI03,EXPL_vu1_general_pp4_vcl_final_loop_lid__MAIN_LOOP + minii.xyz VF06,VF07,I NOP +EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0: +; _LNOPT_w=[ ] 6 [5 0] 8 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 sq VF05,-5(VI02) + NOP b EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?2 +EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1: +; _LNOPT_w=[ ] 4 [5 0] 7 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EPI1] + NOP NOP + NOP NOP + ftoi0.xyz VF05,VF06 NOP + NOP sq VF05,-2(VI02) ; STALL_LATENCY ?3 +EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_general_pp4_vcl_final_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_general_pp4_vcl_30] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmGeneral_CodeEnd: +; iCount=660 +; register stats: +; 15 VU User integer +; 24 VU User floating point diff --git 
a/vu1/sce_indexed_vcl.vsm b/vu1/sce_indexed_vcl.vsm new file mode 100644 index 00000000..ba7a6c8d --- /dev/null +++ b/vu1/sce_indexed_vcl.vsm @@ -0,0 +1,718 @@ +; === __LP__ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=10 mlid=10 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=3 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=5 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) +; === __LP__ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === hDown : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === another : optimal=34 clid=0 mlid=4 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) +; === __LP__ EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=38 clid=0 mlid=2 size=(38) +; === vuta : optimal=38 clid=0 mlid=2 size=(38) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexed_CodeStart + .global vsmIndexed_CodeEnd +vsmIndexed_CodeStart: +__v_vu1_indexed_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [main_loop_lid] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP lq.xyz VF07,61(VI00) + NOP lq.xyz VF08,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_7] + maxw.z 
VF09,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 26 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,67(VI00) + NOP lq.xyz VF13,3(VI03) + NOP lq.xyz VF14,68(VI00) + NOP lq.xyz VF10,69(VI00) + mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) + maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 + add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 + NOP esadd P,VF14 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 + mul.xyz VF15,VF14,VF18 iadd VI07,VI04,VI05 ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + NOP iadd VI05,VI07,VI05 + mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) + NOP mr32.xyw VF09,VF15 + NOP iaddiu VI04,VI04,0x00000003 + mul.xyz VF18,VF13,VF18 lq.xyz VF10,0(VI03) + NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000ac +; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF18,VF14,VF17 NOP + mul.w VF05,VF05,VF05 NOP + NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF15z NOP + addax.w ACC,VF09,VF09x ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1 + mul.w VF05,VF05,VF05 NOP +; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z 
VF16,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF14,VF17 NOP + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0 + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x lq.xyz VF18,0(VI06) + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF19,VF14,VF17 NOP + add.xyz VF18,VF18,VF15 NOP + maddaw.xyz ACC,VF12,VF06w NOP + mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 + mul.xyz VF18,VF13,VF17 sqi.xyz VF18,(VI06++) + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + maxx.z VF17,VF18,VF00x lq.xyz VF18,0(VI06) + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?1 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) + mul.w VF06,VF05,VF05 NOP 
+ mulz.xyz VF17,VF11,VF17z NOP + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + adday.z ACC,VF18,VF18y NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maxx.z VF18,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF18z NOP + maddaw.xyz ACC,VF12,VF06w NOP + madd.xyz VF13,VF10,VF05 lq.xyz VF18,0(VI06) + mul.w VF06,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF07,VF00,VF09y NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + mulz.xyz VF11,VF11,VF18z NOP ; 
STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP ibeq VI01,VI00,done_lighting_lid ; STALL_LATENCY ?2 + NOP ilw.y VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] + maxw.z VF09,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF10,71(VI00) + NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF14,72(VI00) + mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 + madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) + NOP xtop VI06 + maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) + sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 + NOP iadd VI07,VI07,VI05 + NOP iadd VI05,VI07,VI05 + NOP sqrt Q,VF16z ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) + mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + NOP NOP + mul.xyz VF12,VF12,VF07 ibeq 
VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0x000000ac +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) + sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 + maddx.z VF21,VF09,VF21x NOP + NOP waitp + addw.x VF21,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF21z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 NOP + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + NOP move.xyz VF19,VF18 + NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + NOP NOP + NOP NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 NOP + madday.w ACC,VF00,VF21y esadd P,VF18 + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z waitp + maxx.w VF05,VF06,VF00x mfp.w VF06,P + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + addq.y VF21,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 + addw.x VF21,VF00,VF00w NOP + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + maddz.w VF05,VF00,VF15z NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 NOP + mulq.xyz 
VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 esadd P,VF18 + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF20x NOP + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z mfp.w VF06,P + mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI06) + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + add.xyz VF22,VF22,VF23 NOP + addw.x VF21,VF00,VF00w NOP + mul.w VF06,VF07,VF07 NOP + addq.y VF21,VF00,Q NOP + mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI06++) + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y + NOP NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF21,VF17,VF20 NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + 
mul.xyz VF20,VF19,VF20 NOP + maddaw.xyz ACC,VF12,VF07w NOP + madd.xyz VF21,VF10,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI06) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI06++) ; STALL_LATENCY ?1 + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 NOP + mula.xyz ACC,VF20,VF06 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulq.xyz VF17,VF13,Q waitq + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + NOP sqi.xyz VF11,(VI06++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF10,(VI06++) ; 
STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + NOP waitp ; STALL_LATENCY ?1 + mul.w VF07,VF06,VF06 mfp.w VF06,P + mul.xyz VF21,VF17,VF20 NOP + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 NOP + mula.xyz ACC,VF20,VF06 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulq.xyz VF17,VF13,Q waitq + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + NOP sqi.xyz VF11,(VI06++) + mulq.xyz VF12,VF10,Q lq.xyz 
VF10,0(VI06) ; STALL_LATENCY ?2 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) + mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF17y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF13,VF13 + NOP mfp.w VF06,P + mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF17z NOP + mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF17y NOP + maddz.w VF07,VF00,VF17z NOP + mulw.xyz VF11,VF11,VF05w NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF11,VF06 NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF16z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?3 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?3 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + 
NOP ibne VI01,VI00,pt_light_loop_lid + NOP ilw.y VI03,0(VI02) +done_lighting_lid: +; _LNOPT_w=[ vuta ] 51 [50 0] 58 [done_lighting_lid] + NOP lq.w VF05,60(VI00) + NOP loi 0x43000000 + muli.w VF10,VF05,I xtop VI04 ; STALL_LATENCY ?2 + NOP ilw.y VI08,0(VI04) + NOP loi 0x437f0000 + maxi.w VF12,VF00,I ilw.z VI03,0(VI04) + minii.w VF10,VF10,I loi 0x437f0000 + NOP lq.xyz VF05,75(VI00) + NOP iaddiu VI06,VI04,0x00000005 + maxi.y VF10,VF00,I loi 0x40400000 + NOP mtir VI02,VF05x ; STALL_LATENCY ?1 + NOP ior VI03,VI02,VI03 + NOP mfir.x VF05,VI03 + NOP iaddiu VI03,VI00,0x0000004e + NOP mfir.w VF05,VI03 + maxi.z VF09,VF00,I loi 0x437d0000 + NOP iadd VI08,VI06,VI08 + NOP ilw.w VI09,0(VI06) + NOP sq VF05,77(VI00) + NOP lqi.w VF05,(VI06++) + NOP iaddiu VI05,VI04,0x000000ac + NOP iaddiu VI04,VI04,0x00000005 + NOP iaddiu VI07,VI00,0x000000ff + NOP iand VI09,VI09,VI07 + maxi.w VF08,VF00,I iadd VI01,VI09,VI09 + addy.w VF06,VF05,VF10y iadd VI01,VI01,VI09 + mulz.w VF05,VF05,VF09z iadd VI10,VI01,VI04 + NOP lq.xyz VF11,0(VI10) + add.w VF05,VF05,VF08 lq.w VF09,57(VI00) ; STALL_LATENCY ?2 + mulax ACC,VF01,VF11x loi 0x45000000 + madday ACC,VF02,VF11y NOP + maddaz ACC,VF03,VF11z iadd VI09,VI09,VI05 + maddw VF13,VF04,VF00w lq.xyz VF07,0(VI09) + NOP mtir VI11,VF05w + NOP div Q,VF00w,VF13w ; STALL_LATENCY ?2 + NOP iadd VI09,VI11,VI04 + NOP lq.xyz VF12,0(VI09) + maxi.w VF07,VF00,I mr32.z VF05,VF09 + miniw.xyz VF11,VF07,VF12w loi 0x44fff000 + addi.xy VF05,VF00,I iaddiu VI02,VI00,0x0000004b + mulax ACC,VF01,VF12x xgkick VI02 + mulq.xyz VF08,VF13,Q ilw.w VI02,76(VI00) + madday ACC,VF02,VF12y lq.xyz VF06,76(VI00) + maddaz ACC,VF03,VF12z fcset 0 + maddw VF16,VF04,VF00w mtir VI01,VF06w + add.xyz VF13,VF08,VF05 lq.xyz VF14,2(VI10) + mul.xyz VF08,VF08,VF06 iadd VI01,VI01,VI05 + NOP lq.xyz VF12,0(VI01) + ftoi0.w VF11,VF10 div Q,VF00w,VF16w + ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 + clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI09) +; _LNOPT_w=[ ] 38 [37 0] 38 
[EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] + mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI06) + NOP NOP + NOP lqi.w VF05,(VI06++) + NOP sq VF11,1(VI03) + mulq.xyz VF11,VF16,Q fcand VI01,262143 + NOP iand VI11,VI11,VI07 + NOP iadd VI10,VI11,VI11 + addy.w VF06,VF05,VF10y iadd VI10,VI10,VI11 + mulz.w VF05,VF05,VF09z iadd VI12,VI10,VI04 + mul.xyz VF07,VF11,VF06 iadd VI11,VI11,VI05 + add.xyz VF14,VF11,VF05 lq.xyz VF11,0(VI12) + ftoi4.xyz VF16,VF13 iand VI09,VI01,VI02 + add.w VF05,VF05,VF08 ior VI09,VI09,VI00 + clipw.xyz VF07xyz,VF07w iaddiu VI01,VI09,0x00007fff + mulax ACC,VF01,VF11x mfir.w VF16,VI01 + madday ACC,VF02,VF11y mtir VI10,VF06w + maddaz ACC,VF03,VF11z mtir VI09,VF05w + maddw VF13,VF04,VF00w lq.xyz VF07,0(VI11) + mulq.xyz VF16,VF08,Q sq VF16,2(VI03) + NOP iadd VI11,VI09,VI04 + NOP sq.xyz VF15,0(VI03) + NOP div Q,VF00w,VF13w + miniw.xyz VF11,VF07,VF12w iaddiu VI09,VI03,0 + miniw.xyz VF07,VF12,VF12w lq.xyz VF12,0(VI11) + NOP fcand VI01,262143 + ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) + NOP iand VI03,VI01,VI02 + mulax ACC,VF01,VF12x ior VI03,VI03,VI00 + mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI03 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) + add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 + mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI09) + NOP div Q,VF00w,VF16w + ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP ilw.w VI11,0(VI06) + NOP lqi.w VF05,(VI06++) + NOP sq VF11,1(VI03) + mulq.xyz VF15,VF14,Q fcand VI01,262143 + mulq.xyz VF11,VF16,Q iand VI11,VI11,VI07 + addy.w VF06,VF05,VF10y iadd VI10,VI11,VI11 + mulz.w VF05,VF05,VF09z iadd VI10,VI10,VI11 + NOP iadd VI12,VI10,VI04 + add.xyz VF14,VF11,VF05 iadd VI11,VI11,VI05 + mul.xyz 
VF18,VF11,VF06 mtir VI10,VF06w + add.w VF05,VF05,VF08 lq.xyz VF17,0(VI12) + ftoi4.xyz VF16,VF13 iand VI01,VI01,VI02 + ftoi0.xyz VF11,VF07 ior VI01,VI01,VI00 + clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff + mulax ACC,VF01,VF17x mtir VI01,VF05w + madday ACC,VF02,VF17y mfir.w VF16,VI13 + maddaz ACC,VF03,VF17z lq.xyz VF07,0(VI11) + maddw VF13,VF04,VF00w iadd VI11,VI01,VI04 + NOP sq.xyz VF15,0(VI03) + NOP sq VF16,2(VI03) + miniw.xyz VF11,VF07,VF12w sq VF11,4(VI09) + NOP div Q,VF00w,VF13w + miniw.xyz VF07,VF12,VF12w iaddiu VI09,VI03,0 + NOP lq.xyz VF12,0(VI11) + NOP fcand VI01,262143 + ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) + mulq.xyz VF16,VF08,Q iand VI03,VI01,VI02 + mulax ACC,VF01,VF12x ior VI03,VI03,VI00 + mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI03 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) + add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 + mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI09) + NOP div Q,VF00w,VF16w + ftoi0.xyz VF11,VF11 ibne VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] + mulq.xyz VF09,VF14,Q NOP + NOP NOP + NOP NOP + NOP NOP + mulq.xyz VF11,VF16,Q sq VF11,1(VI03) + NOP NOP + NOP fcand VI01,262143 + NOP iand VI01,VI01,VI02 + ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 + mul.xyz VF06,VF11,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF14,VF11,VF05 mfir.w VF16,VI01 + ftoi0.xyz VF11,VF07 NOP + clipw.xyz VF06xyz,VF07w sq.xyz VF09,0(VI03) ; STALL_LATENCY ?1 + mulq.xyz VF16,VF08,Q sq VF16,2(VI03) + miniw.xyz VF07,VF12,VF12w sq VF11,4(VI09) + NOP iaddiu VI09,VI03,0 + NOP fcand VI01,262143 + NOP iand VI03,VI01,VI02 + ftoi0.xyz VF11,VF07 ior VI03,VI03,VI00 + ftoi4.xyz VF12,VF14 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF12,VI03 + NOP sq.xyz VF16,3(VI09) + NOP sq 
VF11,4(VI09) + NOP b EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF12,5(VI09) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] + mulq.xyz VF07,VF14,Q NOP + NOP NOP + NOP sq VF11,1(VI03) + NOP fcand VI01,262143 + mulq.xyz VF11,VF16,Q iand VI04,VI01,VI02 + NOP ior VI04,VI04,VI00 + ftoi4.xyz VF16,VF13 iaddiu VI04,VI04,0x00007fff + NOP mfir.w VF16,VI04 + add.xyz VF14,VF11,VF05 sq.xyz VF07,0(VI03) + mul.xyz VF11,VF11,VF06 NOP + mulq.xyz VF16,VF08,Q sq VF16,2(VI03) ; STALL_LATENCY ?1 + clipw.xyz VF11xyz,VF07w NOP ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + miniw.xyz VF07,VF12,VF12w iaddiu VI04,VI03,0 + NOP fcand VI01,262143 + NOP iand VI03,VI01,VI02 + ftoi4.xyz VF12,VF14 ior VI03,VI03,VI00 + ftoi0.xyz VF11,VF07 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF12,VI03 + NOP sq.xyz VF16,3(VI04) + NOP sq VF11,4(VI04) ; STALL_LATENCY ?1 + NOP sq VF12,5(VI04) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI00,0x0000004d + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_21] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexed_CodeEnd: +; iCount=637 +; register stats: +; 14 VU User integer +; 24 VU User floating point From a4c5e068c7b49a81af9598edd9a45defcd69bd3a Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 18:25:36 +0200 Subject: [PATCH 08/17] docs: reflect that all 13 VU1 renderers build with openvcl + masp Previously CMAKE_BUILD.md claimed only 11 of 13 renderers compiled with openvcl, listing `indexed` and `scei` as incompatible. Both now build cleanly with openvcl + masp, as confirmed by build-test/vu1/ containing all 13 .vo artifacts produced by the open pipeline. Also remove the commented-out skip block in CMakeLists.txt that used to fall back to pre-built objects for those two renderers. 
The fallback to pre-built .vo files via the outer if/elif/else for VU1 tools is preserved (still useful on hosts without the toolchain installed). Co-Authored-By: Claude Opus 4.7 (1M context) --- CMAKE_BUILD.md | 13 +++++++------ CMakeLists.txt | 19 ------------------- 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/CMAKE_BUILD.md b/CMAKE_BUILD.md index 42fa30da..8533f285 100644 --- a/CMAKE_BUILD.md +++ b/CMAKE_BUILD.md @@ -16,7 +16,7 @@ This document describes how to build ps2gl using CMake instead of the traditiona - The build automatically detects and prefers open-source tools (`openvcl` + `masp`) over proprietary ones (`vcl` + `gasp`) - If VU1 tools are not available (e.g., on macOS), the build will automatically use pre-built `.vo` object files from the `vu1/` directory - Pre-built objects can be generated by running the build once with VU1 tools available -- **OpenVCL compatibility**: 11 out of 13 renderers compile successfully with openvcl. The `indexed` and `scei` renderers require proprietary `vcl` or use pre-built objects as they are incompatible with openvcl +- **OpenVCL compatibility**: all 13 renderers (including `indexed` and `scei`) now compile successfully with `openvcl` + `masp`. The proprietary `vcl` + `gasp` toolchain remains supported as a fallback, but is no longer required. ## Building @@ -89,18 +89,19 @@ ps2gl includes VU1 assembly renderers that go through a complex preprocessing pi 5. **Step 5**: `vcl`/`openvcl` compiler generates `.vsm` files 6. 
**Step 6**: `dvp-as` assembler generates `.vo` object files -The CMake build handles all these steps automatically for the following renderers: +The CMake build handles all these steps automatically for all 13 renderers: -### OpenVCL Compatible (11 renderers): - fast_nolights, fast - general, general_quad, general_tri - general_nospec, general_nospec_quad, general_nospec_tri - general_pv_diff, general_pv_diff_quad, general_pv_diff_tri - -### Require proprietary VCL or use pre-built (2 renderers): -- indexed (uses variable naming incompatible with openvcl) +- indexed - scei +All 13 build with either `openvcl + masp` (preferred) or `vcl + gasp` (legacy +fallback). The output `.vsm` from the open pipeline can be compared against +the in-tree Sony reference (`vu1/sce_*_vcl.vsm`) for validation. + ## CMake Toolchain The build uses the PS2DEV CMake toolchain file located at: diff --git a/CMakeLists.txt b/CMakeLists.txt index ade200f4..c53acbcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,25 +157,6 @@ foreach(RENDERER ${RENDERERS}) continue() endif() - # Check if this renderer is known to be incompatible with openvcl - # indexed renderer uses variable names that openvcl doesn't recognize - # Temporarily disabled to try compiling with openvcl - # if(OPENVCL_FOUND AND (RENDERER STREQUAL "indexed" OR RENDERER STREQUAL "scei")) - # if(EXISTS ${VO_SOURCE}) - # message(STATUS "Using pre-built VU1 object for ${RENDERER}.vo (openvcl incompatible)") - # add_custom_command( - # OUTPUT ${VO_FILE} - # COMMAND ${CMAKE_COMMAND} -E copy ${VO_SOURCE} ${VO_FILE} - # DEPENDS ${VO_SOURCE} - # COMMENT "Copying pre-built VU1 object: ${RENDERER}.vo" - # ) - # list(APPEND VU1_OBJECTS ${VO_FILE}) - # else() - # message(WARNING "Pre-built VU1 object not found: ${VO_SOURCE}") - # endif() - # continue() - # endif() - # Step 1: .vcl -> _pp1.vcl (remove #include, #define, fix .include paths) add_custom_command( OUTPUT ${PP1_FILE} From 9138e1fbd6ec0e855b15593955b8dbcdc91e8d4d Mon Sep 17 
00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 19:14:33 +0200 Subject: [PATCH 09/17] test: semantic VSM diff against Sony reference per renderer Adds cmake/vsm_diff.py plus CTest wiring that compares openvcl's output for each VU1 renderer against the matching Sony-generated reference in vu1/sce_<renderer>_vcl.vsm. The comparison is deliberately permissive about pipe pairing and register-allocator choices -- it checks: - opcode histogram (same mnemonics with same counts) - flag histogram ([E]/[I]/[D]/[T] occurrences) - label set (control-flow structure preserved) - non-nop slot count + ratio (rough scheduler-progress signal) The 12 renderers with a Sony reference get one CTest each, all labelled `vsm-diff` + `known-failing` and marked WILL_FAIL. Today every renderer diverges (openvcl produces 34-73% of Sony's instruction count because the dual-pipe scheduler and the multi-variant specialization aren't implemented yet), so WILL_FAIL keeps the build green; when a renderer starts matching, the test will XPASS so we notice. Baseline ratios captured at commit time: fast_nolights 0.72 general_pv_diff 0.37 fast 0.73 general_quad 0.34 general_nospec_quad 0.38 general_tri 0.38 general_nospec_tri 0.46 general 0.38 general_nospec 0.45 indexed 0.34 general_pv_diff_quad 0.39 general_pv_diff_tri 0.38 Usage: cmake -B build-test && cmake --build build-test ctest --test-dir build-test -L vsm-diff # raw diff for a single renderer: python3 cmake/vsm_diff.py vu1/sce_general_vcl.vsm build-test/vu1/general_vcl.vsm The scei renderer is skipped because the Sony reference for it uses a different naming convention (`scei_vcl.vsm` vs `sce_<renderer>_vcl.vsm`).
Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 44 +++++++++ cmake/vsm_diff.py | 233 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 277 insertions(+) create mode 100755 cmake/vsm_diff.py diff --git a/CMakeLists.txt b/CMakeLists.txt index c53acbcf..5658969d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,6 +231,50 @@ if(VU1_OBJECTS) add_custom_target(vu1_objects ALL DEPENDS ${VU1_OBJECTS}) endif() +# ============================================================================ +# VSM semantic-diff tests +# ============================================================================ +# For each renderer with a Sony reference VSM in `vu1/sce_<renderer>_vcl.vsm`, +# add a CTest that compares its opcode histogram + label set against the +# openvcl-produced `<build>/vu1/<renderer>_vcl.vsm`. Used as a ratchet for the +# eventual dual-pipe scheduler / specialization work: tests that don't +# pass yet are marked WILL_FAIL so a renderer "going green" lights up +# instead of breaking the build. +find_package(Python3 COMPONENTS Interpreter) +if(VU1_TOOLS_AVAILABLE AND Python3_Interpreter_FOUND) + enable_testing() + + set(VSM_DIFF_SCRIPT ${CMAKE_SOURCE_DIR}/cmake/vsm_diff.py) + + foreach(RENDERER ${RENDERERS}) + set(SCE_VSM "${CMAKE_SOURCE_DIR}/vu1/sce_${RENDERER}_vcl.vsm") + set(OVC_VSM "${CMAKE_BINARY_DIR}/vu1/${RENDERER}_vcl.vsm") + + if(NOT EXISTS "${SCE_VSM}") + # No Sony reference for this renderer (e.g. scei.vcl ships its + # own reference under a different name); skip silently. + continue() + endif() + + add_test(NAME vsm_diff_${RENDERER} + COMMAND ${Python3_EXECUTABLE} ${VSM_DIFF_SCRIPT} + ${SCE_VSM} ${OVC_VSM}) + # Building the openvcl output is a prerequisite. CTest doesn't + # auto-build, so the user must run `cmake --build` first; the + # FIXTURES_REQUIRED machinery would be overkill here.
+ set_tests_properties(vsm_diff_${RENDERER} PROPERTIES + # Today openvcl produces neither pipe-paired nor multi- + # specialised output, so the strict histogram check fails + # for every renderer. WILL_FAIL inverts the result: CTest + # reports a renderer that starts matching as a failure, and + # also masks exit code 2 (e.g. a missing .vsm file). Drop it + # per-renderer as the scheduler / specialization work lands. + WILL_FAIL TRUE + LABELS "vsm-diff;known-failing" + ) + endforeach() +endif() + # ============================================================================ # Build the library # ============================================================================ diff --git a/cmake/vsm_diff.py b/cmake/vsm_diff.py new file mode 100755 index 00000000..7fe8ec7e --- /dev/null +++ b/cmake/vsm_diff.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +""" +Compare two VU1 .vsm files at the semantic level. + +Used as a CTest target in ps2gl to verify that openvcl produces the same +*set* of operations as Sony's proprietary vcl for each VU1 renderer. +Differences in pipe-pairing, register-allocator choices, and whitespace +are intentionally ignored -- the goal is to surface real divergences +(missing instructions, wrong opcodes, missing labels) and to track how +close openvcl is getting to the reference as the dual-pipe scheduler +matures. + +Usage: + vsm_diff.py <reference.vsm> <openvcl.vsm> + +Exit codes: + 0 = histograms and labels match. + 1 = real divergence (different opcode set, different label set). + 2 = file read / parse error. + +The script is intentionally permissive about pipe placement: only the +opcode mix matters. A separate "instruction-count delta" line is printed +to track scheduler progress over time. +""" + +import re +import sys +from collections import Counter + +# Lines that are not real instructions and should be skipped entirely. +_DIRECTIVE_PREFIXES = (".vu", ".align", ".global", ".name", ".end") + +# Sony's reference output includes annotation comments like +# ; === __LP__ ...
+# ; _LNOPT_w=[...] ... +# openvcl emits no such comments. Both should be dropped from the +# semantic comparison. +_COMMENT_RE = re.compile(r"^\s*;.*") + +# A label line: identifier ending with ':' optionally followed by a comment. +_LABEL_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*:\s*(?:;.*)?$") + +# An instruction line carries upper-pipe + lower-pipe ops separated by a +# wide whitespace gap. Sony's reference left-pads the mnemonic into a +# ~14-char column and uses commas (no spaces) between operands, so within +# a single pipe there's never more than ~13 contiguous spaces. The gap +# between pipes is always 20+ spaces in practice. 15 is the safest +# threshold that catches both styles (openvcl + reference) without +# splitting through an operand list. +_PIPE_SPLIT_RE = re.compile(r"\s{15,}") + +# Flag suffixes the assembler writes after the mnemonic: NOP[E], NOP[I], +# NOP[D], NOP[T]. Captured separately from the bare mnemonic so we can +# verify control-flow tags independently of the surrounding ops. +_FLAG_RE = re.compile(r"^([a-z0-9.]+?)(\[[A-Za-z]+\])?$") + + +def _normalize_mnemonic(tok: str) -> str: + """Lowercase the mnemonic and strip any [E]/[I]/[D]/[T] flag suffix. + + Keep dest fields (`.xyz`, `.w`) attached to the mnemonic so we can + distinguish `addi.xy` from `addi.xyz` -- they're semantically + different operations on different fields. + """ + m = _FLAG_RE.match(tok.lower()) + return m.group(1) if m else tok.lower() + + +def _extract_flag(tok: str) -> str: + """Return the lowercased flag suffix (e.g. "[e]") if present, else "".""" + m = _FLAG_RE.match(tok.lower()) + return m.group(2) or "" if m else "" + + +def _is_mnemonic_token(tok: str) -> bool: + """True iff `tok` looks like a VU1 mnemonic (as opposed to a register + or immediate operand). + + Sony's reference output uses uppercase mnemonics with comma-joined + operands and no space between them; openvcl's output uses lowercase + mnemonics with space-separated operands.
A consistent classifier + over both is to bucket each whitespace-separated token by what it + looks like: + + mnemonic: starts with a letter, is not a register name + register: V[FI]... or ACC[component] or single-letter I/Q/P/R + immediate: starts with a digit (incl. 0x...) + indirect: contains '(' (e.g. 62(VI00)) + label-ref: trailing ':' -- handled before this function gets called + """ + if not tok: + return False + # Reject anything that does not start with a letter (immediates, + # indirect operands such as "62(VI00)", stray punctuation). + if not tok[0].isalpha(): + return False + # Indirect access embedded in a mnemonic isn't a thing -- those are + # always operands like "62(VI00)" which start with a digit anyway, + # but defend against weirdness. + if "(" in tok: + return False + upper = tok.upper() + # Register names: VF<nn> / VI<nn>, optionally with a field + # suffix like VF15w. + if len(tok) > 2 and upper[:2] in ("VF", "VI") and tok[2].isdigit(): + return False + # The accumulator operand prefix (ACC, ACCxyz, ...). + if upper.startswith("ACC"): + return False + # Single-letter pseudo-registers used as operands. + if upper in ("I", "Q", "P", "R"): + return False + return True + + +def parse_vsm(path: str): + """Return (opcode_histogram, flag_histogram, label_set, instr_count). + + instr_count is the total number of pipe slots filled with anything + other than `nop` -- a rough "work-per-cycle" signal for the scheduler. + """ + opcodes = Counter() + flags = Counter() + labels = set() + instr_count = 0 + + with open(path) as f: + for raw in f: + line = raw.rstrip("\n") + + if not line.strip(): + continue + if _COMMENT_RE.match(line): + continue + stripped = line.strip() + if stripped.startswith(_DIRECTIVE_PREFIXES): + continue + + label_match = _LABEL_RE.match(line) + if label_match: + labels.add(label_match.group(1)) + continue + + # Real instruction line: split into upper-pipe / lower-pipe + # halves on a 15+ whitespace gap.
Within each half the + # mnemonic is the first token; the rest is operands and + # would otherwise alias as bogus "opcodes" if we treated + # every token equally. + halves = _PIPE_SPLIT_RE.split(line.strip(), maxsplit=1) + for half in halves: + if not half: + continue + tok = half.split()[0] + if not _is_mnemonic_token(tok): + continue + op = _normalize_mnemonic(tok) + flag = _extract_flag(tok) + opcodes[op] += 1 + if flag: + flags[flag] += 1 + if op != "nop": + instr_count += 1 + + return opcodes, flags, labels, instr_count + + +def _diff_counters(a: Counter, b: Counter): + """Return dict {key: (a[key], b[key])} for keys whose counts differ.""" + diffs = {} + for k in sorted(set(a) | set(b)): + if a[k] != b[k]: + diffs[k] = (a[k], b[k]) + return diffs + + +def main(argv) -> int: + if len(argv) != 3: + print(f"usage: {argv[0]} ", file=sys.stderr) + return 2 + + ref_path, ovc_path = argv[1], argv[2] + + try: + ref_ops, ref_flags, ref_labels, ref_count = parse_vsm(ref_path) + ovc_ops, ovc_flags, ovc_labels, ovc_count = parse_vsm(ovc_path) + except FileNotFoundError as e: + print(f"missing file: {e.filename}", file=sys.stderr) + return 2 + + op_diff = _diff_counters(ref_ops, ovc_ops) + flag_diff = _diff_counters(ref_flags, ovc_flags) + only_in_ref = ref_labels - ovc_labels + only_in_ovc = ovc_labels - ref_labels + + histogram_ok = not op_diff + flags_ok = not flag_diff + labels_ok = not (only_in_ref or only_in_ovc) + + # The scheduler-progress line: a single ratio that should approach 1.0 + # as openvcl learns to pair pipes. Values are non-nop pipe slots.
+ if ref_count == 0: + ratio = float("inf") if ovc_count else 1.0 + else: + ratio = ovc_count / ref_count + + print(f"=== vsm_diff: {ref_path} vs {ovc_path}") + print(f" non-nop slots: reference={ref_count} openvcl={ovc_count} ratio={ratio:.2f}") + print(f" unique opcodes: reference={len(ref_ops)} openvcl={len(ovc_ops)}") + print(f" labels: reference={len(ref_labels)} openvcl={len(ovc_labels)}") + print(f" histogram_ok={histogram_ok} flags_ok={flags_ok} labels_ok={labels_ok}") + + if op_diff: + print(" opcode count mismatches (op: reference -> openvcl):") + for op, (ra, oa) in op_diff.items(): + print(f" {op:<14} {ra:>4} -> {oa}") + if flag_diff: + print(" flag count mismatches ([X]: reference -> openvcl):") + for fl, (ra, oa) in flag_diff.items(): + print(f" {fl:<6} {ra} -> {oa}") + if only_in_ref: + print(" labels only in reference:") + for l in sorted(only_in_ref): + print(f" - {l}") + if only_in_ovc: + print(" labels only in openvcl:") + for l in sorted(only_in_ovc): + print(f" + {l}") + + return 0 if (histogram_ok and labels_ok) else 1 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) From 316cd918136d40e91e47b3d81175dfe959a6e2a8 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 19:43:11 +0200 Subject: [PATCH 10/17] cmake: add PS2GL_USE_SCE_VSM diagnostic build option Adds a build path that bypasses the .vcl -> .vsm half of the pipeline and assembles the in-tree Sony reference VSMs (vu1/sce__vcl.vsm) directly with dvp-as. Only dvp-as is required when this option is on; openvcl/masp/vcl/gasp are not consulted. Why: separates "openvcl produces wrong VSMs" from "ps2gl host-side bug unrelated to the VU1 microcode". Boot a sample built with this option ON and the same sample built with it OFF; if behavior diverges, the .vcl -> .vsm half is at fault. Empirical result (2026-05-11): lesson04 renders blank when built with openvcl, but renders the expected rotating triangle + quad when built with PS2GL_USE_SCE_VSM=ON. 
Confirms the rendering bug for samples that use glRotatef/glTranslatef lives in openvcl's VSM output, not in ps2gl's host code. Special-case: scei's reference VSM is named `scei_vcl.vsm` (no `sce_` prefix), so it gets a one-off path in the bypass branch. Usage: cmake -B build-sce -DPS2GL_USE_SCE_VSM=ON -DBUILD_EXAMPLES=ON cmake --build build-sce # boot build-sce/examples/<sample>.elf in PCSX2 to compare Co-Authored-By: Claude Opus 4.7 (1M context) --- CMAKE_BUILD.md | 3 +++ CMakeLists.txt | 52 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/CMAKE_BUILD.md b/CMAKE_BUILD.md index 8533f285..8dd185ae 100644 --- a/CMAKE_BUILD.md +++ b/CMAKE_BUILD.md @@ -68,6 +68,9 @@ The following CMake options are available: |--------|---------|-------------| | `DEBUG` | OFF | Enable debug build with `_DEBUG` definition | | `BUILD_TESTS` | OFF | Build test executables | +| `BUILD_EXAMPLES` | OFF | Build the example ELFs under `examples/` | +| `BUILD_GLUT` | ON | Build the GLUT-compat library | +| `PS2GL_USE_SCE_VSM` | OFF | Bypass the .vcl pipeline; assemble Sony's reference VSMs (`vu1/sce_<renderer>_vcl.vsm`) directly with `dvp-as`. Diagnostic mode: lets us isolate "openvcl produces wrong VSMs" from "ps2gl host-side bug" by comparing the SCE-built ELF behavior against the openvcl-built one. | ## Build Flags diff --git a/CMakeLists.txt b/CMakeLists.txt index 5658969d..e5679a6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,17 @@ set(RENDERERS scei ) +# Diagnostic build option: skip the .vcl -> .vsm half of the pipeline +# and assemble the in-tree Sony reference VSMs (vu1/sce_<renderer>_vcl.vsm) +# directly with dvp-as. Lets us isolate "openvcl produces wrong VSMs" +# from "ps2gl host-side bug" -- if a sample that renders blank with the +# openvcl-built library renders correctly with this option ON, the +# fault is in openvcl's output (or our preprocessing) rather than in +# ps2gl itself. 
+option(PS2GL_USE_SCE_VSM + "Bypass the .vcl pipeline; assemble vu1/sce__vcl.vsm directly" + OFF) + # Check for VU1 tools availability # Prefer open-source tools (openvcl + masp) over proprietary ones (vcl + gasp) find_program(OPENVCL_FOUND openvcl) @@ -110,7 +121,16 @@ find_program(GASP_FOUND gasp) find_program(DVP_AS_FOUND dvp-as) # Determine which tools to use -if(OPENVCL_FOUND AND MASP_FOUND AND DVP_AS_FOUND) +if(PS2GL_USE_SCE_VSM AND DVP_AS_FOUND) + # SCE-VSM bypass: only dvp-as is required; the .vcl -> .vsm half of + # the pipeline is skipped and we assemble Sony's reference VSMs + # directly. Treat VU1_TOOLS_AVAILABLE as TRUE so the foreach loop + # produces .vo files (it just takes the bypass branch below). + set(VU1_TOOLS_AVAILABLE TRUE) + set(VCL_TOOL "") + set(GASP_TOOL "") + message(STATUS "PS2GL_USE_SCE_VSM=ON - using vu1/sce__vcl.vsm directly (dvp-as only)") +elseif(OPENVCL_FOUND AND MASP_FOUND AND DVP_AS_FOUND) set(VU1_TOOLS_AVAILABLE TRUE) set(VCL_TOOL ${OPENVCL_FOUND}) set(GASP_TOOL ${MASP_FOUND}) @@ -157,6 +177,36 @@ foreach(RENDERER ${RENDERERS}) continue() endif() + if(PS2GL_USE_SCE_VSM) + # Bypass the .vcl pipeline entirely; assemble the in-tree Sony + # reference VSM directly. scei's reference uses a different + # naming convention (`scei_vcl.vsm`, no `sce_` prefix), so + # special-case it. 
+ if(RENDERER STREQUAL "scei") + set(SCE_VSM_SOURCE "${CMAKE_SOURCE_DIR}/vu1/scei_vcl.vsm") + else() + set(SCE_VSM_SOURCE "${CMAKE_SOURCE_DIR}/vu1/sce_${RENDERER}_vcl.vsm") + endif() + + if(EXISTS ${SCE_VSM_SOURCE}) + add_custom_command( + OUTPUT ${VO_FILE} + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/vu1 + COMMAND ${DVP_AS_FOUND} -o ${VO_FILE} ${SCE_VSM_SOURCE} + DEPENDS ${SCE_VSM_SOURCE} + COMMENT "SCE-VSM bypass: dvp-as ${SCE_VSM_SOURCE} -> ${RENDERER}.vo" + ) + set_source_files_properties(${VO_FILE} PROPERTIES + EXTERNAL_OBJECT TRUE + GENERATED TRUE + ) + list(APPEND VU1_OBJECTS ${VO_FILE}) + else() + message(WARNING "PS2GL_USE_SCE_VSM=ON but ${SCE_VSM_SOURCE} not found; skipping ${RENDERER}") + endif() + continue() + endif() + # Step 1: .vcl -> _pp1.vcl (remove #include, #define, fix .include paths) add_custom_command( OUTPUT ${PP1_FILE} From a6ace9fe308dbbfb7e01acb87ef5f50eb7338e2b Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 22:30:21 +0200 Subject: [PATCH 11/17] =?UTF-8?q?docs:=20add=20INTEGRATION=5FPLAN.md=20?= =?UTF-8?q?=E2=80=94=20cross-repo=20state=20and=20roadmap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-repo planning document covering ps2gl + openvcl + masp. Captures the current state of the open-source pipeline migration, the open items in priority order, and the commands needed to pick up where the last session left off. Key sections: - §0 TL;DR -- the four open items and where to pick up. - §2.1 The ps2gl GL_QUADS rendering bug (filed in openvcl/TODO and repro committed at openvcl/test/repro/quad_adc_bug.vcl). Workaround: this repo's `-DPS2GL_USE_SCE_VSM=ON` build option. - §2.2 The dual-pipe scheduler (headline performance feature in openvcl). Per-renderer baseline instruction-count ratios captured. - §3 What landed 2026-05-11 -- 14 commits across openvcl + ps2gl. - §4 Sample-by-sample runtime-validation matrix. 
- §6 Quick-reference commands for the full dev loop. This file is intentionally cross-repo: pointing it from ps2gl is practical because ps2gl is the integration target (the consumer of both masp and openvcl). It's expected to be updated each session as items get closed and new ones surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 376 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 INTEGRATION_PLAN.md diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md new file mode 100644 index 00000000..8b26e67f --- /dev/null +++ b/INTEGRATION_PLAN.md @@ -0,0 +1,376 @@ +# PS2 OpenGL Toolchain — Current State & Plan + +_Last reviewed: 2026-05-11_ + +## 0. TL;DR — pick this up here next session + +**Where we are.** The open pipeline (`openvcl + masp + dvp-as`) builds all 13 +ps2gl renderers and produces ELFs that boot in PCSX2. Triangle-based samples +render correctly. **`GL_QUADS`-based shaders render blank** because openvcl +emits wrong ADC-bit values — bug is localized, has a workaround, and has a +minimal repro committed. + +**Headline open items, in priority order:** + +1. 🔴 **Quad-renderer bug** in openvcl (`general_quad`, `general_pv_diff_quad`, + `general_nospec_quad`). Workaround: build ps2gl with + `-DPS2GL_USE_SCE_VSM=ON`. Minimal repro: `openvcl/test/repro/quad_adc_bug.vcl`. + See §2.1. +2. 🟡 **Dual-pipe scheduler** in openvcl. Headline performance feature. + Currently openvcl produces 0.34-0.73× Sony's instruction count per + renderer. Multi-week. See §2.2. +3. 🟡 **masp polish** — two `FIXME`s in `src/macro.c`, decision on + `build_ps2/`. Half-day. See §2.3. +4. 🟡 **More unit tests** for masp + openvcl, especially per-module coverage. + Infrastructure already in place. See §2.4. + +**Workarounds & infrastructure landed this session:** +- `-DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, assembles Sony's reference VSMs + directly. Diagnostic + temporary workaround. 
(`ps2gl@cmake` commit `316cd91`) +- `vsm_diff.py` semantic diff harness + per-renderer CTest entries. All 12 + WILL_FAIL today; XPASS-flips as renderers converge. (`ps2gl@cmake` commit + `9138e1f`) +- openvcl unit + integration test framework. 17 tests, 0 failures. + (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`) + +--- + +## 1. Big picture + +We are eliminating the legacy / proprietary tools from the ps2gl shader +pipeline so any homebrew developer can build ps2gl from source with **only +open-source** tooling. + +``` + legacy pipeline target pipeline + ┌──────────────────────────┐ ┌──────────────────────────┐ + │ .vcl ─► gasp ─► vcl │ │ .vcl ─► masp ─►openvcl│ + │ (GNU) (Sony) │ ──► │ (ours) (ours) │ + │ ─► dvp-as ─► .vo │ │ ─► dvp-as ─► .vo │ + └──────────────────────────┘ └──────────────────────────┘ + │ + └─ dvp-as stays (PS2DEV) +``` + +Three repos in this workspace, all symlinks into `~/Projects/`: + +| Repo | Role | Replaces | +| --------- | --------------------------------------------------------- | -------- | +| `masp` | Assembler preprocessor (macros, conditionals, directives) | `gasp` | +| `openvcl` | VCL → VSM transpiler (register allocation, pipelining) | `vcl` | +| `ps2gl` | OpenGL-style API for PS2; producer of `.vcl` shaders | — | + +--- + +## 2. Open work + +### 2.1 Quad-renderer rendering bug 🔴 + +**Symptom.** Any ps2gl example that draws with `GL_QUADS` renders blank. +`box` shows only the clear color; `nehe_lesson04`/`05` show their triangles +but not their quads. Triangle-only samples (`lesson02`/`03`) render +correctly. + +**Localized cause.** openvcl writes the wrong ADC bit on vertices 3 and 4 of +each quad. 
Captured via PCSX2 memory dump at `0x1100D420` and `0x1100D450`: + +| Vertex | Sony (works) W field | openvcl (broken) W field | +|--------|----------------------|--------------------------| +| v1 | `00 80 FF FF` (skip) | `00 80 FF FF` (skip) | +| v2 | `00 80 FF FF` (skip) | `00 80 FF FF` (skip) | +| **v3** | **`FF 7F 00 00` (draw)** | **`00 80 FF FF` (skip)** ❌ | +| **v4** | **`FF 7F 00 00` (draw)** | **`00 80 FF FF` (skip)** ❌ | + +All 4 vertices end up tagged "skip drawing" → GS draws nothing. + +**Where the wrong value comes from.** The source's `ior new_adc_bit, vi01, +z_sign; iaddiu new_adc_bit, new_adc_bit, 0x7fff` chain ends up with +`new_adc_bit = 0x8000` for in-frustum vertices. The z_sign side is silenced +(z_sign_mask in VI05 is loaded as 0 by `ilw.w`), so the bug is purely on the +`vi01` side -- `fcand` is returning non-zero clip flags for vertices that +should pass clipping. Either openvcl's `clipw.xyz` sequence is producing +non-zero CLIP entries for in-frustum vertices, or `fcand` is reading a stale +entry from before the current iteration. + +**Workaround.** `cmake -DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, +assembles Sony's reference VSMs directly. All examples render correctly +with this on. + +**Status.** Bisected to baseline (`e407703`); **not** a regression from any +of the recent openvcl work (`bc41a56` / `303c528` / `41dff12` / `5c0227b` / +`a2a7d9d`). Filed in `openvcl/TODO` (commit `c6e82f5`). Minimal repro at +`openvcl/test/repro/quad_adc_bug.vcl` (commit `07a3196`). + +**To make progress next:** single-step the minimal repro in PCSX2's +debugger (or run a hand-built "known-correct" reference VSM through PCSX2 +side-by-side and compare CLIP register state cycle-by-cycle). Use the +repro to bisect which clipw / scheduling decision is wrong; tweak, +rebuild, re-emit (sub-second turnaround). 
+ +### 2.2 Dual-pipe instruction scheduler 🟡 + +VU1 issues two instructions per cycle: one upper-pipe (FMAC) + one +lower-pipe (LSU / integer / branch). Sony's vcl reorders independent +instructions to fill both pipes. openvcl currently emits NOP on the free +pipe most of the time. + +**Concrete evidence:** +``` +# Sony vu1/sce_general_vcl.vsm — one source word, both pipes filled +addi.xy VF05, VF00, I loi 0x45000000 + +# openvcl build-test/vu1/general_vcl.vsm — two source words, lots of NOPs +nop loi 0x44fff000 +addi.xy VF05, VF00, i nop +``` + +Sony's `general.vsm` has ~22 instructions per main loop; openvcl's ~39 — +**roughly a 2× VU1 throughput regression**. Per-renderer ratios captured +2026-05-11 (instr count: openvcl / Sony): + +| Renderer | Ratio | Renderer | Ratio | +| --------------------- | ----- | -------------------- | ----- | +| fast_nolights | 0.72 | general_pv_diff | 0.37 | +| fast | 0.73 | general_quad | 0.34 | +| general_nospec_quad | 0.38 | general_tri | 0.38 | +| general_nospec_tri | 0.46 | general | 0.38 | +| general_nospec | 0.45 | indexed | 0.34 | +| general_pv_diff_quad | 0.39 | | | +| general_pv_diff_tri | 0.38 | | | + +**Status of the rescheduler in code:** +- `src/Token.h` already has a `PREORDERED` flag (= "do not reschedule") — + the hook exists, the pass doesn't. +- `Dependency.{cpp,h,inl}` looks like scaffolding for the eventual scheduler. +- README explicitly lists rescheduling as future work. + +**Sub-steps when this work starts:** +1. Read all `vu1/sce_*.vsm` to characterise Sony's pairing patterns + (learning pass before coding). +2. Audit `Dependency.{cpp,h,inl}` — what dep info is already captured? +3. Add a list-scheduler between code-gen and emission: topological order + by data deps, greedy pair upper+lower per cycle, respect VU1 hazard + rules (RAW latency on FMAC X/Y/Z/W, FDIV/EFU long-latency pipes, + branch-delay slot). +4. 
Respect the existing `PREORDERED` flag on tokens that must stay put + (branches, XGKICK, FCSET / FCAND control-flow ops). +5. Re-run `vsm_diff.py`; iterate until openvcl-vs-Sony ratio is within + ~10 % on all 13 renderers. + +**Risks worth flagging up front:** branch-delay slot, XGKICK timing, +FDIV/EFU long pipes, and `fcand` register liveness are where Sony's tool +will be subtly cleverer. + +### 2.3 masp polish 🟡 + +| Item | Effort | Notes | +| ------------------------------------------ | ------ | ----- | +| Address 2× `FIXME` in `src/macro.c` | ½ day | Comment-handling edge cases. May be doc-only notes — verify before claiming bug. | +| Decide fate of `build_ps2/` failing tests | ½ hour | Either fix toolchain wiring or remove the directory; masp doesn't need to run on PS2. | +| README expand to brief user manual | ½ day | Acknowledged gap in README itself. | + +### 2.4 Test densification 🟡 + +The frameworks exist; coverage is shallow. + +**masp** — has CMake/CTest with 2 tests; pattern links source files into a +test binary so per-module tests are easy. Targets: + +| Module | What to cover | +| ------------------------ | ------------- | +| `sb.c` (string buffer) | append / reset / grow / overflow / null handling | +| `hash.c` (hash table) | insert / lookup / delete / collision / resize | +| `macro.c` | macro defs, recursive expansion, comma-arg splitting, the two FIXME edges | +| Number-prefix parser | `0b` / `0q` / `0h` / `0d` / `0a` vs GASP `B'…` | +| Directive prefix | `-P/--prefixchar` default `\`, conflicts | +| Mode switching | `\masp` / `\gasp` toggles, nested ifmode | +| Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | +| Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | + +**openvcl** — has 17 tests across `unit/` and `integration/`. Hand-rolled +harness in `test/include/test_harness.h` (TEST_CASE / CHECK / REQUIRE / +EXPECTED_FAIL, auto-registered via static init). 
Subprocess runner in +`test/include/openvcl_runner.h` for end-to-end checks. Targets to expand: + +| Area | Effort | Status | +| ------------------------------------------------- | ------ | ------ | +| Tokenizer: more mnemonics, comments, fields | ½ day | open | +| Parser: operand templates, error recovery | ½ day | open | +| Expression: edge cases (some landed) | started | partial | +| CodeGenerator golden files per mnemonic family | 1 day | open | +| CommandLine: every flag in README | ½ day | open | +| Scheduler dependency-matrix suite | open until §2.2 lands | n/a | + +### 2.5 ps2gl hygiene 🟢 (low priority) + +- Legacy `Makefile` still present alongside CMake — decide keep-or-delete. +- `examples/tricked_out/billboard_renderer.vcl` exists but the build + links the pre-built `_vcl.vsm`, skipping openvcl on this app-level + shader. Wire it through the full pipeline once the quad bug is fixed + (tricked_out uses GL_QUADS-like billboard rendering). +- Add CI that runs `ctest -L vsm-diff` so renderer regressions surface + in PRs. +- Once openvcl reaches parity, drop the `TEMP: Sony reference VSMs` + commit via `git rebase -i` (the `vu1/sce_*_vcl.vsm` files were + committed as ground truth and are marked temporary in their commit + message). + +--- + +## 3. What landed this session (2026-05-11) + +All pushed to `fjtrujy/openvcl@ps2gl` and `ps2dev/ps2gl@cmake`. 
+ +| Repo | Commit | Description | +| ------- | ---------- | ----------- | +| openvcl | `bc41a56` | LOI IEEE-754 hex + expression-evaluated address offsets + SCE-matching VSM header | +| openvcl | `303c528` | Loop-body live-range extension in the register allocator | +| openvcl | `f8b3ff2` | examples Makefile parallel-build targets | +| openvcl | `41dff12` | Defensive checks + stable token pointer in RA | +| openvcl | `7f1db90` | Bootstrap unit-test framework (CMake + in-tree harness) | +| openvcl | `de7f1f8` | Integration subprocess runner + 2 TODO tests pinned | +| openvcl | `5c0227b` | Error::HasErrors propagation into exit code (fixes silent CLIP) | +| openvcl | `a2a7d9d` | `.init_vf`/`.init_vi` register-range shorthand (`vfXX-vfYY`) | +| openvcl | `bec6b7f` | LOI hex regression tests + Expression edge tests | +| openvcl | `c6e82f5` | File the ps2gl quad-renderer bug in TODO | +| openvcl | `07a3196` | `test/repro/quad_adc_bug.vcl` minimal repro | +| ps2gl | `b74d303` | TEMP: add Sony reference VSMs (12 files, ground truth) | +| ps2gl | `a4c5e06` | Docs + dead code: all 13 renderers build with openvcl+masp | +| ps2gl | `9138e1f` | Semantic VSM-diff CTest harness (`vsm_diff.py` + 12 entries) | +| ps2gl | `316cd91` | `PS2GL_USE_SCE_VSM` diagnostic build option (quad-bug workaround) | + +**Closed openvcl TODO items:** 4 of 6 — LOI hex, line-based register +allocator, CLIP validation, `.init_vf` range. All with regression tests. + +**Open openvcl TODO items:** +- Output-parameters only applied at "proper" branch exits (vague spec) +- GASP preparsing doesn't track filenames (low priority) +- *Plus the new quad-renderer bug* — filed, repro committed. + +--- + +## 4. Runtime validation via ps2gl samples + +The ps2gl example apps are the **functional** regression suite. Each +exercises a different slice of the library + VU1 renderers. + +### Sample matrix (built artifacts in `build-test/examples/`) + +| Sample | Renderer(s) | Renders with openvcl? 
| Renders with `PS2GL_USE_SCE_VSM=ON`? | +| ----------------- | ------------------------ | --------------------- | ------------------------------------- | +| `nehe_lesson02` | `general*` | ✅ | ✅ | +| `nehe_lesson03` | `general_pv_diff*` | ✅ | ✅ | +| `nehe_lesson04` | `general*` + `_quad` | 🟡 triangle yes, quad no | ✅ | +| `nehe_lesson05` | `general*` + `_quad` | 🟡 triangle yes, quad no | ✅ | +| `box` | `general*`, `_quad` | ❌ all-quad cube blank | ✅ | +| `logo` | `general*` + texturing | (not yet tested) | (not yet tested) | +| `performance` | `fast*`, `general_quad/tri` | (not yet tested) | (not yet tested) | +| `tricked_out` | own VU1 + `general*` | (not yet tested; built from pre-built `_vcl.vsm`) | n/a | + +### Defining "done" + +A sample passes when: +1. ELF builds with the all-open pipeline (`openvcl + masp + dvp-as`). +2. It boots in PCSX2 to the rendering loop (no early `SIF crash` / TLB miss). +3. Screenshot matches the golden within tolerance (image-diff with + PSNR / SSIM threshold; bitwise compare fails on emulator jitter). +4. (After §2.2 lands) Frame time within ~10 % of legacy-pipeline ELF. + +Steps (1)–(3) are functional parity; (4) is the perf milestone tied to the +scheduler. + +### Suggested automation (not yet built) + +``` +for sample in build-test/examples/*.elf; do + mcp__pcsx2.reset_vm + mcp__pcsx2.boot_elf $sample + # wait N frames OR watch for known framebuffer signature + mcp__pcsx2.screenshot > validation/$(basename $sample)_openvcl.png + mcp__pcsx2.shutdown_vm +done +# image-diff each PNG vs validation/<sample>_golden.png +``` + +--- + +## 5. Fast iteration loop — `/ps2dev` + PCSX2 + +The standing dev loop is the `/ps2dev` skill (build / SDK / toolchain) glued +to `mcp__pcsx2.*` runtime tools. Use it whenever changes touch `vu1/`, +`openvcl/src`, `masp/src`, or anything that links into `libps2gl.a`. 
+ +| Step | Tool | +| ------------------- | ------------------------------------------------------------------------------------- | +| Build | `/ps2dev` skill | +| Boot ELF | `mcp__pcsx2.boot_elf` (or `ps2link_execee` for hardware) | +| Confirm rendering | `mcp__pcsx2.screenshot` + image-diff against golden | +| Skip past boot | `mcp__pcsx2.save_state` / `load_state` once a sample reaches its render loop | +| Inspect VU1 / GS | `mcp__pcsx2.pause_vm` + `read_memory_range` (VU1 data at `0x1100C000`, 16 KB) | +| Reset between runs | `mcp__pcsx2.reset_vm` / `shutdown_vm` | + +Useful PCSX2 address landmarks for ps2gl debugging: +- VU1 data RAM at `0x1100C000` (16 KB; first ~1.5 KB is constants/matrices, + rest is output buffer + scratch). +- Sony's working quad outputs vertex positions at `0x1100D390`+ — useful + reference for the quad-bug diff. + +--- + +## 6. Quick reference — commands + +```bash +# Build openvcl + install to toolchain +cd ~/Projects/openvcl +make openvcl +cp openvcl ~/toolchains/ps2/ps2dev/bin/openvcl + +# Run openvcl on a single shader (smoke test) +./openvcl --gasp masp general.vcl > /tmp/out.vsm + +# Run openvcl test suite +cmake --build test/build && ./test/build/openvcl_unit_tests + +# Build ps2gl with the open pipeline +cd ~/Projects/ps2gl +cmake -B build-test -DBUILD_EXAMPLES=ON +cmake --build build-test + +# Build ps2gl with the SCE-VSM bypass (workaround for the quad bug) +cmake -B build-sce -DPS2GL_USE_SCE_VSM=ON -DBUILD_EXAMPLES=ON +cmake --build build-sce + +# Run the semantic VSM-diff suite (all WILL_FAIL today) +ctest --test-dir build-test -L vsm-diff + +# Detailed VSM diff for one renderer +python3 cmake/vsm_diff.py vu1/sce_general_vcl.vsm build-test/vu1/general_vcl.vsm + +# Boot a sample in PCSX2 from CLI +/Applications/PCSX2.app/Contents/MacOS/PCSX2 -fastboot -- \ + build-test/examples/nehe_lesson04.elf + +# Iterate on the quad-bug minimal repro +cd ~/Projects/openvcl +./openvcl test/repro/quad_adc_bug.vcl -o /tmp/quad_repro.vsm 
+diff -u test/repro/quad_adc_bug.vsm.openvcl-output /tmp/quad_repro.vsm +``` + +--- + +## 7. Open questions to resolve + +1. **Legacy `Makefile` in `ps2gl/`**: do any downstream consumers (PS2DEV + ports, package recipes) still depend on it? If not, delete. +2. **vsm-diff target**: as openvcl converges with Sony, do we accept + "semantically equivalent but allocator chose different regs", or chase + byte-equivalent VSM? +3. **Golden screenshots for runtime validation**: do we have any from a + legacy-pipeline build (CI artifacts, checked-in PNGs, etc.) we can use + as the ground truth? If not, the first open-pipeline run becomes the + de-facto golden after manual visual verification. +4. **Quad-bug priority vs scheduler**: fix the quad bug first + (correctness — open pipeline works for all 13 renderers), or land the + scheduler first (performance — open pipeline approaches Sony's perf)? + The workaround (`PS2GL_USE_SCE_VSM=ON`) means we don't *have* to + choose immediately, but at some point one path needs picking. From 1760ec0bf25807245934ac599ff9472668c8e1bb Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 23:00:05 +0200 Subject: [PATCH 12/17] docs: reflect masp polish + test densification progress (2026-05-11) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §2.3 masp polish: the two FIXMEs in src/macro.c were inherited gasp doc-comments rather than bugs — rewritten as NOTE explanations in masp@b5de42a. The stale build_ps2/ directory was a leftover from a discarded PS2 cross-compile experiment and was removed. A new line items the dormant change_base trailing-`'` bug discovered while adding tests (only reachable via masp_syntax=0, i.e. gasp-compat mode; ps2gl runs masp_syntax=1). 
§2.4 test densification: masp went from 2 CTest entries to 5; new binaries test_sb (18 cases), test_hash (14 cases) and test_number_prefix (25 cases) land coverage on sb.c, hash.c, the is_base/sb_strtol/change_base/change_base2 family, and the modern 0b/0q/0h/0d numeric-prefix rewrites. openvcl-side coverage unchanged at 17 cases. TL;DR refreshed to point at the remaining masp work (README expand + change_base fix) and to record that the openvcl side is the next target for test densification. Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md index 8b26e67f..6e0be593 100644 --- a/INTEGRATION_PLAN.md +++ b/INTEGRATION_PLAN.md @@ -19,10 +19,13 @@ minimal repro committed. 2. 🟡 **Dual-pipe scheduler** in openvcl. Headline performance feature. Currently openvcl produces 0.34-0.73× Sony's instruction count per renderer. Multi-week. See §2.2. -3. 🟡 **masp polish** — two `FIXME`s in `src/macro.c`, decision on - `build_ps2/`. Half-day. See §2.3. +3. 🟡 **masp polish** — most of §2.3 closed 2026-05-11: FIXMEs in + `src/macro.c` rewritten as NOTEs, stale `build_ps2/` removed. + Remaining: README expand, and the dormant `change_base` trailing-`'` + bug discovered during test densification. See §2.3. 4. 🟡 **More unit tests** for masp + openvcl, especially per-module coverage. - Infrastructure already in place. See §2.4. + masp side: sb / hash / number-prefix landed 2026-05-11 (3 new test + binaries, 57 cases). openvcl side: still 17 cases. See §2.4. **Workarounds & infrastructure landed this session:** - `-DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, assembles Sony's reference VSMs @@ -165,27 +168,29 @@ will be subtly cleverer. | Item | Effort | Notes | | ------------------------------------------ | ------ | ----- | -| Address 2× `FIXME` in `src/macro.c` | ½ day | Comment-handling edge cases. 
May be doc-only notes — verify before claiming bug. | -| Decide fate of `build_ps2/` failing tests | ½ hour | Either fix toolchain wiring or remove the directory; masp doesn't need to run on PS2. | +| ~~Address 2× `FIXME` in `src/macro.c`~~ | ✅ done | Both were inherited gasp doc-FIXMEs, not bugs. Rewritten as NOTE explanations. masp@`b5de42a`. | +| ~~Decide fate of `build_ps2/`~~ | ✅ done | Stale local CMake dir from a past PS2 cross-compile experiment; deleted (was untracked). | | README expand to brief user manual | ½ day | Acknowledged gap in README itself. | +| Fix `change_base` trailing-`'` bug | ½ day | Discovered 2026-05-11 while densifying tests: GASP-style `B'1010'` leaves the closing `'` in the output. Dormant — ps2gl uses `masp_syntax=1` path (`change_base2`). Tests pin the buggy behaviour in `masp@816b4d8`; fix is to bump idx past the closing `'` after `sb_strtol`. | ### 2.4 Test densification 🟡 The frameworks exist; coverage is shallow. -**masp** — has CMake/CTest with 2 tests; pattern links source files into a -test binary so per-module tests are easy. Targets: - -| Module | What to cover | -| ------------------------ | ------------- | -| `sb.c` (string buffer) | append / reset / grow / overflow / null handling | -| `hash.c` (hash table) | insert / lookup / delete / collision / resize | -| `macro.c` | macro defs, recursive expansion, comma-arg splitting, the two FIXME edges | -| Number-prefix parser | `0b` / `0q` / `0h` / `0d` / `0a` vs GASP `B'…` | -| Directive prefix | `-P/--prefixchar` default `\`, conflicts | -| Mode switching | `\masp` / `\gasp` toggles, nested ifmode | -| Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | -| Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | +**masp** — was 2 CTest entries, now 5 (32 + 25 = 57 new cases as of +2026-05-11). Pattern links source files into a test binary so +per-module tests are easy. 
Targets: + +| Module | What to cover | Status | +| ------------------------ | ------------- | ------ | +| `sb.c` (string buffer) | append / reset / grow / overflow / null handling | ✅ 18 cases in `test_sb` (masp@`607196e`) | +| `hash.c` (hash table) | insert / lookup / delete / collision / resize | ✅ 14 cases in `test_hash` (masp@`607196e`) — exercises key-copy ownership and the move-to-front cache over 5000 keys | +| Number-prefix parser | `0b` / `0q` / `0h` / `0d` / `0a` vs GASP `B'…` | ✅ 25 cases in `test_number_prefix` (masp@`816b4d8`) covering `is_base`, `sb_strtol`, `change_base`, `change_base2` | +| `macro.c` | macro defs, recursive expansion, comma-arg splitting | open | +| Directive prefix | `-P/--prefixchar` default `\`, conflicts | open | +| Mode switching | `\masp` / `\gasp` toggles, nested ifmode | open | +| Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | open | +| Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | open | **openvcl** — has 17 tests across `unit/` and `integration/`. Hand-rolled harness in `test/include/test_harness.h` (TEST_CASE / CHECK / REQUIRE / From ccf89613fe042dfee23db2b9cb256fbcfedfd385 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 23:08:26 +0200 Subject: [PATCH 13/17] =?UTF-8?q?docs:=20openvcl=20test=20count=2017=20?= =?UTF-8?q?=E2=86=92=2042=20(Tokenizer=20suite=20landed)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openvcl@edc5b76 added 25 tokenizer cases covering state/comments/ labels/mnemonic ID/fields/bit-flags. Refresh TL;DR and §2.4 to reflect the new count, swap "Tokenizer" line in the per-area table from open to ✅ with a follow-up line for the still-open argument-list parsing work (broadcast / indirect / post-inc / immediate operands). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md index 6e0be593..4734ef29 100644 --- a/INTEGRATION_PLAN.md +++ b/INTEGRATION_PLAN.md @@ -25,7 +25,8 @@ minimal repro committed. bug discovered during test densification. See §2.3. 4. 🟡 **More unit tests** for masp + openvcl, especially per-module coverage. masp side: sb / hash / number-prefix landed 2026-05-11 (3 new test - binaries, 57 cases). openvcl side: still 17 cases. See §2.4. + binaries, 57 cases). openvcl side: 17 → 42 cases — tokenizer suite + landed 2026-05-11 (`openvcl@edc5b76`). See §2.4. **Workarounds & infrastructure landed this session:** - `-DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, assembles Sony's reference VSMs @@ -33,8 +34,9 @@ minimal repro committed. - `vsm_diff.py` semantic diff harness + per-renderer CTest entries. All 12 WILL_FAIL today; XPASS-flips as renderers converge. (`ps2gl@cmake` commit `9138e1f`) -- openvcl unit + integration test framework. 17 tests, 0 failures. - (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`) +- openvcl unit + integration test framework. 42 tests (17 originally, + +25 from the Tokenizer suite), 0 failures. + (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`, `edc5b76`) --- @@ -192,14 +194,16 @@ per-module tests are easy. Targets: | Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | open | | Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | open | -**openvcl** — has 17 tests across `unit/` and `integration/`. Hand-rolled -harness in `test/include/test_harness.h` (TEST_CASE / CHECK / REQUIRE / -EXPECTED_FAIL, auto-registered via static init). Subprocess runner in -`test/include/openvcl_runner.h` for end-to-end checks. Targets to expand: +**openvcl** — has 42 tests across `unit/` and `integration/` (was 17 +before 2026-05-11). 
Hand-rolled harness in `test/include/test_harness.h` +(TEST_CASE / CHECK / REQUIRE / EXPECTED_FAIL, auto-registered via +static init). Subprocess runner in `test/include/openvcl_runner.h` for +end-to-end checks. Targets to expand: | Area | Effort | Status | | ------------------------------------------------- | ------ | ------ | -| Tokenizer: more mnemonics, comments, fields | ½ day | open | +| Tokenizer: comments, fields, bit-flags, labels | ½ day | ✅ 25 cases in `test_tokenizer.cpp` (`openvcl@edc5b76`) — case-insensitive mnemonic lookup and `.xyzw`→0 normalisation pinned with comments | +| Tokenizer: argument-list parsing for FMAC/LSU forms | ½ day | open (broadcast, indirect, post-inc, immediate operands) | | Parser: operand templates, error recovery | ½ day | open | | Expression: edge cases (some landed) | started | partial | | CodeGenerator golden files per mnemonic family | 1 day | open | From 1cb895a5f1d33054ad6b5bbe1e372b5faf36517c Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 23:17:14 +0200 Subject: [PATCH 14/17] =?UTF-8?q?docs:=20openvcl=2042=20=E2=86=92=2055=20t?= =?UTF-8?q?ests=20(Parser=20family=20suite);=20flag=20uninit-read=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openvcl@acc1ba6 added 13 Parser-side cases covering one positive per VU operand family (FMAC, FDIV, LSU, IALU, BRU, RANDU, EFU) and four error-recovery negatives. Refresh TL;DR, the "infrastructure landed" bullet, and §2.4 to reflect the new total and mark the Parser row ✅. The Tokenizer argument-list row is bumped from "open" to "partial" because the Parser tests transitively cover broadcast (`MULw`), post-inc `(vi++)`, and `imm(vi)` addressing; pre-dec, `i`/`q`/`p`/`r` immediate operands, and indirect zero-form `(vi)` are still open. 
Adds a new openvcl TODO line at the end of §3: writing the family tests surfaced a latent error-propagation gap — the "Read-attempt from uninitialized float register" path prints to stderr but does not bump Error::m_errorCount, so openvcl exits 0 anyway. Sibling of the bug that commit 5c0227b fixed for CLIP. The Parser tests work around it by checking stderr substrings instead of exit_code on positives. Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md index 4734ef29..48b58d78 100644 --- a/INTEGRATION_PLAN.md +++ b/INTEGRATION_PLAN.md @@ -25,8 +25,9 @@ minimal repro committed. bug discovered during test densification. See §2.3. 4. 🟡 **More unit tests** for masp + openvcl, especially per-module coverage. masp side: sb / hash / number-prefix landed 2026-05-11 (3 new test - binaries, 57 cases). openvcl side: 17 → 42 cases — tokenizer suite - landed 2026-05-11 (`openvcl@edc5b76`). See §2.4. + binaries, 57 cases). openvcl side: 17 → 55 cases — tokenizer suite + (`openvcl@edc5b76`) + Parser operand-family / error-recovery suite + (`openvcl@acc1ba6`) both landed 2026-05-11. See §2.4. **Workarounds & infrastructure landed this session:** - `-DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, assembles Sony's reference VSMs @@ -34,9 +35,11 @@ minimal repro committed. - `vsm_diff.py` semantic diff harness + per-renderer CTest entries. All 12 WILL_FAIL today; XPASS-flips as renderers converge. (`ps2gl@cmake` commit `9138e1f`) -- openvcl unit + integration test framework. 42 tests (17 originally, - +25 from the Tokenizer suite), 0 failures. - (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`, `edc5b76`) +- openvcl unit + integration test framework. 55 tests (17 originally, + +25 from the Tokenizer suite, +13 from the Parser family/error suite), + 0 failures. 
+ (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`, `edc5b76`, + `acc1ba6`) --- @@ -194,7 +197,7 @@ per-module tests are easy. Targets: | Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | open | | Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | open | -**openvcl** — has 42 tests across `unit/` and `integration/` (was 17 +**openvcl** — has 55 tests across `unit/` and `integration/` (was 17 before 2026-05-11). Hand-rolled harness in `test/include/test_harness.h` (TEST_CASE / CHECK / REQUIRE / EXPECTED_FAIL, auto-registered via static init). Subprocess runner in `test/include/openvcl_runner.h` for @@ -203,8 +206,8 @@ end-to-end checks. Targets to expand: | Area | Effort | Status | | ------------------------------------------------- | ------ | ------ | | Tokenizer: comments, fields, bit-flags, labels | ½ day | ✅ 25 cases in `test_tokenizer.cpp` (`openvcl@edc5b76`) — case-insensitive mnemonic lookup and `.xyzw`→0 normalisation pinned with comments | -| Tokenizer: argument-list parsing for FMAC/LSU forms | ½ day | open (broadcast, indirect, post-inc, immediate operands) | -| Parser: operand templates, error recovery | ½ day | open | +| Tokenizer: argument-list parsing for FMAC/LSU forms | ½ day | partial — broadcast (`MULw`), post-inc (`(vi++)`) and `imm(vi)` addressing covered by Parser family tests (`openvcl@acc1ba6`); still open: pre-dec, `i`/`q`/`p`/`r` immediate operands, indirect `(vi)` zero-form | +| Parser: operand templates, error recovery | ½ day | ✅ 13 cases in `test_parser_families.cpp` (`openvcl@acc1ba6`) — one positive per VU family (FMAC, FDIV, LSU, IALU, BRU, RANDU, EFU) + negatives for unknown mnemonic, wrong arg count, out-of-range register, family mismatch | | Expression: edge cases (some landed) | started | partial | | CodeGenerator golden files per mnemonic family | 1 day | open | | CommandLine: every flag in README | ½ day | open | @@ -255,6 +258,14 @@ allocator, CLIP 
validation, `.init_vf` range. All with regression tests. - Output-parameters only applied at "proper" branch exits (vague spec) - GASP preparsing doesn't track filenames (low priority) - *Plus the new quad-renderer bug* — filed, repro committed. +- **Error propagation gap** discovered 2026-05-11 while densifying Parser + tests: `"Read-attempt from uninitialized float register"` prints to + stderr but does NOT bump `Error::m_errorCount`, so the process exits + 0 instead of failing. Sibling of the bug commit `5c0227b` already + fixed for CLIP. Cheap fix: route this code path through + `Error::Display` (or directly increment the counter). Parser tests + in `test_parser_families.cpp` deliberately check stderr substrings + instead of exit_code on positives, so they're not blocked by this. --- From 77fb2f08ede0a543fe69bfd24f1d2092fbfa96d1 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Mon, 11 May 2026 23:20:49 +0200 Subject: [PATCH 15/17] =?UTF-8?q?docs:=20openvcl=20RA=20uninit-read=20prop?= =?UTF-8?q?agation=20fixed=20(55=20=E2=86=92=2056=20tests)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openvcl@66b4486 routes the seven RegisterAllocator uninit-register- read paths through Error::Display, so they now bump m_errorCount and the process exits non-zero — mirroring the 5c0227b CLIP fix. Refresh TL;DR + "infrastructure landed" bullet for the +1 case (RegisterAllocator: uninit-read produces a non-zero exit, the regression-guard for the fix), and move the corresponding "open openvcl TODO" line under §3 from open to ✅ done with a pointer to both the commit and the guarding test. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md index 48b58d78..78631f03 100644 --- a/INTEGRATION_PLAN.md +++ b/INTEGRATION_PLAN.md @@ -25,9 +25,10 @@ minimal repro committed. bug discovered during test densification. See §2.3. 4. 🟡 **More unit tests** for masp + openvcl, especially per-module coverage. masp side: sb / hash / number-prefix landed 2026-05-11 (3 new test - binaries, 57 cases). openvcl side: 17 → 55 cases — tokenizer suite + binaries, 57 cases). openvcl side: 17 → 56 cases — tokenizer suite (`openvcl@edc5b76`) + Parser operand-family / error-recovery suite - (`openvcl@acc1ba6`) both landed 2026-05-11. See §2.4. + (`openvcl@acc1ba6`) + RA uninit-read propagation fix and its + regression test (`openvcl@66b4486`) all landed 2026-05-11. See §2.4. **Workarounds & infrastructure landed this session:** - `-DPS2GL_USE_SCE_VSM=ON` — bypasses openvcl, assembles Sony's reference VSMs @@ -35,11 +36,11 @@ minimal repro committed. - `vsm_diff.py` semantic diff harness + per-renderer CTest entries. All 12 WILL_FAIL today; XPASS-flips as renderers converge. (`ps2gl@cmake` commit `9138e1f`) -- openvcl unit + integration test framework. 55 tests (17 originally, - +25 from the Tokenizer suite, +13 from the Parser family/error suite), - 0 failures. +- openvcl unit + integration test framework. 56 tests (17 originally, + +25 from the Tokenizer suite, +13 from the Parser family/error suite, + +1 regression for the RA uninit-read propagation fix), 0 failures. (`openvcl@ps2gl` commits `7f1db90`, `de7f1f8`, `bec6b7f`, `edc5b76`, - `acc1ba6`) + `acc1ba6`, `66b4486`) --- @@ -197,7 +198,7 @@ per-module tests are easy. 
Targets: | Conditional assembly | `\ifmode` / `\ifm` / `\endifm` truth tables | open | | Golden files | Re-run every `ps2gl/vu1/*.vcl` through masp; compare to checked-in expected output | open | -**openvcl** — has 55 tests across `unit/` and `integration/` (was 17 +**openvcl** — has 56 tests across `unit/` and `integration/` (was 17 before 2026-05-11). Hand-rolled harness in `test/include/test_harness.h` (TEST_CASE / CHECK / REQUIRE / EXPECTED_FAIL, auto-registered via static init). Subprocess runner in `test/include/openvcl_runner.h` for @@ -258,14 +259,13 @@ allocator, CLIP validation, `.init_vf` range. All with regression tests. - Output-parameters only applied at "proper" branch exits (vague spec) - GASP preparsing doesn't track filenames (low priority) - *Plus the new quad-renderer bug* — filed, repro committed. -- **Error propagation gap** discovered 2026-05-11 while densifying Parser - tests: `"Read-attempt from uninitialized float register"` prints to - stderr but does NOT bump `Error::m_errorCount`, so the process exits - 0 instead of failing. Sibling of the bug commit `5c0227b` already - fixed for CLIP. Cheap fix: route this code path through - `Error::Display` (or directly increment the counter). Parser tests - in `test_parser_families.cpp` deliberately check stderr substrings - instead of exit_code on positives, so they're not blocked by this. +- ~~**Error propagation gap** for RegisterAllocator uninit-register + reads~~ — ✅ fixed 2026-05-11 (`openvcl@66b4486`). The seven RA + paths (`float/integer/accumulator/Q/P/R/I`) now route through + `Error::Display(Error(msg, token, *i))` so they participate in + exit-code propagation, mirroring the `5c0227b` CLIP fix. Guarded + by `RegisterAllocator: uninit-read produces a non-zero exit` in + `test_parser_families.cpp`. 
--- From a0a4b0c2e6aeb6a60c11d5e0e58c9bffdd42d17c Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Tue, 12 May 2026 00:52:15 +0200 Subject: [PATCH 16/17] =?UTF-8?q?docs:=20quad=20bug=20fixed=20=E2=80=94=20?= =?UTF-8?q?close=20=C2=A72.1,=20document=20root=20cause?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openvcl@520766b fixed the GL_QUADS rendering bug by correcting the CLIP operand template (drop ':write' on the first VF arg, which is a SOURCE not a DEST in hardware). openvcl@fd7cf6e added dual-pipe adjacent pairing as a side effect of the investigation. Update TL;DR to mark the bug fixed and the dual-pipe scheduler as partially landed. §2.1 keeps the original symptom + memory-dump table for historical reference under a new §2.1.1 heading; §2.1 itself is now the resolution summary. Co-Authored-By: Claude Opus 4.7 (1M context) --- INTEGRATION_PLAN.md | 64 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/INTEGRATION_PLAN.md b/INTEGRATION_PLAN.md index 78631f03..348b38e1 100644 --- a/INTEGRATION_PLAN.md +++ b/INTEGRATION_PLAN.md @@ -1,24 +1,36 @@ # PS2 OpenGL Toolchain — Current State & Plan -_Last reviewed: 2026-05-11_ +_Last reviewed: 2026-05-12_ ## 0. TL;DR — pick this up here next session -**Where we are.** The open pipeline (`openvcl + masp + dvp-as`) builds all 13 -ps2gl renderers and produces ELFs that boot in PCSX2. Triangle-based samples -render correctly. **`GL_QUADS`-based shaders render blank** because openvcl -emits wrong ADC-bit values — bug is localized, has a workaround, and has a -minimal repro committed. +**Where we are.** The open pipeline (`openvcl + masp + dvp-as`) builds all +13 ps2gl renderers and produces ELFs that boot in PCSX2. Both triangle- +and quad-based samples render correctly. ✅ The GL_QUADS bug is **FIXED** +(`openvcl@520766b`). **Headline open items, in priority order:** -1. 
🔴 **Quad-renderer bug** in openvcl (`general_quad`, `general_pv_diff_quad`, - `general_nospec_quad`). Workaround: build ps2gl with - `-DPS2GL_USE_SCE_VSM=ON`. Minimal repro: `openvcl/test/repro/quad_adc_bug.vcl`. - See §2.1. -2. 🟡 **Dual-pipe scheduler** in openvcl. Headline performance feature. +1. ~~🔴 **Quad-renderer bug**~~ — ✅ **FIXED 2026-05-12** (`openvcl@520766b`). + Root cause: the `CLIP` operand template had `:write` on its first VF + argument, copy-pasted from the destination-writing FMAC ops (ADD, MUL, + etc.). But clipw's first VF is hardware-semantically a source — only + the CLIP register itself is written. The bogus `:write` caused + openvcl's register allocator to treat clipw as starting a new + lifetime, breaking the data-flow chain from the preceding mul. For + v3 of every quad-strip iteration, the allocator gave the mul one VF + and the clipw a different freed-up VF (which still held the pre-mul + xformed_vert_3 at magnitude ~3.7M after perspective divide). + `clipw` correctly flagged that as out-of-frustum, fcand returned + non-zero clip flags, and every quad rendered as ADC=skip. One-char + fix (drop `:write`) in `src/Parser.cpp`. See §2.1. +2. 🟡 **Dual-pipe scheduler** in openvcl — adjacent-pairing landed + 2026-05-12 (`openvcl@fd7cf6e`). Full software-pipelining still + future work; the current pass handles only adjacent pair candidates + and doesn't hoist instructions across non-adjacent positions. Currently openvcl produces 0.34-0.73× Sony's instruction count per - renderer. Multi-week. See §2.2. + renderer; adjacent pairing closes some of that gap, the rest needs + PRO/MAIN/EPI loop restructuring. Multi-week. See §2.2. 3. 🟡 **masp polish** — most of §2.3 closed 2026-05-11: FIXMEs in `src/macro.c` rewritten as NOTEs, stale `build_ps2/` removed. Remaining: README expand, and the dormant `change_base` trailing-`'` @@ -73,7 +85,33 @@ Three repos in this workspace, all symlinks into `~/Projects/`: ## 2. 
Open work -### 2.1 Quad-renderer rendering bug 🔴 +### 2.1 Quad-renderer rendering bug ✅ FIXED 2026-05-12 + +**Resolution.** Root cause was the `CLIP` operand template in openvcl's +`src/Parser.cpp` having `:write` on its first VF argument, copied from +the destination-writing FMAC ops (ADD, MUL, etc.). But clipw's first +VF is hardware-semantically a source — only the CLIP register itself +is written. The bogus `:write` flag caused openvcl's register +allocator to treat clipw as starting a new lifetime, breaking the +data-flow chain from the preceding mul that produces what clipw +consumes. One-char fix (drop `:write`) in `openvcl@520766b`. + +For the full historical investigation breadcrumbs see +`memory/project_quad_bug_investigation_notes.md` — many plausible- +looking hypotheses (alias collision, clipw→fcand latency, mfir.w → sq +latency, cross-iteration CLIP leak, software-pipelining-required) were +all falsified before the correct root cause was found. Each of those +ruled-out hypotheses remains documented in the notes file so the +next investigator doesn't re-tread them. + +Verified after fix: `nehe_lesson04` and `nehe_lesson05` render both +their pyramids AND their cubes; lesson02/03 still render triangles; +GIF chain W field at `0x1100D42C`/`D45C` flips from `00 80 FF FF` +(skip) to `FF 7F 00 00` (draw) for the previously-buggy vertices. + +### 2.1.1 Quad-renderer bug — historical investigation (pre-fix) + +(Retained for reference.) **Symptom.** Any ps2gl example that draws with `GL_QUADS` renders blank.
`box` shows only the clear color; `nehe_lesson04`/`05` show their triangles From 230341656212e00e3e174fd619f4e2011566d618 Mon Sep 17 00:00:00 2001 From: Francisco Javier Trujillo Mata Date: Tue, 12 May 2026 09:03:55 +0200 Subject: [PATCH 17/17] cmake: per-renderer SCE-VSM override; default general_quad to Sony's VSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds PS2GL_USE_SCE_VSM_FOR (semicolon-separated list of renderer names) which forces those specific renderers to assemble from Sony's reference VSM even when PS2GL_USE_SCE_VSM=OFF (the default open-source pipeline). Defaults to "general_quad" because of a residual openvcl bug in that specific shader: lesson04 / lesson05 render correctly (they route to the simpler general_pv_diff_quad shader), but box.elf enables specular lighting and routes to general_quad where the colors come out wrong such that the cube blends into the clear color and is invisible. Spliced Sony's general_quad VSM into an otherwise-openvcl build and confirmed box.elf renders the cube clearly — proving the bug is scoped to that one shader's emission, not a wider regression. This keeps the open-source pipeline as the default for 12 of 13 renderers while shipping a working box.elf today. Once openvcl's general_quad emission is fixed, the default for this option can drop back to an empty list. See memory/project_quad_bug_investigation_notes.md for the per-shader splice experiment and the full breadcrumbs. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5679a6e..4ba54234 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,21 @@ option(PS2GL_USE_SCE_VSM "Bypass the .vcl pipeline; assemble vu1/sce__vcl.vsm directly" OFF) +# Per-renderer override list: even when PS2GL_USE_SCE_VSM=OFF (the default +# open-source pipeline), force these specific renderers to assemble from +# Sony's reference VSMs. Used to ship a working build while individual +# openvcl-emitted shaders are still being debugged. +# +# As of 2026-05-12 the residual openvcl bug is in `general_quad`'s +# lighting section: lesson04 / lesson05 render correctly because they +# route to the simpler `general_pv_diff_quad` shader (single light, no +# specular), but box.elf enables specular and routes to `general_quad`, +# where the colors come out wrong such that the cube is invisible. +# Default the override to include `general_quad` until that's fixed. +set(PS2GL_USE_SCE_VSM_FOR "general_quad" + CACHE STRING + "Semicolon-separated list of renderer names to force-use Sony's reference VSM for, even when PS2GL_USE_SCE_VSM=OFF") + # Check for VU1 tools availability # Prefer open-source tools (openvcl + masp) over proprietary ones (vcl + gasp) find_program(OPENVCL_FOUND openvcl) @@ -177,7 +192,21 @@ foreach(RENDERER ${RENDERERS}) continue() endif() + # Decide whether this renderer should use Sony's reference VSM: + # either PS2GL_USE_SCE_VSM is ON globally, OR this renderer name is + # in the per-renderer override list PS2GL_USE_SCE_VSM_FOR. 
+ set(_use_sce_for_this OFF) if(PS2GL_USE_SCE_VSM) + set(_use_sce_for_this ON) + elseif(PS2GL_USE_SCE_VSM_FOR) + list(FIND PS2GL_USE_SCE_VSM_FOR ${RENDERER} _override_idx) + if(NOT _override_idx EQUAL -1) + set(_use_sce_for_this ON) + message(STATUS "Per-renderer SCE-VSM override: ${RENDERER}") + endif() + endif() + + if(_use_sce_for_this) # Bypass the .vcl pipeline entirely; assemble the in-tree Sony # reference VSM directly. scei's reference uses a different # naming convention (`scei_vcl.vsm`, no `sce_` prefix), so