diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
index 4adacb9a86f..ff5bc0b6816 100644
--- a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py
@@ -348,7 +348,6 @@ def apply_ep_prefill(
                 ffn_out,
                 m_indices,
             )
-            del permute_input

             # swiglu
             ffn_out = paddle.incubate.nn.functional.swiglu(ffn_out, None)
@@ -367,7 +366,6 @@ def apply_ep_prefill(
                 )
                 ffn_in_x_scale_tensor = ffn_in_x_scale_tensor.T[: ffn_in_x.shape[0]]

-                del ffn_out
                 ffn_out = paddle.empty(
                     (token_all_num, getattr(layer, self.added_weight_attrs[1]).shape[1]),
                     dtype=paddle.bfloat16,
@@ -379,7 +377,6 @@ def apply_ep_prefill(
                     ffn_out,
                     m_indices,
                 )
-                del ffn_in_x

                 # prmt back per rank
                 tmp_ffn_out = fastdeploy.model_executor.ops.gpu.ep_moe_expert_combine(
@@ -391,7 +388,6 @@ def apply_ep_prefill(
                     False,  # norm_topk_prob
                     1.0,
                 )
-                del ffn_out
             else:
                 tmp_ffn_out = paddle.empty([0, hidden_size], paddle.bfloat16)