@@ -26,16 +26,26 @@ def _run_code(code: str, stdin: str) -> tuple[str, str, bool]:
2626 os .unlink (tmpfile )
2727
2828
29+ def _code_block (label : str , content : str ) -> str :
30+ return f"{ label } :\n ```\n { content } \n ```"
31+
32+
2933def evaluation_function (response : Any , answer : Any , params : Params ) -> Result :
3034 tests = params .get ("tests" , [])
35+ result = Result ()
3136
3237 if not tests :
33- result = Result (is_correct = False )
34- result .add_feedback ("error" , "No test cases provided." )
38+ stdout , stderr , timed_out = _run_code (str (response ), "" )
39+ if timed_out :
40+ result .add_feedback ("error" , f"Code timed out after { _TIMEOUT } s." )
41+ elif stderr and not stdout :
42+ result .add_feedback ("error" , _code_block ("Error" , stderr .strip ()))
43+ else :
44+ output = stdout .rstrip () or "(no output)"
45+ result .add_feedback ("output" , _code_block ("Output" , output ))
3546 return result
3647
3748 passed = 0
38- result = Result ()
3949
4050 for i , test in enumerate (tests , 1 ):
4151 stdin = test .get ("input" , "" )
@@ -44,31 +54,42 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
4454
4555 stdout , stderr , timed_out = _run_code (str (response ), stdin )
4656 actual = stdout .rstrip ()
57+ label = f"Hidden test { i } " if hidden else f"Test { i } "
4758
4859 if timed_out :
4960 tag = "hidden_fail" if hidden else "fail"
50- label = f"Hidden test { i } " if hidden else f"Test { i } "
5161 result .add_feedback (tag , f"{ label } : timed out after { _TIMEOUT } s." )
5262 elif stderr and not stdout :
5363 tag = "hidden_fail" if hidden else "fail"
54- label = f"Hidden test { i } " if hidden else f"Test { i } "
55- msg = f"{ label } : runtime error." if hidden else f"{ label } : runtime error.\n { stderr .strip ()} "
56- result .add_feedback (tag , msg )
64+ if hidden :
65+ result .add_feedback (tag , f"{ label } : runtime error." )
66+ else :
67+ parts = [f"{ label } : runtime error." ]
68+ if stdin .strip ():
69+ parts .append (_code_block ("Input" , stdin .rstrip ()))
70+ parts .append (_code_block ("Error" , stderr .strip ()))
71+ result .add_feedback (tag , "\n \n " .join (parts ))
5772 elif actual == expected :
5873 passed += 1
59- label = f"Hidden test { i } " if hidden else f"Test { i } "
60- result .add_feedback ("pass" , f"{ label } : passed." )
74+ if hidden :
75+ result .add_feedback ("pass" , f"{ label } : passed." )
76+ else :
77+ parts = [f"{ label } : passed." ]
78+ if stdin .strip ():
79+ parts .append (_code_block ("Input" , stdin .rstrip ()))
80+ parts .append (_code_block ("Output" , actual or "(no output)" ))
81+ result .add_feedback ("pass" , "\n \n " .join (parts ))
6182 else :
6283 tag = "hidden_fail" if hidden else "fail"
6384 if hidden :
64- result .add_feedback (tag , f"Hidden test { i } : failed." )
85+ result .add_feedback (tag , f"{ label } : failed." )
6586 else :
66- result . add_feedback ( tag , (
67- f"Test { i } : failed. \n "
68- f" Input: { stdin .rstrip ()} \n "
69- f" Expected: { expected } \n "
70- f" Got: { actual } "
71- ))
87+ parts = [ f" { label } : failed." ]
88+ if stdin . strip ():
89+ parts . append ( _code_block ( " Input" , stdin .rstrip ()))
90+ parts . append ( _code_block ( "Your output" , actual or "(no output)" ))
91+ parts . append ( _code_block ( "Expected" , expected ))
92+ result . add_feedback ( tag , " \n \n " . join ( parts ))
7293
7394 result .is_correct = passed == len (tests )
7495 result .add_feedback ("summary" , f"{ passed } /{ len (tests )} tests passed." )
0 commit comments