model	scenario	result	duration_s	notes
glm-4.5-air	aggressive-client	FAIL	72	[92m05:48:37 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.5-air	callback-quick-yes	PASS	44	
glm-4.5-air	callback-recorded	PASS	73	
glm-4.5-air	callback-refused	FAIL	65	[92m05:51:47 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.5-air	chat-callback-recorded-via-http	SKIP	0	no golden_pass
glm-4.5-air	chat-callback-recorded	PASS	46	
glm-4.5-air	correction-mid-call	FAIL	45	[92m05:53:37 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.5-air	off-topic	PASS	38	
glm-4.5-air	unclear-request	PASS	65	
glm-4.5-air	wrong-number	PASS	49	
glm-4.7	aggressive-client	FAIL	74	[92m05:56:54 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.7	callback-quick-yes	FAIL	67	[92m05:58:07 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.7	callback-recorded	PASS	114	
glm-4.7	callback-refused	FAIL	79	[92m06:01:09 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
glm-4.7	chat-callback-recorded-via-http	SKIP	0	no golden_pass
glm-4.7	chat-callback-recorded	PASS	104	
glm-4.7	correction-mid-call	PASS	193	
glm-4.7	off-topic	PASS	53	
glm-4.7	unclear-request	PASS	70	
glm-4.7	wrong-number	PASS	79	
gemini/gemini-2.5-flash	aggressive-client	PASS	41	
gemini/gemini-2.5-flash	callback-quick-yes	PASS	33	
gemini/gemini-2.5-flash	callback-recorded	PASS	58	
gemini/gemini-2.5-flash	callback-refused	FAIL	58	[92m06:12:59 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-2.5-flash	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gemini/gemini-2.5-flash	chat-callback-recorded	PASS	38	
gemini/gemini-2.5-flash	correction-mid-call	PASS	38	
gemini/gemini-2.5-flash	off-topic	PASS	30	
gemini/gemini-2.5-flash	unclear-request	PASS	41	
gemini/gemini-2.5-flash	wrong-number	FAIL	40	[92m06:16:24 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-3-flash-preview	aggressive-client	FAIL	41	[92m06:17:03 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-3-flash-preview	callback-quick-yes	PASS	41	
gemini/gemini-3-flash-preview	callback-recorded	PASS	48	
gemini/gemini-3-flash-preview	callback-refused	FAIL	56	[92m06:19:14 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-3-flash-preview	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gemini/gemini-3-flash-preview	chat-callback-recorded	PASS	45	
gemini/gemini-3-flash-preview	correction-mid-call	PASS	49	
gemini/gemini-3-flash-preview	off-topic	PASS	29	
gemini/gemini-3-flash-preview	unclear-request	PASS	44	
gemini/gemini-3-flash-preview	wrong-number	PASS	39	
gemini/gemini-flash-latest	aggressive-client	FAIL	41	[92m06:23:36 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-flash-latest	callback-quick-yes	PASS	35	
gemini/gemini-flash-latest	callback-recorded	PASS	48	
gemini/gemini-flash-latest	callback-refused	FAIL	71	[92m06:25:39 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-flash-latest	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gemini/gemini-flash-latest	chat-callback-recorded	PASS	62	
gemini/gemini-flash-latest	correction-mid-call	PASS	38	
gemini/gemini-flash-latest	off-topic	PASS	35	
gemini/gemini-flash-latest	unclear-request	FAIL	48	[92m06:29:06 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gemini/gemini-flash-latest	wrong-number	PASS	46	
gpt-4o-mini	aggressive-client	FAIL	42	[92m06:30:39 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4o-mini	callback-quick-yes	PASS	32	
gpt-4o-mini	callback-recorded	FAIL	52	[92m06:31:54 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4o-mini	callback-refused	FAIL	66	[92m06:32:45 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4o-mini	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gpt-4o-mini	chat-callback-recorded	PASS	40	
gpt-4o-mini	correction-mid-call	PASS	39	
gpt-4o-mini	off-topic	PASS	35	
gpt-4o-mini	unclear-request	PASS	40	
gpt-4o-mini	wrong-number	PASS	36	
gpt-4o	aggressive-client	PASS	40	
gpt-4o	callback-quick-yes	PASS	30	
gpt-4o	callback-recorded	FAIL	49	[92m06:38:14 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4o	callback-refused	FAIL	56	[92m06:39:02 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4o	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gpt-4o	chat-callback-recorded	PASS	38	
gpt-4o	correction-mid-call	PASS	35	
gpt-4o	off-topic	PASS	34	
gpt-4o	unclear-request	PASS	45	
gpt-4o	wrong-number	FAIL	41	[92m06:42:30 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4.1-mini	aggressive-client	FAIL	47	[92m06:43:11 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4.1-mini	callback-quick-yes	PASS	33	
gpt-4.1-mini	callback-recorded	FAIL	47	[92m06:44:31 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4.1-mini	callback-refused	FAIL	51	[92m06:45:18 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
gpt-4.1-mini	chat-callback-recorded-via-http	SKIP	0	no golden_pass
gpt-4.1-mini	chat-callback-recorded	PASS	39	
gpt-4.1-mini	correction-mid-call	PASS	37	
gpt-4.1-mini	off-topic	PASS	31	
gpt-4.1-mini	unclear-request	PASS	40	
gpt-4.1-mini	wrong-number	FAIL	37	[92m06:48:36 - LiteLLM:WARNING[0m: common_utils.py:979 - litellm: could not pre-load bedrock-runtime response stream s
