Example Eval - AutoGen / AG2

from autogen import ConversableAgent   # or: from ag2 import ConversableAgent

# Agent whose replies are evaluated.
# NOTE: `{...}` for llm_config is a placeholder in this example; substitute a
# real LLM configuration before running.
assistant = ConversableAgent(
    "SupportAgent",
    system_message="Answer concisely.",
    llm_config={...},
    human_input_mode="NEVER",
)

# Agent that sends each query to `assistant`. It has no LLM of its own
# (llm_config=False) and is configured with max_consecutive_auto_reply=0.
user_proxy = ConversableAgent(
    "User",
    llm_config=False,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
)

def autogen_agent(case):
    """Run a single-turn chat for ``case`` and return the assistant's reply.

    ``case.query`` is sent from ``user_proxy`` to ``assistant`` with
    ``max_turns=1`` and ``silent=True``. The reply is then read from the
    messages ``assistant`` recorded for its conversation with ``user_proxy``.

    Args:
        case: Object exposing a ``query`` attribute used as the chat message.

    Returns:
        A dict with two keys: ``"output"`` is the content of the most recent
        message whose role is ``"assistant"`` (``""`` if there is none), and
        ``"metadata"`` is ``{"framework": "autogen"}``.
    """
    user_proxy.initiate_chat(assistant, message=case.query, max_turns=1, silent=True)

    history = assistant.chat_messages.get(user_proxy, [])

    # Walk the history newest-first and stop at the first assistant-authored
    # message; fall back to an empty string when none is found.
    reply = ""
    for entry in reversed(history):
        if entry.get("role") == "assistant":
            reply = entry["content"]
            break

    return {"output": reply, "metadata": {"framework": "autogen"}}

# Evaluation pipeline: run(...) -> execute(autogen_agent) -> finalize() -> analyze().
# The value returned by the last call is bound to `report`.
# NOTE: `client` is not defined in this snippet and must be created beforehand;
# dataset_id="..." looks like a placeholder for a real dataset id — confirm.
report = client.evaluations.run(
    dataset_id="...",
    subject={
        "kind": "custom_agent",
        "displayName": "AutoGen Agent",
        "framework": "autogen",
    },
).execute(autogen_agent).finalize().analyze()

Full example: autogen_eval