Uses AI agents to conduct structured evaluations of content, leveraging customisable prompts and system instructions for comprehensive assessment. Unlike rule-based compliance checks, this approach enables nuanced analysis by allowing agents to interpret content contextually based on predefined criteria. Click here to read the eval definition of Agent as a Judge.
# Example: score a model response with the AgentJudge eval template.
from fi.evals import AgentJudge, Evaluator
from fi.testcases import LLMTestCase

# Client authenticated against the Future AGI evaluation API.
judge_client = Evaluator(
    fi_api_key="your_api_key",
    fi_secret_key="your_secret_key",
    fi_base_url="https://api.futureagi.com",
)

# One test case: the user query, the model's response, supporting
# context, and a reference answer for the judge to compare against.
capital_case = LLMTestCase(
    query="What is the capital of France?",
    response="Paris is the capital of France and is known for the Eiffel Tower.",
    context="Paris has been France's capital since 987 CE.",
    expected_response="Paris is the capital of France.",
)

# Judge configuration: {{response}} / {{query}} placeholders in the
# prompts are filled in from the test case fields at evaluation time.
judge_template = AgentJudge(
    config={
        "model": "gpt-4o-mini",
        "evalPrompt": "Evaluate if the {{response}} accurately answers the {{query}}. Return a score between 0.0 and 1.0.",
        "systemPrompt": "You are an expert agent evaluating responses for accuracy and completeness.",
    }
)

# Run the evaluation (network call to the API) and report the outcome.
eval_outcome = judge_client.evaluate(
    eval_templates=[judge_template],
    inputs=[capital_case],
    model_name="turing_flash",
)

first_result = eval_outcome.eval_results[0]
print(f"Evaluation Result: {first_result.reason}")
print(f"Score: {first_result.metrics[0].value}")