In this cookbook we demonstrate how the FutureAGI SDK can be used to evaluate images:

  • Alignment with Descriptions: Ensuring that generated images accurately represent the textual descriptions provided.
  • Meeting Specific Requirements: Evaluating images against predefined artistic or thematic standards.
  • Evaluating Image Replacements: Assessing the quality and fit of new images replacing previous versions.

Installing FutureAGI

!pip install futureagi==0.3.3
!pip install pillow

Making Necessary Imports

import json
import requests

from IPython.display import Image, display

from fi.evals import EvalClient
from fi.evals import Deterministic, ImageInputOutput, ImageInstruction
from fi.testcases import MLLMTestCase

Loading and Visualising Data

path = '/content/data.json'

# Open and load the JSON file of evaluation datapoints.
# Explicit UTF-8 avoids a platform-dependent default encoding
# (e.g. cp1252 on Windows) silently corrupting non-ASCII text.
with open(path, 'r', encoding='utf-8') as file:
    datapoints = json.load(file)
# Sample Datapoint

# One record from data.json. As used later in this notebook:
# 'image_url' feeds the alignment eval, 'output_image_url' the
# input/output (replacement) eval, and 'question'/'category' the
# deterministic eval.
datapoint = {
    'id': 'masked_id',                               # anonymized record id
    'image_url': './images/output_8_0.png',          # generated image (NOTE: relative local path, not a URL)
    'output_image_url': './images/output_26_0.png',  # replacement/edited image
    'prompt': 'an asian man, closeup, on new york city street',  # generation prompt
    'type': 'T2I',                                   # presumably text-to-image — confirm against dataset docs
    'category': 'Ethnicity',                         # the aspect the question focuses on
    'question': 'Does the image follow the Ethnicity mentioned in the prompt?'
}
# Sample Image

# Fetch and display the sample image.
# NOTE(review): 'image_url' above is a relative local path ('./images/...');
# requests.get() accepts only absolute URLs and raises MissingSchema for such
# values, so the request is guarded instead of crashing the cell. A timeout
# is also set so a stalled fetch cannot hang the notebook indefinitely.
try:
    response = requests.get(datapoint['image_url'], timeout=10)
except requests.exceptions.RequestException as exc:
    print(f"Failed to fetch the image: {exc}")
else:
    # Display the image in the notebook
    if response.status_code == 200:
        display(Image(response.content))
    else:
        print("Failed to fetch the image.")

Output:

Initializing the FutureAGI Evaluator Class and Deterministic Eval

from getpass import getpass
from fi.evals import EvalClient  # re-import is harmless in a notebook cell

# Prompt for credentials interactively so secrets never land in the
# notebook source or its saved outputs.
fi_api_key = getpass("Enter your FI API Key: ")
fi_secret_key = getpass("Enter your FI Secret Key: ")

# NOTE(review): this targets the dev endpoint — confirm the base URL
# to use for production against the FutureAGI documentation.
evaluator = EvalClient(
    fi_api_key=fi_api_key,
    fi_secret_key=fi_secret_key,
    fi_base_url="https://dev.api.futureagi.com"
)

print("Evaluator client initialized successfully!")

Evaluating Alignment with Descriptions

import textwrap


# Test case carrying the generation prompt and the image to grade.
class ImageEvalTestCase(MLLMTestCase):
    input: str
    image_url: str


# Instruction-following eval: grades the image against free-form criteria.
image_eval = ImageInstruction(
    config={
        "criteria": """
        Evaluate the image based on:
        1. Accuracy of object representation
        2. Setting accuracy
        3. Image quality and realism
        """
    }
)

test_case_img_eval = ImageEvalTestCase(
    input=datapoint['prompt'],
    image_url=datapoint['image_url'],
)

# Run the single eval against the single test case, then wrap the
# judge's reasoning to 80 columns for readable notebook output.
batch_result = evaluator.evaluate([image_eval], [test_case_img_eval])
wrapped_text = textwrap.fill(batch_result.eval_results[0].reason, width=80)
print(wrapped_text)

Output:

The image accurately represents an Asian man and a New York City street, but the anime style affects realism and image quality.

Evaluating Specific Requirements

# Test case schema: each field here is referenced by the rule_prompt's
# input_key mapping below.
class DeterministicTestCase(MLLMTestCase):
    question: str
    prompt: str
    image_url: str
    category: str


# Deterministic eval: constrains the judge to one of the listed choices,
# so results are directly comparable across datapoints.
deterministic_config = {
    "multi_choice": False,
    "choices": ["Yes", "No"],
    "rule_prompt": "Prompt : {{input_key2}}, Image : {{input_key3}}. Given the prompt and the corresponding image, answer the Question : {{input_key1}}. Focus only on the {{input_key4}}",
    "input": {
        "input_key1": "question",
        "input_key2": "prompt",
        "input_key3": "image_url",
        "input_key4": "category",
    },
}
deterministic_eval = Deterministic(config=deterministic_config)

test_case = DeterministicTestCase(
    question=datapoint['question'],
    prompt=datapoint['prompt'],
    image_url=datapoint['image_url'],
    category=datapoint['category'],
)

batch_result = evaluator.evaluate([deterministic_eval], [test_case])
# Bare expression so the notebook renders the chosen answer, e.g. ['Yes'].
batch_result.eval_results[0].data

Output:

['Yes']
# Wrap the judge's rationale to 80 columns before printing.
reason = batch_result.eval_results[0].reason
print(textwrap.fill(reason, width=80))

Output:

The image depicts an animated character with traits commonly associated with Asian ethnicity.

Evaluating Changes Based on Text Instructions

# Test case pairing an edit instruction with the before/after images.
class ImageInputOutputTestCase(MLLMTestCase):
    input: str
    input_image_url: str
    output_image_url: str


# Eval that judges how well the output image realizes the instruction
# applied to the input image.
image_input_output_eval = ImageInputOutput(
    config={
        "criteria": """
    Evaluate the output image based on:
        1. Adherence to input instruction
        2. Preservation of key elements from input image
        3. Quality of color modification
        4. Image quality and realism
    """
    }
)
# Fetch and display the replacement image.
# NOTE(review): 'output_image_url' is a relative local path ('./images/...');
# requests.get() accepts only absolute URLs and raises MissingSchema for such
# values, so the request is guarded instead of crashing the cell. A timeout
# is also set so a stalled fetch cannot hang the notebook indefinitely.
try:
    response = requests.get(datapoint['output_image_url'], timeout=10)
except requests.exceptions.RequestException as exc:
    print(f"Failed to fetch the image: {exc}")
else:
    # Display the image in the notebook
    if response.status_code == 200:
        display(Image(response.content))
    else:
        print("Failed to fetch the image.")

Output:

# Build the before/after test case: did the edit follow the instruction?
test_case_image_input_output = ImageInputOutputTestCase(
    input='Replace the man with a man of African ethnicity',
    input_image_url=datapoint['image_url'],
    output_image_url=datapoint['output_image_url'],
)

# Grade the edited image and show the judge's reasoning, wrapped to 80 cols.
batch_result = evaluator.evaluate(
    [image_input_output_eval],
    [test_case_image_input_output],
)
print(textwrap.fill(batch_result.eval_results[0].reason, width=80))

Output:

The output image accurately replaces the man with one of African ethnicity while preserving all key elements, maintaining high image quality and realism.