Image Evaluation
Evaluate AI-generated images for description alignment, artistic requirements, and replacement quality using the Future AGI SDK.
In this cookbook we demonstrate how the Future AGI SDK can be used to evaluate images along three dimensions:
- Alignment with Descriptions: Ensuring that generated images accurately represent the textual descriptions provided.
- Meeting Specific Requirements: Evaluating images against predefined artistic or thematic standards.
- Evaluating Image Replacements: Assessing the quality and fit of new images replacing previous versions.
Installing the Future AGI SDK
pip install ai-evaluation
pip install pillow
Making Necessary Imports
import json
import requests
from IPython.display import Image, display
from fi.evals import Evaluator
from fi.evals import Deterministic, ImageInputOutput, ImageInstruction
from fi.testcases import MLLMTestCase
Loading and Visualising Data
# Path of the JSON file that is loaded into `datapoints`.
path = '/content/data.json'
# Open and load the JSON file. The encoding is pinned to UTF-8 so that any
# non-ASCII text decodes the same way regardless of the platform's locale default.
with open(path, 'r', encoding='utf-8') as file:
    datapoints = json.load(file)
# Sample Datapoint
# One record with the fields used throughout the rest of this cookbook.
datapoint = dict(
    id='masked_id',
    image_url='./images/output_8_0.png',
    output_image_url='./images/output_26_0.png',
    prompt='an asian man, closeup, on new york city street',
    type='T2I',
    category='Ethnicity',
    question='Does the image follow the Ethnicity mentioned in the prompt?',
)
# Sample Image
image_source = datapoint['image_url']
if image_source.startswith(('http://', 'https://')):
    # Remote image: download it. The timeout keeps the cell from hanging
    # indefinitely if the host never responds.
    response = requests.get(image_source, timeout=30)
    # Display the image in the notebook
    if response.status_code == 200:
        display(Image(response.content))
    else:
        print("Failed to fetch the image.")
else:
    # Local path (the sample datapoint uses './images/...'): requests rejects
    # values without a URL scheme, so let IPython read the file directly.
    display(Image(filename=image_source))
Output:
Initializing the FutureAGI Evaluator Class and Deterministic Eval
from getpass import getpass
from fi.evals import Evaluator

# Prompt for both credentials interactively instead of hard-coding them in the notebook.
fi_api_key = getpass("Enter your FI API Key: ")
fi_secret_key = getpass("Enter your FI Secret Key: ")

# Create the client that every evaluation call below goes through.
evaluator = Evaluator(
    fi_base_url="https://api.futureagi.com",
    fi_secret_key=fi_secret_key,
    fi_api_key=fi_api_key,
)

print("Evaluator client initialized successfully!")
Evaluating Alignment with Descriptions
# ImageInstruction eval configured with the free-text criteria it should judge against.
image_eval = ImageInstruction(
    config={
        "criteria": (
            "\n"
            "Evaluate the image based on:\n"
            "1. Accuracy of object representation\n"
            "2. Setting accuracy\n"
            "3. Image quality and realism\n"
        ),
    },
)
class ImageEvalTestCase(MLLMTestCase):
    """Test case pairing a text prompt with the image evaluated against it."""

    # Text prompt the image is checked against.
    input: str
    # Location of the image under evaluation.
    image_url: str
# Build the test case from the sample datapoint's prompt and image.
test_case_img_eval = ImageEvalTestCase(
    image_url=datapoint["image_url"],
    input=datapoint["prompt"],
)
import textwrap
# Run the single eval template against the single test case.
batch_result = evaluator.evaluate([image_eval], [test_case_img_eval])
# Wrap the first result's explanation to 80 columns so it reads well in the notebook.
wrapped_text = textwrap.fill(batch_result.eval_results[0].reason, width=80)
print(wrapped_text)
Output:
The image accurately represents an Asian man and a New York City street, but the anime style affects realism and image quality.
Evaluating Subjective Requirements
# Deterministic eval restricted to a single Yes/No answer.
# The {{input_keyN}} placeholders in the rule prompt are named in "input", whose
# values match the field names of the test case passed to the evaluator.
deterministic_eval = Deterministic(
    config=dict(
        multi_choice=False,
        choices=["Yes", "No"],
        rule_prompt=(
            "Prompt : {{input_key2}}, Image : {{input_key3}}. "
            "Given the prompt and the corresponding image, "
            "answer the Question : {{input_key1}}. "
            "Focus only on the {{input_key4}}"
        ),
        input=dict(
            input_key1="question",
            input_key2="prompt",
            input_key3="image_url",
            input_key4="category",
        ),
    )
)
class DeterministicTestCase(MLLMTestCase):
    """Test case carrying the four fields referenced by the deterministic rule prompt."""

    # Question the eval must answer about the image.
    question: str
    # Text prompt associated with the image.
    prompt: str
    # Location of the image under evaluation.
    image_url: str
    # Aspect the rule prompt tells the eval to focus on.
    category: str
# Populate every field of the test case straight from the sample datapoint.
test_case = DeterministicTestCase(
    category=datapoint["category"],
    image_url=datapoint["image_url"],
    prompt=datapoint["prompt"],
    question=datapoint["question"],
)
# Run the deterministic eval on the single test case.
batch_result = evaluator.evaluate([deterministic_eval], [test_case])
# Bare expression: as the last line of a notebook cell it displays the first metric's value.
batch_result.eval_results[0].metrics[0].value
Output:
['Yes']
# Print the first result's explanation, wrapped to 80 columns.
print(textwrap.fill(batch_result.eval_results[0].reason, width=80))
Output:
The image depicts an animated character with traits commonly associated with Asian ethnicity.
Evaluating Changes Based on Text Instructions
# ImageInputOutput eval configured with the criteria for judging an edited image.
image_input_output_eval = ImageInputOutput(
    config={
        "criteria": (
            "\n"
            "Evaluate the output image based on:\n"
            "1. Adherence to input instruction\n"
            "2. Preservation of key elements from input image\n"
            "3. Quality of color modification\n"
            "4. Image quality and realism\n"
        ),
    }
)
class ImageInputOutputTestCase(MLLMTestCase):
    """Test case holding a text instruction plus the input and output images."""

    # Text instruction given for the image change.
    input: str
    # Location of the image before the change.
    input_image_url: str
    # Location of the image after the change.
    output_image_url: str
output_image_source = datapoint['output_image_url']
if output_image_source.startswith(('http://', 'https://')):
    # Remote image: download it. The timeout keeps the cell from hanging
    # indefinitely if the host never responds.
    response = requests.get(output_image_source, timeout=30)
    # Display the image in the notebook
    if response.status_code == 200:
        display(Image(response.content))
    else:
        print("Failed to fetch the image.")
else:
    # Local path (the sample datapoint uses './images/...'): requests rejects
    # values without a URL scheme, so let IPython read the file directly.
    display(Image(filename=output_image_source))
Output:
# Pair the edit instruction with the datapoint's original and replacement images.
test_case_image_input_output = ImageInputOutputTestCase(
    output_image_url=datapoint["output_image_url"],
    input_image_url=datapoint["image_url"],
    input="Replace the man with a man of African ethnicity",
)
# Run the input/output image eval on the single test case.
batch_result = evaluator.evaluate([image_input_output_eval], [test_case_image_input_output])
# Print the first result's explanation, wrapped to 80 columns.
print(textwrap.fill(batch_result.eval_results[0].reason, width=80))
Output:
The output image accurately replaces the man with one of African ethnicity while preserving all key elements, maintaining high image quality and realism.