Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agentevolver/schema/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class Task(BaseModel):
env_type: str = Field(default="appworld")

# whether this task is open query. open query has no clear stop condition.
open_query: bool = Field() # FIXME debug, check if every instance handles this new attr. default False.
open_query: bool = Field(default=False)

metadata: dict = Field(default_factory=dict)

Expand Down
101 changes: 101 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import sys
from pathlib import Path

import pytest

# Put the directory one level above this file's folder at the front of sys.path,
# unless it is already listed -- presumably so the `agentevolver` package
# imported further down resolves when the tests are launched from elsewhere.
ROOT_DIR = Path(__file__).resolve().parents[1]
if str(ROOT_DIR) not in sys.path:
sys.path.insert(0, str(ROOT_DIR))
Comment on lines +1 to +8

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Modifying sys.path dynamically within test files is an anti-pattern that can mask import issues and make tests behave inconsistently. Since the recommended way to run tests is via python -m pytest (as mentioned in the PR description), Python automatically adds the current working directory to sys.path, so the project root is already importable when the command is run from the repository root. Therefore, this dynamic path insertion can be safely removed, along with the sys and pathlib imports, which are used only by it.

Suggested change
import sys
from pathlib import Path
import pytest
ROOT_DIR = Path(__file__).resolve().parents[1]
if str(ROOT_DIR) not in sys.path:
sys.path.insert(0, str(ROOT_DIR))
import pytest


from agentevolver.schema.task import Task, TaskObjective


class TestTask:
    """Checks construction and field defaults of the ``Task`` model."""

    def test_task_minimal_construction(self):
        """Only ``task_id`` is supplied; ``env_type`` falls back to "appworld"."""
        minimal = Task(task_id="task-1")
        assert minimal.task_id == "task-1"
        assert minimal.env_type == "appworld"

    def test_open_query_defaults_to_false(self):
        """Callers that omit ``open_query`` get exactly ``False``."""
        untouched = Task(task_id="task-1")
        assert untouched.open_query is False

    def test_open_query_can_be_overridden(self):
        """An explicit ``open_query=True`` is kept on the instance."""
        opened = Task(task_id="task-2", open_query=True)
        assert opened.open_query is True

    def test_ground_truth_is_optional(self):
        """``ground_truth`` keeps a supplied value and is ``None`` when omitted."""
        with_answer = Task(task_id="task-3", ground_truth="4")
        assert with_answer.ground_truth == "4"

        without_answer = Task(task_id="task-4")
        assert without_answer.ground_truth is None

    def test_query_defaults_to_none(self):
        """A task created without ``query`` reports ``None`` for it."""
        no_query = Task(task_id="task-5")
        assert no_query.query is None


class TestTaskObjective:
"""Unit tests for the TaskObjective wrapper."""

def test_task_objective_proxies_ground_truth(self):
    """The objective surfaces its task's ground truth and keeps the task reachable."""
    source_task = Task(task_id="task-6", ground_truth="Roses are red.", open_query=True)
    wrapped = TaskObjective(task=source_task)

    assert wrapped.ground_truth == "Roses are red."
    assert wrapped.task.open_query is True

def test_task_objective_exposes_query_as_objective(self):
    """The task's ``query`` text is what ``objective`` returns."""
    queried = Task(task_id="task-7", query="Please summarize.")
    wrapped = TaskObjective(task=queried)
    assert wrapped.objective == "Please summarize."

def test_task_objective_objective_without_query(self):
    """With no ``query`` on the task, ``objective`` is ``None``."""
    bare = Task(task_id="task-8")
    wrapped = TaskObjective(task=bare)
    assert wrapped.objective is None

def test_task_objective_ground_truth_setter(self):
    """Assigning ``ground_truth`` on the objective is visible on the task too."""
    underlying = Task(task_id="task-9")
    wrapper = TaskObjective(task=underlying)
    new_value = "Updated ground truth."

    wrapper.ground_truth = new_value
    assert underlying.ground_truth == new_value
    assert wrapper.ground_truth == new_value

def test_task_objective_dict_roundtrip(self):
task = Task(
task_id="task-10",
ground_truth="Gravity is a force.",
open_query=True,
)
objective = TaskObjective(task=task, confidence=0.9, reward=1.0)

data = objective.model_dump()
restored = TaskObjective(**data)
Comment on lines +90 to +91

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The rest of the codebase (e.g., task_manager.py) consistently uses Pydantic v1 APIs such as .dict(), .json(), parse_obj(), and parse_raw(). Using model_dump() (a Pydantic v2 API) here will cause an AttributeError if the environment is running Pydantic v1. To maintain compatibility with the rest of the codebase, use .dict() instead.

Suggested change
data = objective.model_dump()
restored = TaskObjective(**data)
data = objective.dict()
restored = TaskObjective(**data)


assert restored.task.task_id == objective.task.task_id
assert restored.task.ground_truth == objective.task.ground_truth
assert restored.task.open_query == objective.task.open_query
assert restored.confidence == 0.9
assert restored.reward == 1.0


# When this file is executed as a script, run pytest on this file alone in verbose mode.
if __name__ == "__main__":
pytest.main([__file__, "-v"])
Comment on lines +100 to +101

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Running tests by executing the test file directly via if __name__ == '__main__': pytest.main(...) is an outdated practice. Modern test suites rely entirely on the pytest CLI runner. Removing this boilerplate keeps the test files clean and standard.