diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py index 8154caf5e..41b6da63f 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/datafabric_prompt_builder.py @@ -27,14 +27,24 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: - """Convert an Entity SDK object to schema + derived query patterns.""" + """Convert an Entity SDK object to schema + derived query patterns. + + Auto-added system/audit fields (Id, CreateTime, UpdateTime, CreatedBy, + UpdatedBy) are surfaced in the schema, tagged ``system`` via + :attr:`FieldSchema.is_system_field`, but are always excluded from the + derived query patterns so the examples reference only business fields. + """ field_schemas: list[FieldSchema] = [] + # Query patterns are derived from business fields only — system fields, + # even when surfaced in the schema, must never drive an example query. + business_field_names: list[str] = [] numeric_field: str | None = None text_field: str | None = None for field in entity.fields or []: - if field.is_hidden_field or field.is_system_field: + if field.is_hidden_field: continue + is_system = field.is_system_field type_name = field.sql_type.name if field.sql_type else "unknown" fs = FieldSchema( name=field.name, @@ -45,15 +55,19 @@ def build_entity_context(entity: Entity) -> EntitySQLContext: is_required=field.is_required, is_unique=field.is_unique, nullable=not field.is_required, + is_system_field=is_system, ) field_schemas.append(fs) + if is_system: + continue + business_field_names.append(fs.name) if not numeric_field and fs.is_numeric: numeric_field = fs.name if not text_field and fs.is_text: text_field = fs.name - field_names = [f.name for f in field_schemas] + field_names = business_field_names table = entity.name group_field = text_field or (field_names[0] if field_names else "Category") @@ -172,11 +186,11 @@ def format_sql_context(ctx: SQLContext) -> str: if entity.description: lines.append(f"_{entity.description}_") lines.append("") - lines.append("| Field | Type |") - lines.append("|-------|------|") - + lines.append("| Field | Type | Description |") + lines.append("|-------|------|-------------|") for field in entity.fields: - lines.append(f"| {field.name} | {field.display_type} |") + desc = (field.description or "").replace("|", r"\|").replace("\n", " ") + lines.append(f"| {field.name} | {field.display_type} | {desc} |") lines.append("") diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/models.py b/src/uipath_langchain/agent/tools/datafabric_tool/models.py index 09f4436ee..a68334a2b 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/models.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/models.py @@ -17,6 +17,7 @@ class FieldSchema(BaseModel): is_required: bool = False is_unique: bool = False nullable: bool = True + is_system_field: bool = False @property def display_type(self) -> str: @@ -24,6 +25,8 @@ def display_type(self) -> str: modifiers = [] if self.is_required: modifiers.append("required") + if self.is_system_field: + modifiers.append("system") if modifiers: return f"{self.type}, {', '.join(modifiers)}" return self.type diff --git a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py index 539f2df39..e3e1e1c8e 100644 --- a/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py +++ b/src/uipath_langchain/agent/tools/datafabric_tool/prompts/v1.py @@ -57,6 +57,11 @@ customer_name, order_date, etc. e. Double-check: does every field in your SELECT directly appear in the \ answer the user expects? + f. SYSTEM / AUDIT FIELDS — the schema may include auto-added system fields \ +tagged ``system`` (e.g. a record identifier or created/updated bookkeeping \ +columns). Use field names and descriptions to decide which field the question \ +refers to; when a business (non-system) field overlaps a system field's \ +concept and it is unclear which to use — prefer the BUSINESS field. 4. WHERE FILTERS — What predicates belong in WHERE? What are the exact values \ to filter on? 5. VALUE RESOLUTION — Before finalising any equality / IN filter on a textual \ diff --git a/tests/agent/tools/test_datafabric_prompt_builder.py b/tests/agent/tools/test_datafabric_prompt_builder.py index 47034b144..560e049d1 100644 --- a/tests/agent/tools/test_datafabric_prompt_builder.py +++ b/tests/agent/tools/test_datafabric_prompt_builder.py @@ -4,7 +4,7 @@ def _fake_field(**overrides): - return SimpleNamespace( + defaults = dict( name="status", display_name="Status", sql_type=SimpleNamespace(name="varchar"), @@ -19,8 +19,9 @@ def _fake_field(**overrides): is_unique=False, is_hidden_field=False, is_system_field=False, - **overrides, ) + defaults.update(overrides) + return SimpleNamespace(**defaults) def _fake_entity(*fields, **overrides): @@ -57,3 +58,50 @@ def test_build_includes_domain_guidance_in_rendered_prompt(): assert "## Domain Guidance" in prompt assert "Use business-friendly ticket language." in prompt + + +def _system_field(name, type_name="datetimeoffset", **overrides): + """A fake auto-added system/audit field (Id, CreateTime, ...).""" + return _fake_field( + name=name, + display_name=name, + sql_type=SimpleNamespace(name=type_name), + description="System built-in field", + is_system_field=True, + allowed_values=None, + examples=None, + good_for_aggregation=False, + good_for_grouping=False, + good_for_filtering=False, + **overrides, + ) + + +def test_surfaces_tagged_system_fields_with_descriptions(): + """System fields are surfaced, tagged ``system``, in a Description-column table.""" + entity = _fake_entity( + _fake_field(name="status"), + _system_field("CreateTime"), + _system_field("CreatedBy", type_name="uniqueidentifier"), + ) + prompt = build([entity]) + + assert "| Field | Type | Description |" in prompt + assert "| CreateTime | datetimeoffset, system |" in prompt + assert "System built-in field" in prompt + # system/audit field-selection guidance is part of the prompt + assert "SYSTEM / AUDIT FIELDS" in prompt + + +def test_query_patterns_exclude_system_fields(): + """Surfaced system fields must never drive the derived query patterns.""" + entity = _fake_entity( + _fake_field(name="status"), + _system_field("CreateTime"), + _system_field("Id", type_name="uniqueidentifier"), + ) + prompt = build([entity]) + + patterns_block = prompt.split("Query Patterns for Ticket", 1)[1] + assert "CreateTime" not in patterns_block + assert "Id" not in patterns_block