Source code for sycamore.transforms.assign_doc_properties
from sycamore.data import Document
from sycamore.plan_nodes import Node, SingleThreadUser, NonGPUUser
from sycamore.transforms.map import Map
from sycamore.utils.time_trace import timetrace
from typing import List, Dict
[docs]
class AssignDocProperties(SingleThreadUser, NonGPUUser, Map):
    """
    The AssignDocProperties transform copies a property from the first element of a specific type
    that carries it to the parent document. This allows for the consolidation of key attributes
    at the document level.

    The property value must be a dict; its entries are merged into the parent document's
    ``"entity"`` property (created if it does not exist yet).

    Args:
        child: The source node or component that provides the dataset for assigning properties from element.
        parameters: Forwarded to ``Map`` as ``args`` for ``assign_doc_properties``; expected to be
            ``[element_type, property_name]``, matching that function's parameters after the document.
        resource_args: Additional resource-related arguments passed to the operation for property assignment.

    Example:
        .. code-block:: python

            source_node = ...  # Define a source node or component that provides hierarchical documents.
            property_transform = AssignDocProperties(child=source_node, parameters=["table", "llm_response"])
            property_dataset = property_transform.execute()
    """

    def __init__(self, child: Node, parameters: List[str], **resource_args):
        super().__init__(child, f=AssignDocProperties.assign_doc_properties, args=parameters, **resource_args)

    @staticmethod
    @timetrace("AssignProps")
    def assign_doc_properties(parent: Document, element_type: str, property_name: str) -> Document:
        """
        Merge ``property_name`` from the first matching element into ``parent.properties["entity"]``.

        Args:
            parent: The document whose elements are searched; it is modified in place.
            element_type: Only elements whose ``type`` equals this value are considered.
            property_name: Key to look up in the element's properties; must not be None.

        Returns:
            The same ``parent`` document. It is unchanged when no element of the requested
            type has the property.

        Raises:
            AssertionError: If ``property_name`` is None or the property value is not a dict.
        """
        assert property_name is not None
        for e in parent.elements:
            if e.type != element_type or property_name not in e.properties:
                continue
            prop = e.properties[property_name]
            assert isinstance(
                prop, dict
            ), f"property {property_name}, expected Dict, got {type(prop).__name__}: {str(prop)}"
            # Merge into the parent's own dict rather than storing the element's dict
            # itself, so later updates to "entity" cannot leak back into the element.
            parent.properties.setdefault("entity", {}).update(prop)
            # Only the first matching element contributes.
            break
        return parent