Data Annotation: Labeling Your Way to Success 🎯
Learn how to create, manage, and organize annotations in Dataloop - your key to building high-quality training data.
Annotation Basics 🎨
1. Creating Annotations
import dtlpy as dl
# Fetch the item to annotate (`dataset` is assumed to be an existing dl.Dataset)
item = dataset.items.get(item_id='your-item-id')

# The builder gathers annotation definitions before they are uploaded
builder = item.annotations.builder()

# Box annotation labeled 'car'
car_box = dl.Box(top=100, left=100, bottom=200, right=200, label='car')
builder.add(annotation_definition=car_box)

# Classification annotation labeled 'sedan'
sedan_classification = dl.Classification(label='sedan')
builder.add(annotation_definition=sedan_classification)

# Upload everything the builder holds to the item
item.annotations.upload(builder)
2. Annotation Types
# Point annotation
builder.add(annotation_definition=dl.Point(
    x=100,
    y=100,
    label='landmark'
))

# Polygon annotation: geo is a list of [x, y] vertices
builder.add(annotation_definition=dl.Polygon(
    geo=[[100, 100], [200, 100], [200, 200], [100, 200]],
    label='building'
))

# Segmentation annotation
# dl.Segmentation stores a pixel mask rather than a vertex list, so the mask
# is built from the polygon vertices and the image shape (height, width).
builder.add(annotation_definition=dl.Segmentation.from_polygon(
    geo=[[100, 100], [200, 100], [200, 200]],
    label='road',
    shape=(item.height, item.width)
))

# Polyline annotation: geo is an ordered list of [x, y] points
builder.add(annotation_definition=dl.Polyline(
    geo=[[100, 100], [200, 200], [300, 300]],
    label='lane'
))
3. Advanced Annotation Properties
# Box that carries attribute values
car_attributes = {'color': 'red', 'model': 'sedan', 'damaged': True}
attributed_box = dl.Box(
    top=100, left=100, bottom=200, right=200,
    label='car',
    attributes=car_attributes,
)
builder.add(annotation_definition=attributed_box)

# Box with metadata: the metadata is passed to builder.add(), not to dl.Box
review_metadata = {'confidence': 0.95, 'reviewer': 'John'}
plain_box = dl.Box(top=100, left=100, bottom=200, right=200, label='car')
builder.add(annotation_definition=plain_box, metadata=review_metadata)
Managing Annotations 📋
1. Querying Annotations
# Get all annotations for an item
annotations = item.annotations.list()
for annotation in annotations:
    print(f"Found {annotation.label} at {annotation.coordinates}")

# Filter annotations by label
# The filter must target the ANNOTATION resource to be usable with annotations.list()
filters = dl.Filters(resource=dl.FiltersResource.ANNOTATION)
filters.add(field='label', values='car')
car_annotations = item.annotations.list(filters=filters)
2. Updating Annotations
# Get specific annotation
annotation = item.annotations.get(annotation_id='annotation-id')

# Update properties
annotation.label = 'truck'
annotation.attributes['color'] = 'blue'
annotation.update()

# Update coordinates (for box)
# Annotations fetched from the platform are dl.Annotation entities, not dl.Box
# instances, so the geometry kind is checked through `annotation.type`.
if annotation.type == dl.AnnotationType.BOX:
    annotation.top = 150
    annotation.bottom = 250
    annotation.update()
3. Batch Operations
# Remove every annotation labeled 'car' from the item
filters = dl.Filters(resource=dl.FiltersResource.ANNOTATION)
filters.add(
    field='label',
    values='car',
)
item.annotations.delete(filters=filters)

# Copy annotations from one item to another:
# fetch both items, read the source annotations, upload them to the target
source_item = dataset.items.get(
    item_id='source-id',
)
target_item = dataset.items.get(
    item_id='target-id',
)
annotations = source_item.annotations.list()
target_item.annotations.upload(annotations)
Working with Tasks 📝
1. Creating Tasks
# Create an annotation task over every item in a folder
task = dataset.tasks.create(
    task_name='Annotate Cars',
    assignee_ids=['annotator@company.com'],
    filters=dl.Filters(field='dir', values='/folder/to/annotate')
)

# Add specific items to task
# Task.add_items selects items through `filters` (or a list of Item entities),
# so the wanted item ids are expressed as an "id IN [...]" filter.
task.add_items(
    filters=dl.Filters(
        field='id',
        values=['item1-id', 'item2-id'],
        operator=dl.FiltersOperations.IN
    ),
    assignee_ids=['annotator@company.com']
)
2. Task Management
# Get task by ID
task = dataset.tasks.get(task_id='task-id')

# Update task status
# NOTE(review): confirm this call against the installed dtlpy version;
# Task.set_status may also require `operation` and `item_ids` arguments.
task.set_status(status='completed')

# Get task items
# NOTE(review): confirm that `task.items` and `item.status` are available in the
# installed dtlpy version (Task.get_items() may be the supported accessor).
items = task.items.list()
# A dedicated loop name keeps the tutorial's `item` variable intact for later snippets
for task_item in items:
    print(f"Item: {task_item.name}, Status: {task_item.status}")
Quality Assurance 🔍
1. Annotation Review
# Build a QA (review) task on top of the original annotation task,
# limited to items matching the 'annotated' filter
annotated_only = dl.Filters(field='annotated', values=True)
review_task = dataset.tasks.create_qa_task(
    task=task,  # Original task
    assignee_ids=['reviewer@company.com'],
    filters=annotated_only,
)
2. Consensus Annotations
# Collect each annotator's annotations on the same item so they can be compared
creator_a_filter = dl.Filters(
    field='creator',
    values='annotator1@company.com',
    resource=dl.FiltersResource.ANNOTATION,
)
annotations_a = item.annotations.list(filters=creator_a_filter)

creator_b_filter = dl.Filters(
    field='creator',
    values='annotator2@company.com',
    resource=dl.FiltersResource.ANNOTATION,
)
annotations_b = item.annotations.list(filters=creator_b_filter)
# Calculate IoU for box annotations
def calculate_iou(box_a, box_b):
    """Return the intersection-over-union of two box annotations.

    Both arguments must expose ``top``, ``left``, ``bottom`` and ``right``.
    An argument counts as a box when its ``type`` attribute equals ``'box'``
    or when it is a ``dl.Box`` instance.

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` is returned when either argument is
        not a box, when the boxes do not overlap, or when the union area is
        zero (degenerate boxes).
    """
    for box in (box_a, box_b):
        # The string check comes first so platform annotation entities of type
        # 'box' are accepted as well as dl.Box definitions.
        if not (getattr(box, 'type', None) == 'box' or isinstance(box, dl.Box)):
            return 0.0

    # Overlap extents are clamped at zero so disjoint boxes yield no intersection
    overlap_width = max(0, min(box_a.right, box_b.right) - max(box_a.left, box_b.left))
    overlap_height = max(0, min(box_a.bottom, box_b.bottom) - max(box_a.top, box_b.top))
    intersection = overlap_width * overlap_height

    area_a = (box_a.right - box_a.left) * (box_a.bottom - box_a.top)
    area_b = (box_b.right - box_b.left) * (box_b.bottom - box_b.top)
    union = area_a + area_b - intersection

    # Guard against division by zero for zero-area boxes
    if union <= 0:
        return 0.0
    return intersection / union
Best Practices 👑
1. Annotation Guidelines
- Create clear labeling instructions
- Define label hierarchies
- Establish quality criteria
- Document edge cases
2. Workflow Optimization
# Create annotation template: the allowed values for each attribute
template = {
    'attributes': {
        'color': ['red', 'blue', 'green'],
        'size': ['small', 'medium', 'large'],
        'damaged': [True, False]
    }
}

# Apply template to new annotations
# The template lists the *allowed* values; an annotation carries one chosen
# value per attribute, checked against the template before use.
selected_attributes = {'color': 'red', 'size': 'medium', 'damaged': False}
not_allowed = {
    name: value
    for name, value in selected_attributes.items()
    if value not in template['attributes'].get(name, [])
}
if not_allowed:
    raise ValueError(f"Attribute values not allowed by template: {not_allowed}")

builder.add(annotation_definition=dl.Box(
    top=100,
    left=100,
    bottom=200,
    right=200,
    label='car',
    attributes=selected_attributes
))
3. Error Prevention
def validate_annotation(annotation):
    """Validate annotation properties.

    Checks that the annotation has a non-empty label and, for box annotations,
    that ``top < bottom`` and ``left < right``. An annotation counts as a box
    when its ``type`` attribute equals ``'box'`` or it is a ``dl.Box`` instance.

    Returns:
        ``True`` when the annotation is valid; otherwise prints
        ``"Validation failed: <reason>"`` and returns ``False``.
    """
    # Explicit checks instead of `assert`: assertions are removed when Python
    # runs with -O, which would make every annotation pass validation.
    failure = None
    if not annotation.label:
        failure = "Missing label"
    elif getattr(annotation, 'type', None) == 'box' or isinstance(annotation, dl.Box):
        has_height = annotation.top < annotation.bottom
        has_width = annotation.left < annotation.right
        if not (has_height and has_width):
            failure = "Invalid box coordinates"

    if failure is not None:
        print(f"Validation failed: {failure}")
        return False
    return True
Ready to explore metadata and filtering? Let's move on to the next chapter! 🚀