Last updated

Functions as a Service (FaaS): Your Serverless Toolkit 🚀

Learn how to create, deploy, and manage serverless functions in Dataloop - your key to automating workflows and extending platform capabilities.

Getting Started with FaaS 🌟

1. Basic Function Creation

import dtlpy as dl

class HelloWorld(dl.BaseServiceRunner):
    """Service runner exposing the ``hello_world`` function.

    FaaS runner classes must inherit from ``dl.BaseServiceRunner`` so the
    platform can load the class and dispatch executions to its methods.
    """

    def hello_world(self, item: dl.Item):
        """Print the item's name and id, then return the item unchanged.

        Args:
            item: The item the function was invoked on.

        Returns:
            The same ``item`` that was passed in.
        """
        print(f'Item name: {item.name}')
        print(f'Item id: {item.id}')
        return item

Save the class above as hello_world.py (the file named by the manifest's "entrypoint"), then create the DPK manifest file (dataloop.json):

{
  "name": "hello-world",
  "version": "1.0.0",
  "description": "A simple function that prints item details",
  "components": {
    "modules": [
      {
        "name": "hello-world",
        "entrypoint": "hello_world.py",
        "className": "HelloWorld",
        "functions": [
          {
            "name": "hello_world",
            "inputs": [
              {
                "name": "item",
                "type": "item"
              }
            ]
          }
        ]
      }
    ]
  }
}

Push the package to the platform:

# Publish the DPK described by dataloop.json to the platform.
# NOTE(review): assumes `project` is an existing dl.Project and that the
# manifest is in the current working directory — confirm.
dpk = project.dpks.publish()

2. Service Configuration

Add the service configuration to the DPK manifest file (dataloop.json):

{
  "name": "hello-world",
  "version": "1.0.0",
  "description": "A simple function that prints item details",
  "components": {
    "modules": [
      {
        "name": "hello-world",
        "entrypoint": "hello_world.py",
        "className": "HelloWorld",
        "functions": [
          {
            "name": "hello_world",
            "inputs": [
              {
                "name": "item",
                "type": "item"
              }
            ]
          }
        ]
      }
    ],
    "services": [
      {
        "name": "hello-world",
        "moduleName": "hello-world",
        "runtime": {
          "podType": "regular-m",
          "concurrency": 10,
          "runnerImage": "python:3.10",
          "autoscaler": {
            "type": "rabbitmq",
            "minReplicas": 0,
            "maxReplicas": 2,
            "queueLength": 100
          }
        }
      }
    ]
  }
}

Install the application:

# Install the DPK as an app in the project.
# NOTE(review): `dpk` is presumably the object returned by
# project.dpks.publish() in the earlier step — confirm.
app = project.apps.install(dpk)

Function Types and Triggers 🎯

1. Item Functions

def process_single_item(item: dl.Item):
    """Mark an item as processed and attach a 'processed' classification.

    Args:
        item: Item to mark. Two metadata keys are set on it and one
            classification annotation is uploaded to it.

    Returns:
        The same ``item`` object that was passed in.
    """
    # Set the processing markers in the item metadata, then push the change.
    item.metadata.update({'processed': True, 'processor': 'faas'})
    item.update()

    # Build a single classification annotation and upload it to the item.
    processed_label = dl.Classification(label='processed')
    annotation_builder = item.annotations.builder()
    annotation_builder.add(processed_label)
    item.annotations.upload(annotation_builder)

    return item

2. Dataset Functions

def dataset_stats(dataset: dl.Dataset):
    """Calculate dataset statistics.

    Args:
        dataset: Dataset whose items are scanned.

    Returns:
        A dict with three keys:

        * ``total_items`` - the value of ``dataset.items_count``.
        * ``annotations_count`` - sum of ``len(item.annotations.list())``
          over every listed item.
        * ``file_types`` - mapping of file extension (without the dot) to
          the number of items carrying it. Items whose filename has no
          extension are counted under ``'unknown'``.
    """
    import os  # local import so the snippet does not rely on file-level imports

    stats = {
        'total_items': dataset.items_count,
        'annotations_count': 0,
        'file_types': {}
    }

    # Collect detailed stats
    for item in dataset.items.list().all():
        # Count annotations
        stats['annotations_count'] += len(item.annotations.list())

        # Track file types. splitext inspects only the last path component,
        # so a dot in a directory name or a file without an extension does
        # not produce a bogus "type" such as the whole path.
        file_type = os.path.splitext(item.filename)[1][1:] or 'unknown'
        stats['file_types'][file_type] = stats['file_types'].get(file_type, 0) + 1

    return stats

3. Trigger Functions

Add a trigger to the "components" section of the DPK manifest file (dataloop.json):

"components": {
    "triggers": [
        {
        "name": "run-on-item-created",
        "active": true,
        "type": "Event",
        "namespace": "services.hello-world",
        "spec": {
          "filter": {
            "$and": [
              {
                "$or": [
                  {
                    "metadata.system.mimetype": "image/*"
                  },
                  {
                    "metadata.system.mimetype": "text/*"
                  }
                ]
              },
              {
                "hidden": false
              },
              {
                "type": "file"
              }
            ]
          },
          "executionMode": "Always",
          "resource": "Item",
          "actions": [
            "Created"
          ],
          "input": {},
          "operation": {
            "type": "function",
            "functionName": "hello_world"
          }
        }
      }
    ]
}

Function Management 📋

1. Execution Management

# Get the deployed service (named as in the manifest's "services" section)
service = project.services.get(service_name='hello-world')

# Execute function (the name must match a function declared in the manifest)
execution = service.execute(
    function_name='hello_world',
    item_id='item_id',        # replace with a real item id
    project_id='project_id'   # replace with a real project id
)
# Wait for execution to complete
execution = execution.wait()

# Get results
if execution.latest_status['status'] == 'success':
    results = execution.output
else:
    error = execution.latest_status['message']

2. Service Management

# List all services
services = project.services.list()

# Stream service logs (printed to the console as they arrive)
service.log(follow=True)

# Update service: change the runtime settings on the entity, then push them.
# service.update() itself takes no runtime argument.
service.runtime.pod_type = dl.InstanceCatalog.REGULAR_M  # pod size
service.runtime.concurrency = 20
service.update()

# Stop service
service.pause()

# Resume service
service.resume()

Ready to explore data versioning? Let's move on to the next chapter! 🚀