Skip to content

Examples

Practical examples of using Dataknobs packages across different use cases.

AI & LLM Examples

Chatbots and AI Agents

LLM Integration

Workflow & Data Processing

FSM Examples

Data Backend Examples

Configuration Examples

Data Structures Examples

Text Processing Examples

Integration Examples

Complete Application Examples

RAG Application with Multi-Backend Storage

import asyncio
from dataknobs_bots import DynaBot, BotContext
from dataknobs_data import database_factory
from dataknobs_config import Config

def _build_databases():
    """Create the conversation and knowledge database handles from a multi-backend config."""
    app_config = Config({
        "databases": {
            "conversations": {
                "backend": "postgres",
                "connection": "postgresql://..."
            },
            "knowledge": {
                "backend": "elasticsearch",
                "host": "localhost:9200",
                "index": "documentation"
            }
        }
    })
    app_config.register_factory("database", database_factory)
    return (
        app_config.get_instance("databases", "conversations"),
        app_config.get_instance("databases", "knowledge"),
    )

async def main():
    """Run a RAG bot: Postgres holds conversations, Elasticsearch backs retrieval."""
    conversations_db, knowledge_db = _build_databases()

    # RAG bot wired to both backends; buffer memory keeps the last 20 messages.
    bot = await DynaBot.from_config({
        "llm": {"provider": "openai", "model": "gpt-4"},
        "conversation_storage": {"backend": "postgres", "connection": "postgresql://..."},
        "memory": {
            "type": "buffer",
            "max_messages": 20
        },
        "rag": {
            "enabled": True,
            "knowledge_base": knowledge_db,  # Elasticsearch for search
            "top_k": 5
        },
        "system_prompt": "Answer questions using documentation when available."
    })

    # Identify the conversation so memory persists across calls.
    session = BotContext(
        conversation_id="docs-session",
        client_id="my-app",
        user_id="user123"
    )
    answer = await bot.chat(
        "How do I configure the database backend?",
        session
    )
    print(answer)

asyncio.run(main())

Data Pipeline with FSM and Multiple Backends

from dataknobs_fsm import SimpleFSM, DataHandlingMode
from dataknobs_data import database_factory
from dataknobs_config import Config

# Three storage tiers: Postgres source, in-memory staging, Elasticsearch target.
config = Config({
    "databases": {
        "source": {"backend": "postgres", "connection": "..."},
        "staging": {"backend": "memory"},
        "target": {"backend": "elasticsearch", "host": "..."}
    }
})
config.register_factory("database", database_factory)

# One database handle per tier, keyed by role name.
db_handles = {
    role: config.get_instance("databases", role)
    for role in ("source", "staging", "target")
}

def _inline_arc(src, dst, code):
    """Build one FSM arc whose transform is an inline code string."""
    return {"from": src, "to": dst, "transform": {"type": "inline", "code": code}}

# Extract → stage → transform → load, each hop delegating to a backend handle.
fsm_config = {
    "name": "multi_backend_pipeline",
    "states": [
        {"name": "extract", "is_start": True},
        {"name": "stage"},
        {"name": "transform"},
        {"name": "load", "is_end": True}
    ],
    "arcs": [
        _inline_arc("extract", "stage",
                    "lambda data, ctx: ctx.resources['source'].search(...)"),
        _inline_arc("stage", "transform",
                    "lambda data, ctx: ctx.resources['staging'].create(...)"),
        _inline_arc("transform", "load",
                    "lambda data, ctx: ctx.resources['target'].create(...)"),
    ]
}

# COPY mode so each state works on its own copy of the payload.
fsm = SimpleFSM(fsm_config, data_mode=DataHandlingMode.COPY)
fsm.context["resources"] = db_handles

result = fsm.process({"query": "SELECT * FROM users"})

LLM-Powered Content Processing

import asyncio
from dataknobs_fsm import SimpleFSM
from dataknobs_llm import create_llm_provider, LLMMessage
from dataknobs_data import database_factory, Record
from dataknobs_xization import normalize

def _make_arc(src, dst, code):
    """Build one FSM arc whose transform is an inline code string."""
    return {"from": src, "to": dst, "transform": {"type": "inline", "code": code}}

async def main():
    """Push raw text through normalize → summarize → tag → store FSM stages."""
    # LLM provider and S3-backed storage for the processed output.
    llm = create_llm_provider({"provider": "openai", "model": "gpt-4"})
    s3_storage = database_factory.create({
        "backend": "s3",
        "bucket": "processed-content"
    })

    # NOTE(review): the pipeline example above reads resources via
    # ctx.resources[...], while the store arc here uses ctx['storage'] —
    # confirm which access style SimpleFSM's context actually supports.
    pipeline_config = {
        "name": "content_processor",
        "states": [
            {"name": "load", "is_start": True},
            {"name": "normalize"},
            {"name": "summarize"},
            {"name": "tag"},
            {"name": "store", "is_end": True}
        ],
        "arcs": [
            _make_arc("load", "normalize",
                      "lambda data, ctx: {'text': normalize.basic_normalization_fn(data['text'])}"),
            _make_arc("normalize", "summarize",
                      "lambda data, ctx: {'summary': 'Summary of text'}  # Async LLM call needed"),
            _make_arc("summarize", "tag",
                      "lambda data, ctx: {'tags': ['tag1', 'tag2']}  # Async LLM call needed"),
            _make_arc("tag", "store",
                      "lambda data, ctx: ctx['storage'].create(Record(data))"),
        ]
    }

    fsm = SimpleFSM(pipeline_config)
    fsm.context["llm"] = llm
    fsm.context["storage"] = s3_storage

    # Run one document through the whole pipeline.
    outcome = fsm.process({"text": "Long article content..."})
    print(outcome)

asyncio.run(main())

Example Categories

By Use Case

AI & Machine Learning:

- Bots Examples — chatbots and AI agents
- FSM LLM Examples — LLM workflows

Data Engineering:

- FSM Data Pipelines — ETL workflows
- Database Examples — multi-backend data access
- Streaming Examples — real-time processing

Application Development:

- Configuration Examples — app configuration patterns
- Data Structure Examples — trees and documents
- Text Processing Examples — text utilities

By Package

Next Steps