LLM Utilities API Documentation¶

The llm_utils module provides utilities for working with Large Language Models (LLMs), including prompt management, message formatting, and conversation handling.

Overview¶

This module includes:

Utilities for deep dictionary value retrieval
Prompt message management classes
Conversation and thread management
Integration with tree structures for hierarchical data

Utility Functions¶

get_value_by_key()¶

def get_value_by_key(
    d: Optional[Dict[str, Any]],
    pathkey: str,
    default_value: Any = None,
) -> Any

Get a "deep" value from a nested dictionary using dot-delimited path notation.

Parameters: - d (Optional[Dict[str, Any]]): The (possibly nested) dictionary - pathkey (str): The dot-delimited path key (e.g., "foo.bar.baz") - default_value (Any, default=None): Value to return when path doesn't exist

Returns: The retrieved value or the default_value

Example:

from dataknobs_utils import llm_utils

# Simple nested dictionary access
data = {
    "user": {
        "profile": {
            "name": "Alice",
            "email": "alice@example.com"
        },
        "preferences": {
            "theme": "dark",
            "language": "en"
        }
    }
}

# Get nested values
name = llm_utils.get_value_by_key(data, "user.profile.name")
print(name)  # "Alice"

email = llm_utils.get_value_by_key(data, "user.profile.email")
print(email)  # "alice@example.com"

theme = llm_utils.get_value_by_key(data, "user.preferences.theme")
print(theme)  # "dark"

# Handle missing keys with default value
age = llm_utils.get_value_by_key(data, "user.profile.age", 25)
print(age)  # 25 (default value)

# Handle None input safely
result = llm_utils.get_value_by_key(None, "any.path", "fallback")
print(result)  # "fallback"

Classes¶

PromptMessage¶

class PromptMessage:
    def __init__(
        self, 
        role: str, 
        content: str, 
        metadata: Optional[Dict[str, Any]] = None
    )

Wrapper for a prompt message with role-based content and optional metadata.

Parameters: - role (str): The message role (e.g., "system", "user", "assistant") - content (str): The message content - metadata (Optional[Dict[str, Any]], default=None): Additional metadata

Properties: - role (str): Message role - content (str): Message content
- metadata (Dict[str, Any]): Message metadata

Metadata Structure: The metadata dictionary can contain: - generation_args: Arguments used for generation - execution_data: Model name, start time, end time, etc. - user_comments: List of user comments with user and comment fields

Example:

from dataknobs_utils import llm_utils
from datetime import datetime

# Create system message
system_msg = llm_utils.PromptMessage(
    "system",
    "You are a helpful AI assistant specialized in data analysis."
)

# Create user message with metadata
user_msg = llm_utils.PromptMessage(
    "user",
    "Analyze this dataset and provide insights.",
    metadata={
        "generation_args": {
            "temperature": 0.7,
            "max_tokens": 1000
        },
        "user_comments": [
            {
                "user": "alice",
                "comment": "This is a priority analysis"
            }
        ]
    }
)

# Create assistant response with execution metadata
assistant_msg = llm_utils.PromptMessage(
    "assistant",
    "Based on the dataset analysis, I found the following patterns...",
    metadata={
        "execution_data": {
            "model_name": "gpt-4",
            "starttime": datetime.now().isoformat(),
            "endtime": datetime.now().isoformat(),
            "tokens_used": 250
        }
    }
)

# Access message properties
print(f"Role: {user_msg.role}")
print(f"Content: {user_msg.content}")
print(f"Temperature: {user_msg.metadata['generation_args']['temperature']}")

Usage Patterns¶

Building Conversation Flows¶

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

class ConversationManager:
    def __init__(self):
        self.messages = []

    def add_system_message(self, content: str) -> None:
        """Add a system message to set context."""
        msg = llm_utils.PromptMessage("system", content)
        self.messages.append(msg)

    def add_user_message(self, content: str, **metadata) -> None:
        """Add a user message with optional metadata."""
        msg = llm_utils.PromptMessage("user", content, metadata)
        self.messages.append(msg)

    def add_assistant_response(self, content: str, model_info: dict) -> None:
        """Add an assistant response with execution metadata."""
        metadata = {"execution_data": model_info}
        msg = llm_utils.PromptMessage("assistant", content, metadata)
        self.messages.append(msg)

    def get_conversation_context(self) -> list:
        """Get conversation as list of role-content dictionaries."""
        return [
            {"role": msg.role, "content": msg.content} 
            for msg in self.messages
        ]

    def get_metadata_summary(self) -> dict:
        """Summarize metadata across all messages."""
        summary = {
            "total_messages": len(self.messages),
            "roles": {},
            "models_used": set(),
            "total_tokens": 0
        }

        for msg in self.messages:
            # Count roles
            summary["roles"][msg.role] = summary["roles"].get(msg.role, 0) + 1

            # Extract model info
            if msg.metadata:
                exec_data = msg.metadata.get("execution_data", {})
                if "model_name" in exec_data:
                    summary["models_used"].add(exec_data["model_name"])
                if "tokens_used" in exec_data:
                    summary["total_tokens"] += exec_data["tokens_used"]

        summary["models_used"] = list(summary["models_used"])
        return summary

# Usage example
conversation = ConversationManager()

# Set up conversation
conversation.add_system_message(
    "You are a data scientist helping with analysis tasks."
)

conversation.add_user_message(
    "What's the best approach for analyzing customer churn?",
    user_id="user123",
    priority="high"
)

conversation.add_assistant_response(
    "For customer churn analysis, I recommend starting with...",
    {
        "model_name": "gpt-4",
        "tokens_used": 150,
        "response_time": 2.3
    }
)

print(conversation.get_metadata_summary())

Hierarchical Conversation Trees¶

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

class ConversationTree:
    """Manage branching conversations using Tree structure."""

    def __init__(self, initial_message: str = None):
        self.root = Tree("conversation_root")
        if initial_message:
            self.add_message(initial_message, "system", parent=self.root)

    def add_message(self, content: str, role: str, parent=None, **metadata) -> Tree:
        """Add a message to the conversation tree."""
        if parent is None:
            parent = self.root

        message = llm_utils.PromptMessage(role, content, metadata)
        message_node = parent.add_child(message)
        return message_node

    def get_conversation_path(self, node: Tree) -> list:
        """Get conversation path from root to specific node."""
        path = node.get_path()[1:]  # Skip root
        return [
            {"role": n.data.role, "content": n.data.content}
            for n in path
        ]

    def branch_conversation(self, from_node: Tree, new_content: str, role: str) -> Tree:
        """Create a new branch in the conversation."""
        return self.add_message(new_content, role, parent=from_node)

    def find_messages_by_role(self, role: str) -> list:
        """Find all messages with specific role."""
        return self.root.find_nodes(
            lambda n: hasattr(n.data, 'role') and n.data.role == role
        )

    def get_all_paths(self) -> list:
        """Get all conversation paths (to leaf nodes)."""
        leaves = self.root.collect_terminal_nodes()
        return [self.get_conversation_path(leaf) for leaf in leaves]

# Usage example
conv_tree = ConversationTree(
    "You are an expert in machine learning and data analysis."
)

# Main conversation path
user_q1 = conv_tree.add_message(
    "How do I improve model accuracy?", 
    "user", 
    conv_tree.root.children[0]
)

assist_a1 = conv_tree.add_message(
    "There are several strategies: feature engineering, hyperparameter tuning...",
    "assistant",
    user_q1
)

# Branch 1: Follow up on feature engineering
user_q2a = conv_tree.add_message(
    "Tell me more about feature engineering techniques.",
    "user",
    assist_a1
)

assist_a2a = conv_tree.add_message(
    "Feature engineering involves creating new features from existing data...",
    "assistant",
    user_q2a
)

# Branch 2: Follow up on hyperparameters
user_q2b = conv_tree.add_message(
    "What's the best approach for hyperparameter tuning?",
    "user",
    assist_a1
)

assist_a2b = conv_tree.add_message(
    "For hyperparameter tuning, consider using grid search or random search...",
    "assistant",
    user_q2b
)

# Get all conversation paths
paths = conv_tree.get_all_paths()
for i, path in enumerate(paths):
    print(f"\nConversation Path {i + 1}:")
    for msg in path:
        print(f"  {msg['role']}: {msg['content'][:50]}...")

Configuration and Settings Management¶

from dataknobs_utils import llm_utils

class LLMConfig:
    """Manage LLM configuration with nested settings."""

    def __init__(self, config_dict: dict):
        self.config = config_dict

    def get_setting(self, path: str, default=None):
        """Get setting using dot notation."""
        return llm_utils.get_value_by_key(self.config, path, default)

    def get_model_config(self, model_name: str) -> dict:
        """Get complete configuration for a specific model."""
        model_path = f"models.{model_name}"
        return self.get_setting(model_path, {})

    def get_generation_params(self, model_name: str) -> dict:
        """Get generation parameters for a model."""
        params_path = f"models.{model_name}.generation"
        default_params = self.get_setting("defaults.generation", {})
        model_params = self.get_setting(params_path, {})

        # Merge default and model-specific parameters
        return {**default_params, **model_params}

# Example configuration
config_data = {
    "defaults": {
        "generation": {
            "temperature": 0.7,
            "max_tokens": 1000,
            "top_p": 0.9
        }
    },
    "models": {
        "gpt-4": {
            "api_key": "sk-...",
            "base_url": "https://api.openai.com/v1",
            "generation": {
                "temperature": 0.8,
                "max_tokens": 2000
            }
        },
        "claude": {
            "api_key": "sk-ant-...",
            "base_url": "https://api.anthropic.com",
            "generation": {
                "temperature": 0.6,
                "max_tokens": 1500
            }
        }
    },
    "features": {
        "conversation_memory": True,
        "auto_save": {
            "enabled": True,
            "interval": 300
        }
    }
}

config = LLMConfig(config_data)

# Get various settings
print(config.get_setting("defaults.generation.temperature"))  # 0.7
print(config.get_setting("models.gpt-4.api_key"))  # "sk-..."
print(config.get_setting("features.auto_save.enabled"))  # True
print(config.get_setting("nonexistent.path", "fallback"))  # "fallback"

# Get model-specific configurations
gpt4_config = config.get_model_config("gpt-4")
print(gpt4_config)

# Get generation parameters (with inheritance)
gpt4_params = config.get_generation_params("gpt-4")
print(gpt4_params)  # Merged default + model-specific params

Error Handling¶

from dataknobs_utils import llm_utils

def safe_config_access(config_data, path, expected_type=None):
    """Safely access configuration with type checking."""
    try:
        value = llm_utils.get_value_by_key(config_data, path)

        if value is None:
            print(f"Configuration path '{path}' not found")
            return None

        if expected_type and not isinstance(value, expected_type):
            print(f"Expected {expected_type.__name__} for '{path}', got {type(value).__name__}")
            return None

        return value

    except Exception as e:
        print(f"Error accessing configuration path '{path}': {e}")
        return None

# Usage
config = {"api": {"timeout": "30"}}

# This will warn about type mismatch
timeout = safe_config_access(config, "api.timeout", int)

# Safe message creation
try:
    msg = llm_utils.PromptMessage("user", "Hello world")
    print(f"Created message: {msg.role} - {msg.content}")
except Exception as e:
    print(f"Failed to create message: {e}")

Integration Examples¶

With Tree Structures¶

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

# Build prompt template tree
def build_prompt_tree():
    root = Tree("prompt_templates")

    # Analysis templates
    analysis = root.add_child("analysis")
    analysis.add_child(llm_utils.PromptMessage(
        "system",
        "You are a data analyst. Analyze the provided data and give insights."
    ))

    # Creative templates
    creative = root.add_child("creative")
    creative.add_child(llm_utils.PromptMessage(
        "system", 
        "You are a creative writer. Help generate engaging content."
    ))

    return root

# Use templates
template_tree = build_prompt_tree()
analysis_templates = template_tree.find_nodes(
    lambda n: hasattr(n.data, 'content') and 'analyst' in n.data.content.lower()
)

With File Processing¶

from dataknobs_utils import llm_utils, file_utils
import json

# Load conversation history from files
def load_conversations(directory):
    conversations = []

    for filepath in file_utils.filepath_generator(directory):
        if filepath.endswith(".json"):
            for line in file_utils.fileline_generator(filepath):
                try:
                    data = json.loads(line)
                    role = data.get("role", "unknown")
                    content = data.get("content", "")
                    metadata = data.get("metadata", {})

                    msg = llm_utils.PromptMessage(role, content, metadata)
                    conversations.append(msg)
                except json.JSONDecodeError:
                    continue

    return conversations

# Save conversations
def save_conversations(conversations, output_file):
    lines = []
    for msg in conversations:
        data = {
            "role": msg.role,
            "content": msg.content,
            "metadata": msg.metadata
        }
        lines.append(json.dumps(data))

    file_utils.write_lines(output_file, lines)

Performance Considerations¶

Use get_value_by_key() for safe nested dictionary access instead of chained .get() calls
Store frequently accessed configuration paths in constants
Consider caching configuration values for repeated access
Use metadata efficiently - avoid storing large objects in message metadata

Best Practices¶

Always provide default values when accessing nested configuration
Include meaningful metadata in PromptMessage objects for debugging
Use consistent role names ("system", "user", "assistant")
Structure metadata with clear categories (generation_args, execution_data, user_comments)
Validate message content and roles before creating PromptMessage instances