Skip to content

LLM Utilities API Documentation

The llm_utils module provides utilities for working with Large Language Models (LLMs), including prompt management, message formatting, and conversation handling.

Overview

This module includes:

  • Utilities for deep dictionary value retrieval
  • Prompt message management classes
  • Conversation and thread management
  • Integration with tree structures for hierarchical data

Utility Functions

get_value_by_key()

def get_value_by_key(
    d: Optional[Dict[str, Any]],
    pathkey: str,
    default_value: Any = None,
) -> Any

Get a "deep" value from a nested dictionary using dot-delimited path notation.

Parameters:

  • d (Optional[Dict[str, Any]]): The (possibly nested) dictionary
  • pathkey (str): The dot-delimited path key (e.g., "foo.bar.baz")
  • default_value (Any, default=None): Value to return when the path doesn't exist

Returns: The retrieved value or the default_value

Example:

from dataknobs_utils import llm_utils

# Sample nested dictionary, assembled from its two sub-sections
profile = {"name": "Alice", "email": "alice@example.com"}
preferences = {"theme": "dark", "language": "en"}
data = {"user": {"profile": profile, "preferences": preferences}}

# Look up nested values by their dot-delimited paths
name = llm_utils.get_value_by_key(data, "user.profile.name")
email = llm_utils.get_value_by_key(data, "user.profile.email")
theme = llm_utils.get_value_by_key(data, "user.preferences.theme")
print(name)  # "Alice"
print(email)  # "alice@example.com"
print(theme)  # "dark"

# A path that does not exist yields the supplied default
age = llm_utils.get_value_by_key(data, "user.profile.age", 25)
print(age)  # 25 (default value)

# A None dictionary is handled safely and also yields the default
result = llm_utils.get_value_by_key(None, "any.path", "fallback")
print(result)  # "fallback"

Classes

PromptMessage

class PromptMessage:
    def __init__(
        self, 
        role: str, 
        content: str, 
        metadata: Optional[Dict[str, Any]] = None
    )

Wrapper for a prompt message with role-based content and optional metadata.

Parameters:

  • role (str): The message role (e.g., "system", "user", "assistant")
  • content (str): The message content
  • metadata (Optional[Dict[str, Any]], default=None): Additional metadata

Properties:

  • role (str): Message role
  • content (str): Message content
  • metadata (Dict[str, Any]): Message metadata

Metadata Structure: The metadata dictionary can contain:

  • generation_args: Arguments used for generation
  • execution_data: Model name, start time, end time, etc.
  • user_comments: List of user comments with user and comment fields

Example:

from dataknobs_utils import llm_utils
from datetime import datetime

# System message: just a role and its content, no metadata
system_msg = llm_utils.PromptMessage(
    "system",
    "You are a helpful AI assistant specialized in data analysis.",
)

# User message carrying generation arguments and a user comment
user_metadata = {
    "generation_args": {"temperature": 0.7, "max_tokens": 1000},
    "user_comments": [
        {"user": "alice", "comment": "This is a priority analysis"},
    ],
}
user_msg = llm_utils.PromptMessage(
    "user",
    "Analyze this dataset and provide insights.",
    metadata=user_metadata,
)

# Assistant response annotated with details of the model run
execution_data = {
    "model_name": "gpt-4",
    "starttime": datetime.now().isoformat(),
    "endtime": datetime.now().isoformat(),
    "tokens_used": 250,
}
assistant_msg = llm_utils.PromptMessage(
    "assistant",
    "Based on the dataset analysis, I found the following patterns...",
    metadata={"execution_data": execution_data},
)

# Read the user message back through its properties
generation_args = user_msg.metadata["generation_args"]
print(f"Role: {user_msg.role}")
print(f"Content: {user_msg.content}")
print(f"Temperature: {generation_args['temperature']}")

Usage Patterns

Building Conversation Flows

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

class ConversationManager:
    """Collect the messages of a single, linear conversation.

    Messages are kept in insertion order in ``self.messages``; each one is
    created as an ``llm_utils.PromptMessage``.
    """

    def __init__(self):
        self.messages = []

    def add_system_message(self, content: str) -> None:
        """Add a system message to set context."""
        self.messages.append(llm_utils.PromptMessage("system", content))

    def add_user_message(self, content: str, **metadata) -> None:
        """Add a user message; any keyword arguments become its metadata."""
        self.messages.append(llm_utils.PromptMessage("user", content, metadata))

    def add_assistant_response(self, content: str, model_info: dict) -> None:
        """Add an assistant response, storing ``model_info`` as its execution data."""
        metadata = {"execution_data": model_info}
        self.messages.append(
            llm_utils.PromptMessage("assistant", content, metadata)
        )

    def get_conversation_context(self) -> list:
        """Get conversation as list of role-content dictionaries."""
        return [
            {"role": msg.role, "content": msg.content}
            for msg in self.messages
        ]

    def get_metadata_summary(self) -> dict:
        """Summarize metadata across all messages.

        Returns:
            A dictionary with these keys:

            - ``total_messages``: number of stored messages
            - ``roles``: mapping of role name to message count
            - ``models_used``: distinct ``model_name`` values found in the
              messages' ``execution_data``, in first-seen order
            - ``total_tokens``: sum of all ``tokens_used`` values found in
              the messages' ``execution_data``
        """
        role_counts = {}
        # A dict serves as an insertion-ordered set, so the reported model
        # list is stable between runs (iterating a set of strings is not).
        models_seen = {}
        total_tokens = 0

        for msg in self.messages:
            role_counts[msg.role] = role_counts.get(msg.role, 0) + 1

            # Treat missing/None metadata or execution data as "no data"
            # rather than failing on the membership tests below.
            exec_data = (msg.metadata or {}).get("execution_data") or {}
            if "model_name" in exec_data:
                models_seen[exec_data["model_name"]] = None
            if "tokens_used" in exec_data:
                total_tokens += exec_data["tokens_used"]

        return {
            "total_messages": len(self.messages),
            "roles": role_counts,
            "models_used": list(models_seen),
            "total_tokens": total_tokens,
        }

# Usage example
conversation = ConversationManager()

# Seed the conversation with its system context
conversation.add_system_message(
    "You are a data scientist helping with analysis tasks."
)

# Extra keyword arguments are stored as the user message's metadata
conversation.add_user_message(
    "What's the best approach for analyzing customer churn?",
    user_id="user123",
    priority="high",
)

# Record the reply together with details of the model run
model_info = {"model_name": "gpt-4", "tokens_used": 150, "response_time": 2.3}
conversation.add_assistant_response(
    "For customer churn analysis, I recommend starting with...",
    model_info,
)

summary = conversation.get_metadata_summary()
print(summary)

Hierarchical Conversation Trees

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

class ConversationTree:
    """Branching conversation whose messages live in a ``Tree``.

    The root node holds the placeholder string "conversation_root"; every
    node added through this class holds an ``llm_utils.PromptMessage``.
    """

    def __init__(self, initial_message: str = None):
        self.root = Tree("conversation_root")
        if not initial_message:
            return
        # A non-empty initial message becomes a "system" child of the root.
        self.add_message(initial_message, "system", parent=self.root)

    def add_message(self, content: str, role: str, parent=None, **metadata) -> Tree:
        """Attach a new message under ``parent`` (the root when omitted).

        Extra keyword arguments become the message's metadata. Returns
        whatever ``parent.add_child`` returns for the new message.
        """
        attach_to = self.root if parent is None else parent
        return attach_to.add_child(llm_utils.PromptMessage(role, content, metadata))

    def get_conversation_path(self, node: Tree) -> list:
        """Return role/content dictionaries for the path leading to ``node``.

        The first entry of ``node.get_path()`` is dropped (the original
        example treats it as the root placeholder).
        """
        conversation = []
        for step in node.get_path()[1:]:
            message = step.data
            conversation.append({"role": message.role, "content": message.content})
        return conversation

    def branch_conversation(self, from_node: Tree, new_content: str, role: str) -> Tree:
        """Start a new branch by adding a message beneath ``from_node``."""
        return self.add_message(new_content, role, parent=from_node)

    def find_messages_by_role(self, role: str) -> list:
        """Return the nodes whose data has a ``role`` attribute equal to ``role``."""

        def has_role(candidate) -> bool:
            # Nodes such as the root hold plain data without a ``role``.
            return hasattr(candidate.data, 'role') and candidate.data.role == role

        return self.root.find_nodes(has_role)

    def get_all_paths(self) -> list:
        """Return one conversation path per terminal node of the tree."""
        return [
            self.get_conversation_path(terminal)
            for terminal in self.root.collect_terminal_nodes()
        ]

# Usage example
conv_tree = ConversationTree(
    "You are an expert in machine learning and data analysis."
)
# The initial system message is the first child of the root
system_node = conv_tree.root.children[0]

# Main conversation path
user_q1 = conv_tree.add_message(
    "How do I improve model accuracy?",
    "user",
    parent=system_node,
)
assist_a1 = conv_tree.add_message(
    "There are several strategies: feature engineering, hyperparameter tuning...",
    "assistant",
    parent=user_q1,
)

# Branch 1: Follow up on feature engineering
user_q2a = conv_tree.add_message(
    "Tell me more about feature engineering techniques.",
    "user",
    parent=assist_a1,
)
assist_a2a = conv_tree.add_message(
    "Feature engineering involves creating new features from existing data...",
    "assistant",
    parent=user_q2a,
)

# Branch 2: Follow up on hyperparameters (a sibling of branch 1)
user_q2b = conv_tree.add_message(
    "What's the best approach for hyperparameter tuning?",
    "user",
    parent=assist_a1,
)
assist_a2b = conv_tree.add_message(
    "For hyperparameter tuning, consider using grid search or random search...",
    "assistant",
    parent=user_q2b,
)

# Print every conversation path, previewing the first 50 characters of each message
paths = conv_tree.get_all_paths()
for number, path in enumerate(paths, start=1):
    print(f"\nConversation Path {number}:")
    for msg in path:
        preview = msg['content'][:50]
        print(f"  {msg['role']}: {preview}...")

Configuration and Settings Management

from dataknobs_utils import llm_utils

class LLMConfig:
    """Manage LLM configuration with nested settings."""

    def __init__(self, config_dict: dict):
        self.config = config_dict

    def get_setting(self, path: str, default=None):
        """Get setting using dot notation.

        Args:
            path: Dot-delimited path into the configuration.
            default: Value returned when the path does not exist.
        """
        return llm_utils.get_value_by_key(self.config, path, default)

    def get_model_config(self, model_name: str) -> dict:
        """Get complete configuration for a specific model.

        The model name is first looked up as a literal key of the "models"
        section, so names that contain dots (e.g. "gpt-3.5-turbo") are
        found. If there is no such key, the dotted path
        ``models.<model_name>`` is tried as before.

        Returns:
            The model's configuration, or an empty dict when not found.
        """
        models = self.get_setting("models", {})
        if isinstance(models, dict) and model_name in models:
            return models[model_name]
        # Fallback keeps the earlier dotted-path behavior for existing callers.
        return self.get_setting(f"models.{model_name}", {})

    def get_generation_params(self, model_name: str) -> dict:
        """Get generation parameters for a model.

        Returns:
            The "defaults.generation" settings overlaid with the model's own
            "generation" settings (model-specific values win).
        """
        default_params = self.get_setting("defaults.generation", {})
        model_config = self.get_model_config(model_name)
        if isinstance(model_config, dict):
            model_params = model_config.get("generation", {})
        else:
            model_params = {}

        # Merge default and model-specific parameters
        return {**default_params, **model_params}

# Example configuration

# Generation parameters shared by every model unless overridden
default_generation = {"temperature": 0.7, "max_tokens": 1000, "top_p": 0.9}

# Per-model connection details and generation overrides
gpt4_settings = {
    "api_key": "sk-...",
    "base_url": "https://api.openai.com/v1",
    "generation": {"temperature": 0.8, "max_tokens": 2000},
}
claude_settings = {
    "api_key": "sk-ant-...",
    "base_url": "https://api.anthropic.com",
    "generation": {"temperature": 0.6, "max_tokens": 1500},
}

# Feature switches
feature_flags = {
    "conversation_memory": True,
    "auto_save": {"enabled": True, "interval": 300},
}

config_data = {
    "defaults": {"generation": default_generation},
    "models": {"gpt-4": gpt4_settings, "claude": claude_settings},
    "features": feature_flags,
}

config = LLMConfig(config_data)

# Get various settings
print(config.get_setting("defaults.generation.temperature"))  # 0.7
print(config.get_setting("models.gpt-4.api_key"))  # "sk-..."
print(config.get_setting("features.auto_save.enabled"))  # True
print(config.get_setting("nonexistent.path", "fallback"))  # "fallback"

# Get model-specific configurations
gpt4_config = config.get_model_config("gpt-4")
print(gpt4_config)

# Get generation parameters (with inheritance)
gpt4_params = config.get_generation_params("gpt-4")
print(gpt4_params)  # Merged default + model-specific params

Error Handling

from dataknobs_utils import llm_utils

def safe_config_access(config_data, path, expected_type=None):
    """Look up a configuration value, reporting problems instead of raising.

    Args:
        config_data: The (possibly nested) configuration dictionary.
        path: Dot-delimited path of the value to read.
        expected_type: Optional type (or tuple of types) the value must be
            an instance of.

    Returns:
        The value, or None when it is missing, has the wrong type, or the
        lookup raised. In each of those cases a message is printed.
    """
    try:
        found = llm_utils.get_value_by_key(config_data, path)

        if found is None:
            problem = f"Configuration path '{path}' not found"
        elif expected_type and not isinstance(found, expected_type):
            problem = f"Expected {expected_type.__name__} for '{path}', got {type(found).__name__}"
        else:
            return found

        print(problem)
        return None

    except Exception as e:
        print(f"Error accessing configuration path '{path}': {e}")
        return None

# Usage
config = {"api": {"timeout": "30"}}

# The stored timeout is a string, so asking for an int reports a type mismatch
timeout = safe_config_access(config, path="api.timeout", expected_type=int)

# Safe message creation: report any failure instead of propagating it
try:
    msg = llm_utils.PromptMessage("user", "Hello world")
    created = f"Created message: {msg.role} - {msg.content}"
    print(created)
except Exception as e:
    print(f"Failed to create message: {e}")

Integration Examples

With Tree Structures

from dataknobs_utils import llm_utils
from dataknobs_structures import Tree

# Build prompt template tree
def build_prompt_tree():
    """Build a tree of system-prompt templates grouped by category.

    The root holds "prompt_templates"; each category node ("analysis",
    "creative") has one child holding a system ``PromptMessage``.
    """
    # (category name, system prompt) pairs, in the order they are added
    system_prompts = (
        (
            "analysis",
            "You are a data analyst. Analyze the provided data and give insights.",
        ),
        (
            "creative",
            "You are a creative writer. Help generate engaging content.",
        ),
    )

    root = Tree("prompt_templates")
    for category, prompt in system_prompts:
        category_node = root.add_child(category)
        category_node.add_child(llm_utils.PromptMessage("system", prompt))

    return root

# Use templates
def mentions_analyst(n):
    """Match nodes whose data has content containing 'analyst' (case-insensitive)."""
    return hasattr(n.data, 'content') and 'analyst' in n.data.content.lower()


template_tree = build_prompt_tree()
analysis_templates = template_tree.find_nodes(mentions_analyst)

With File Processing

from dataknobs_utils import llm_utils, file_utils
import json

# Load conversation history from files
def load_conversations(directory):
    """Load messages from the ".json" files found under ``directory``.

    Each line of each file is parsed as one JSON document. Lines that are
    not valid JSON, or that parse to something other than a JSON object,
    are skipped.

    Args:
        directory: Location passed to ``file_utils.filepath_generator``.

    Returns:
        A list of ``llm_utils.PromptMessage`` objects in the order read.
        Missing fields default to role "unknown", content "" and
        metadata {}.
    """
    conversations = []

    for filepath in file_utils.filepath_generator(directory):
        if not filepath.endswith(".json"):
            continue

        for line in file_utils.fileline_generator(filepath):
            # Keep the try body minimal: only parsing is expected to fail.
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON that is not an object (a list, string, number or
            # null) has no fields to read; skip it like a malformed line
            # instead of failing the whole load on ``.get``.
            if not isinstance(data, dict):
                continue

            role = data.get("role", "unknown")
            content = data.get("content", "")
            metadata = data.get("metadata", {})
            conversations.append(llm_utils.PromptMessage(role, content, metadata))

    return conversations

# Save conversations
def save_conversations(conversations, output_file):
    """Write each message to ``output_file`` as one JSON object per line.

    Every object has the keys "role", "content" and "metadata", taken from
    the corresponding message attributes.
    """
    serialized = [
        json.dumps(
            {
                "role": message.role,
                "content": message.content,
                "metadata": message.metadata,
            }
        )
        for message in conversations
    ]
    file_utils.write_lines(output_file, serialized)

Performance Considerations

  • Use get_value_by_key() for safe nested dictionary access instead of chained .get() calls
  • Store frequently accessed configuration paths in constants
  • Consider caching configuration values for repeated access
  • Use metadata efficiently - avoid storing large objects in message metadata

Best Practices

  • Always provide default values when accessing nested configuration
  • Include meaningful metadata in PromptMessage objects for debugging
  • Use consistent role names ("system", "user", "assistant")
  • Structure metadata with clear categories (generation_args, execution_data, user_comments)
  • Validate message content and roles before creating PromptMessage instances