LLMMiddleware#

class council.llm.LLMMiddleware(*args, **kwargs)[source]#

Bases: Protocol

Protocol for defining LLM middleware.

Middleware can intercept and modify requests and responses between the client and the LLM, introducing custom logic.
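
A minimal sketch of a custom middleware, assuming the chain invokes each middleware as a callable that receives the underlying LLM, the next execute callable, and the request (see LLMMiddlewareChain below); the class and its timing logic are illustrative:

import time

from council.llm import LLMBase, LLMRequest, LLMResponse


class TimingMiddleware:
    """Hypothetical middleware that measures how long each LLM call takes."""

    def __call__(self, llm: LLMBase, execute, request: LLMRequest) -> LLMResponse:
        start = time.monotonic()
        response = execute(request)  # delegate to the rest of the chain / the LLM itself
        print(f"LLM call completed in {time.monotonic() - start:.2f}s")
        return response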

LLMMiddlewareChain#

class council.llm.LLMMiddlewareChain(llm: LLMBase, middlewares: Sequence[LLMMiddleware] | None = None)[source]#

Bases: object

Manages a chain of LLM middlewares and executes requests through them.

add_middleware(middleware: LLMMiddleware) → None[source]#

Add a middleware to the chain.

execute(request: LLMRequest) → LLMResponse[source]#

Execute the middleware chain.
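
For example, a chain can combine the built-in middlewares documented below (a sketch; the environment is assumed to be configured as in the code example further down, and the message content is illustrative):

from council import AnthropicLLM
from council.llm import (
    LLMMessage,
    LLMMiddlewareChain,
    LLMLoggingMiddleware,
    LLMRequest,
    LLMRetryMiddleware,
)

llm = AnthropicLLM.from_env()
chain = LLMMiddlewareChain(
    llm,
    middlewares=[
        LLMLoggingMiddleware(),                    # log requests and responses
        LLMRetryMiddleware(retries=3, delay=5.0),  # retry failed calls
    ],
)

request = LLMRequest.default([LLMMessage.user_message("What is the capital of France?")])
response = chain.execute(request)
print(response.duration)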

Logging#

LLMLoggingStrategy#

class council.llm.LLMLoggingStrategy(value)[source]#

Bases: str, Enum

Defines logging strategies for LLM middleware.

Minimal = 'minimal'#

Basic request/response info without details

MinimalWithConsumptions = 'minimal_consumptions'#

Basic info with consumption details

Verbose = 'verbose'#

Full request/response content

VerboseWithConsumptions = 'verbose_consumptions'#

Full request/response content with consumption details

property has_consumptions: bool#

Whether this strategy includes consumption details.

property is_minimal: bool#

Whether this strategy uses minimal logging.

property is_verbose: bool#

Whether this strategy uses verbose logging.
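
For example:

from council.llm import LLMLoggingStrategy

strategy = LLMLoggingStrategy.VerboseWithConsumptions
print(strategy.is_verbose)        # True
print(strategy.has_consumptions)  # True
print(LLMLoggingStrategy.Minimal.has_consumptions)  # False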

LLMLoggingMiddleware#

class council.llm.LLMLoggingMiddleware(strategy: LLMLoggingStrategy = LLMLoggingStrategy.Verbose, component_name: str | None = None)[source]#

Bases: LLMLoggingMiddlewareBase

Middleware for logging LLM requests, responses and consumptions to the context logger.

LLMFileLoggingMiddleware#

class council.llm.LLMFileLoggingMiddleware(log_file: str, strategy: LLMLoggingStrategy = LLMLoggingStrategy.Verbose, component_name: str | None = None)[source]#

Bases: LLMLoggingMiddlewareBase

Middleware for logging LLM requests, responses and consumptions to log_file, appending to the file.
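
For example (a sketch; the file name and component name are illustrative):

from council.llm import LLMFileLoggingMiddleware, LLMLoggingMiddleware, LLMLoggingStrategy

# full request/response content plus consumptions, logged to the context logger
console_logging = LLMLoggingMiddleware(strategy=LLMLoggingStrategy.VerboseWithConsumptions)

# basic per-call info appended to a local file
file_logging = LLMFileLoggingMiddleware(
    log_file="llm_calls.log",
    strategy=LLMLoggingStrategy.Minimal,
    component_name="my-agent",
)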

LLMRetryMiddleware#

class council.llm.LLMRetryMiddleware(retries: int, delay: float, exception_to_check: type[Exception] | None = None)[source]#

Bases: object

Middleware for implementing retry logic for LLM requests.

Attempts to retry failed requests a specified number of times with a delay between attempts.
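
For example (a sketch; restricting retries to timeouts via exception_to_check is illustrative):

from council.llm import LLMRetryMiddleware

# retry up to 3 times, waiting 5 seconds between attempts
retry = LLMRetryMiddleware(retries=3, delay=5.0)

# only retry on timeouts; any exception type can be passed as exception_to_check
retry_on_timeout = LLMRetryMiddleware(retries=3, delay=5.0, exception_to_check=TimeoutError)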

LLMCachingMiddleware#

class council.llm.LLMCachingMiddleware(ttl: float = 300.0, cache_limit_size: int = 10)[source]#

Bases: object

Middleware that caches LLM responses to avoid duplicate calls.

__init__(ttl: float = 300.0, cache_limit_size: int = 10) → None[source]#

Initialize the caching middleware.

Parameters:
  • ttl – Sliding window time-to-live in seconds for cache entries (default: 5 minutes)

  • cache_limit_size – Maximum number of cached entries (default: 10)

clear_cache() → None[source]#

Clear all cached entries.

static get_hash(request: LLMRequest, configuration: T_Configuration) → str[source]#

Convert the request and LLM configuration to a hash with hashlib.sha256.

Code Example#

Example usage with council.llm.LLMFunction.

# !pip install council-ai==0.0.26

import dotenv

from council import AnthropicLLM
from council.llm import LLMFunction, LLMCachingMiddleware, LLMResponse
from council.llm.llm_response_parser import EchoResponseParser


dotenv.load_dotenv()
llm = AnthropicLLM.from_env()
llm_func: LLMFunction[LLMResponse] = LLMFunction(
    llm,
    EchoResponseParser.from_response,
    system_message="You're a helpful assistant"
)
# add caching middleware
llm_func.add_middleware(LLMCachingMiddleware())

# first request will be cached
llm_response_v1 = llm_func.execute("What is the capital of France?")
print(llm_response_v1.duration)  # 0.43
for consumption in llm_response_v1.result.consumptions:
    print(consumption)
# sample output:
# claude-3-haiku-20240307 consumption: 1 call
# claude-3-haiku-20240307 consumption: 0.3583 second
# claude-3-haiku-20240307:prompt_tokens consumption: 19 token
# ...
# claude-3-haiku-20240307:total_tokens_cost consumption: 1.852e-05 USD

# will hit the cache
llm_response_v1_1 = llm_func.execute("What is the capital of France?")
print(llm_response_v1_1.duration)  # 0
for consumption in llm_response_v1_1.result.consumptions:
    print(consumption)
# sample output:
# claude-3-haiku-20240307 consumption: 1 cached_call
# claude-3-haiku-20240307 consumption: 0.3583 cached_second
# claude-3-haiku-20240307:prompt_tokens consumption: 19 cached_token
# ...
# claude-3-haiku-20240307:total_tokens_cost consumption: 1.852e-05 cached_USD

# will not hit the cache since message is different
llm_response_v2 = llm_func.execute("Again, what is the capital of France?")

LLMRequest#

class council.llm.LLMRequest(context: LLMContext, messages: Sequence[LLMMessage], **kwargs: Any)[source]#

Bases: object

static default(messages: Sequence[LLMMessage], **kwargs: Any) → LLMRequest[source]#

Creates a default LLMRequest with an empty context.
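
For example:

from council.llm import LLMMessage, LLMRequest

request = LLMRequest.default([LLMMessage.user_message("What is the capital of France?")])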

LLMResponse#

class council.llm.LLMResponse(request: LLMRequest, result: LLMResult | None, duration: float)[source]#

Bases: object

static empty(request: LLMRequest) → LLMResponse[source]#

Creates an empty LLMResponse for a given request.
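
For example (a sketch; result is assumed to be None for an empty response):

from council.llm import LLMMessage, LLMRequest, LLMResponse

request = LLMRequest.default([LLMMessage.user_message("Hello")])
placeholder = LLMResponse.empty(request)
print(placeholder.result)  # None, until an actual LLM result is available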