Configuration Overview
AI Cost Firewall uses nginx-style configuration. Each directive has the form:
directive value;
Example:
listen_addr 0.0.0.0:8080;
Directives are case-sensitive and must end with a semicolon.
Example configuration
listen_addr 0.0.0.0:8080;
# Redis exact cache
#
# Local/dev example:
redis_url redis://redis:6379;
# Production reminder:
# Protect Redis with authentication and private networking.
# Example with password:
# redis_url redis://:your-redis-password@redis:6379;
# Chat upstream provider
# Default: openai_compatible
# Supports OpenAI-compatible /v1/chat/completions endpoints.
upstream_provider openai_compatible;
upstream_base_url https://api.openai.com;
upstream_api_key sk-your-api-key;
# Embedding provider
# Default: openai_compatible
# Supports OpenAI-compatible /v1/embeddings endpoints.
embedding_provider openai_compatible;
embedding_base_url https://api.openai.com;
embedding_api_key sk-your-api-key;
embedding_model text-embedding-3-small;
# Qdrant semantic cache
#
# Local/dev example:
qdrant_url http://qdrant:6334;
# Production reminder:
# Protect Qdrant with an API key and private networking.
# Example:
# qdrant_api_key your-qdrant-api-key;
qdrant_collection aif_semantic_cache;
# Must match the dimension of the configured embedding_model.
qdrant_vector_size 1536;
# Backward-compatible default TTL for both cache layers (2592000 seconds = 30 days)
cache_ttl_seconds 2592000;
# Optional explicit lifecycle controls (v0.1.5); the example values below are 1 day and 7 days
# exact_cache_ttl_seconds 86400;
# semantic_cache_retention_seconds 604800;
# Semantic cache lifecycle behavior:
# - Entries include inserted_at and expires_at metadata
# - Expired entries are skipped during lookup
# - Entries are NOT automatically deleted from Qdrant
#
# To clean up expired entries manually:
# ai-firewall --prune-expired-semantic-cache
request_timeout_seconds 120;
semantic_cache_enabled true;
semantic_similarity_threshold 0.92;
# Higher threshold = stricter similarity (fewer semantic hits)
# Lower threshold = more reuse (potentially less precise)
# If the embedding/semantic provider is unavailable, skip semantic cache and continue upstream.
# Default: true
semantic_cache_fail_open true;
# Model validation behavior
# By default, only models defined via `model_price` are allowed.
# Unknown models are rejected with HTTP 400.
allow_unknown_models_pass_through false;
# Chat-completion pricing (USD per 1M tokens)
# model_price <model> <input_usd_per_1m_tokens> <output_usd_per_1m_tokens>;
model_price gpt-4o-mini-2024-07-18 0.15 0.60;
model_price gpt-4.1-mini-2025-04-14 0.40 1.60;
# Embedding pricing in USD per 1M tokens (optional, used for net cost estimation only)
embedding_price 0.020;
Default paths
configs/ai-firewall.conf
/etc/ai-firewall/ai-firewall.conf