Skip to content

Commit

Permalink
Merge pull request #982 from danielaskdd/standalone-logger-setup
Browse files Browse the repository at this point in the history
Providing setup_logger for standalone LightRAG logger initialization
  • Loading branch information
LarFii authored Mar 4, 2025
2 parents fb07bc0 + d69042f commit acb074d
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 56 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import setup_logger

setup_logger("lightrag", level="INFO")

async def initialize_rag():
rag = LightRAG(
Expand Down Expand Up @@ -344,6 +347,10 @@ from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_i
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import setup_logger

# Setup log handler for LightRAG
setup_logger("lightrag", level="INFO")

async def initialize_rag():
rag = LightRAG(
Expand Down Expand Up @@ -640,6 +647,9 @@ export NEO4J_URI="neo4j://localhost:7687"
export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="password"

# Setup logger for LightRAG
setup_logger("lightrag", level="INFO")

# When you launch the project be sure to override the default KG: NetworkX
# by specifying kg="Neo4JStorage".

Expand All @@ -649,8 +659,12 @@ rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
graph_storage="Neo4JStorage", #<-----------override KG default
log_level="DEBUG" #<-----------override log_level default
)

# Initialize database connections
await rag.initialize_storages()
# Initialize pipeline status for document processing
await initialize_pipeline_status()
```
see test_neo4j.py for a working example.

Expand Down Expand Up @@ -859,7 +873,6 @@ Valid modes are:
| **kv\_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`, `OracleKVStorage` | `JsonKVStorage` |
| **vector\_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`, `OracleVectorDBStorage` | `NanoVectorDBStorage` |
| **graph\_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`, `Neo4JStorage`, `OracleGraphStorage` | `NetworkXStorage` |
| **log\_level** | | Log level for application runtime | `logging.DEBUG` |
| **chunk\_token\_size** | `int` | Maximum token size per chunk when splitting documents | `1200` |
| **chunk\_overlap\_token\_size** | `int` | Overlap token size between two chunks when splitting documents | `100` |
| **tiktoken\_model\_name** | `str` | Model name for the Tiktoken encoder used to calculate token numbers | `gpt-4o-mini` |
Expand All @@ -881,7 +894,6 @@ Valid modes are:
| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
| **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` |
| **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
|**log\_dir** | `str` | Directory to store logs. | `./` |

</details>

Expand Down
53 changes: 13 additions & 40 deletions lightrag/api/gunicorn_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import os
import logging
from lightrag.kg.shared_storage import finalize_share_data
from lightrag.api.lightrag_server import LightragPathFilter
from lightrag.utils import setup_logger

# Get log directory path from environment variable
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))

# Ensure log directory exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
Expand Down Expand Up @@ -108,6 +111,9 @@ def on_starting(server):
except ImportError:
print("psutil not installed, skipping memory usage reporting")

# Log the location of the LightRAG log file
print(f"LightRAG log file: {log_file_path}\n")

print("Gunicorn initialization complete, forking workers...\n")


Expand All @@ -134,51 +140,18 @@ def post_fork(server, worker):
Executed after a worker has been forked.
This is a good place to set up worker-specific configurations.
"""
# Configure formatters
detailed_formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
simple_formatter = logging.Formatter("%(levelname)s: %(message)s")

def setup_logger(logger_name: str, level: str = "INFO", add_filter: bool = False):
"""Set up a logger with console and file handlers"""
logger_instance = logging.getLogger(logger_name)
logger_instance.setLevel(level)
logger_instance.handlers = [] # Clear existing handlers
logger_instance.propagate = False

# Add console handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(simple_formatter)
console_handler.setLevel(level)
logger_instance.addHandler(console_handler)

# Add file handler
file_handler = logging.handlers.RotatingFileHandler(
filename=log_file_path,
maxBytes=log_max_bytes,
backupCount=log_backup_count,
encoding="utf-8",
)
file_handler.setFormatter(detailed_formatter)
file_handler.setLevel(level)
logger_instance.addHandler(file_handler)

# Add path filter if requested
if add_filter:
path_filter = LightragPathFilter()
logger_instance.addFilter(path_filter)

# Set up main loggers
log_level = loglevel.upper() if loglevel else "INFO"
setup_logger("uvicorn", log_level)
setup_logger("uvicorn.access", log_level, add_filter=True)
setup_logger("lightrag", log_level, add_filter=True)
setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
setup_logger(
"uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
)
setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)

# Set up lightrag submodule loggers
for name in logging.root.manager.loggerDict:
if name.startswith("lightrag."):
setup_logger(name, log_level, add_filter=True)
setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)

# Disable uvicorn.error logger
uvicorn_error_logger = logging.getLogger("uvicorn.error")
Expand Down
5 changes: 3 additions & 2 deletions lightrag/api/lightrag_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,6 @@ async def azure_openai_model_complete(
"similarity_threshold": 0.95,
"use_llm_check": False,
},
log_level=args.log_level,
namespace_prefix=args.namespace_prefix,
auto_manage_storages_states=False,
)
Expand Down Expand Up @@ -359,7 +358,6 @@ async def azure_openai_model_complete(
"similarity_threshold": 0.95,
"use_llm_check": False,
},
log_level=args.log_level,
namespace_prefix=args.namespace_prefix,
auto_manage_storages_states=False,
)
Expand Down Expand Up @@ -437,6 +435,9 @@ def configure_logging():
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))

print(f"\nLightRAG log file: {log_file_path}\n")
os.makedirs(os.path.dirname(log_dir), exist_ok=True)

# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
Expand Down
34 changes: 24 additions & 10 deletions lightrag/lightrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
import configparser
import os
import warnings
from dataclasses import asdict, dataclass, field
from datetime import datetime
from functools import partial
Expand Down Expand Up @@ -85,14 +86,10 @@ class LightRAG:
doc_status_storage: str = field(default="JsonDocStatusStorage")
"""Storage type for tracking document processing statuses."""

# Logging
# Logging (Deprecated, use setup_logger in utils.py instead)
# ---

log_level: int = field(default=logger.level)
"""Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING')."""

log_file_path: str = field(default=os.path.join(os.getcwd(), "lightrag.log"))
"""Log file path."""
log_level: int | None = field(default=None)
log_file_path: str | None = field(default=None)

# Entity extraction
# ---
Expand Down Expand Up @@ -266,13 +263,30 @@ class LightRAG:
_storages_status: StoragesStatus = field(default=StoragesStatus.NOT_CREATED)

def __post_init__(self):
os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True)
logger.info(f"Logger initialized for working directory: {self.working_dir}")

from lightrag.kg.shared_storage import (
initialize_share_data,
)

# Handle deprecated parameters
if self.log_level is not None:
warnings.warn(
"WARNING: log_level parameter is deprecated, use setup_logger in utils.py instead",
UserWarning,
stacklevel=2,
)
if self.log_file_path is not None:
warnings.warn(
"WARNING: log_file_path parameter is deprecated, use setup_logger in utils.py instead",
UserWarning,
stacklevel=2,
)

# Remove these attributes to prevent their use
if hasattr(self, "log_level"):
delattr(self, "log_level")
if hasattr(self, "log_file_path"):
delattr(self, "log_file_path")

initialize_share_data()

if not os.path.exists(self.working_dir):
Expand Down
Loading

0 comments on commit acb074d

Please sign in to comment.