UPDATE
This commit is contained in:
+41
-23
@@ -12,6 +12,7 @@ from pathlib import Path
|
||||
from .chunker import chunk_text, chunk_texts
|
||||
from .config import load_rag_config, RAGConfig
|
||||
from .embedding import embed_texts
|
||||
from .observability import classify_exception, log_event, observe_operation, record_metric
|
||||
from .user_data import load_user_sources, build_user_weather_text
|
||||
from .vector_store import QdrantVectorStore
|
||||
|
||||
@@ -36,7 +37,19 @@ def _load_file(path: Path) -> str | None:
|
||||
return None
|
||||
try:
|
||||
return path.read_text(encoding="utf-8").strip()
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
failure = classify_exception(exc)
|
||||
log_event(
|
||||
level=40,
|
||||
message="rag ingest file load failed",
|
||||
source="rag.ingest",
|
||||
provider=None,
|
||||
operation="load_file",
|
||||
result_status="error",
|
||||
error_code=failure.error_code,
|
||||
path=str(path),
|
||||
)
|
||||
record_metric("rag.ingest.file_load_failure", error_code=failure.error_code)
|
||||
return None
|
||||
|
||||
|
||||
@@ -122,12 +135,14 @@ def ingest(
|
||||
"""
|
||||
cfg = config or load_rag_config()
|
||||
store = QdrantVectorStore(config=cfg)
|
||||
if recreate:
|
||||
store.ensure_collection(recreate=True)
|
||||
with observe_operation(source="rag.ingest", provider=cfg.embedding.provider, operation="ingest"):
|
||||
if recreate:
|
||||
store.ensure_collection(recreate=True)
|
||||
|
||||
sources = load_sources(config=cfg, kb_name=kb_name)
|
||||
if not sources:
|
||||
return {"chunks_added": 0, "sources": [], "error": "هیچ منبعی یافت نشد"}
|
||||
sources = load_sources(config=cfg, kb_name=kb_name)
|
||||
if not sources:
|
||||
record_metric("rag.ingest.empty_sources", kb_name=kb_name)
|
||||
return {"chunks_added": 0, "sources": [], "error": "هیچ منبعی یافت نشد"}
|
||||
|
||||
all_chunks: list[str] = []
|
||||
all_metas: list[dict] = []
|
||||
@@ -146,24 +161,27 @@ def ingest(
|
||||
"kb_name": src_kb,
|
||||
})
|
||||
|
||||
if not all_chunks:
|
||||
return {"chunks_added": 0, "sources": [s[0] for s in sources], "error": "هیچ چانکی ساخته نشد"}
|
||||
if not all_chunks:
|
||||
record_metric("rag.ingest.empty_chunks", kb_name=kb_name)
|
||||
return {"chunks_added": 0, "sources": [s[0] for s in sources], "error": "هیچ چانکی ساخته نشد"}
|
||||
|
||||
embeddings = embed_texts(all_chunks, config=cfg)
|
||||
if len(embeddings) != len(all_chunks):
|
||||
embeddings = embed_texts(all_chunks, config=cfg)
|
||||
if len(embeddings) != len(all_chunks):
|
||||
record_metric("rag.ingest.embedding_mismatch", kb_name=kb_name)
|
||||
return {
|
||||
"chunks_added": 0,
|
||||
"sources": [s[0] for s in sources],
|
||||
"error": f"تعداد embed با چانکها مطابقت ندارد: {len(embeddings)} vs {len(all_chunks)}",
|
||||
}
|
||||
|
||||
store.add_documents(
|
||||
ids=all_ids,
|
||||
embeddings=embeddings,
|
||||
documents=all_chunks,
|
||||
metadatas=all_metas,
|
||||
)
|
||||
record_metric("rag.ingest.success", kb_name=kb_name, chunks=len(all_chunks))
|
||||
return {
|
||||
"chunks_added": 0,
|
||||
"chunks_added": len(all_chunks),
|
||||
"sources": [s[0] for s in sources],
|
||||
"error": f"تعداد embed با چانکها مطابقت ندارد: {len(embeddings)} vs {len(all_chunks)}",
|
||||
}
|
||||
|
||||
store.add_documents(
|
||||
ids=all_ids,
|
||||
embeddings=embeddings,
|
||||
documents=all_chunks,
|
||||
metadatas=all_metas,
|
||||
)
|
||||
return {
|
||||
"chunks_added": len(all_chunks),
|
||||
"sources": [s[0] for s in sources],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user