Skip to content

Commit b7fe917

Browse files
committed
fix: skill search ranking - use overview for embedding and fix visited set filtering
1. skill_processor.py: Use the LLM-generated overview for vectorization instead of the short abstract, aligning with how resources handle directory vectorization in semantic_processor.py.
2. hierarchical_retriever.py: Separate "visited for traversal" from "collected as result". Previously, the visited set dropped the most relevant results: the global search found them first and marked them as visited, and the subsequent parent-directory search then skipped them as children.
1 parent 68ed45b commit b7fe917

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

openviking/retrieve/hierarchical_retriever.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -320,20 +320,27 @@ def merge_filter(base_filter: Dict, extra_filter: Optional[Dict]) -> Dict:
320320
alpha * score + (1 - alpha) * current_score if current_score else score
321321
)
322322

323-
if passes_threshold(final_score) and uri not in visited:
323+
if not passes_threshold(final_score):
324+
logger.debug(
325+
f"[RecursiveSearch] URI {uri} score {final_score} did not pass threshold {effective_threshold}"
326+
)
327+
continue
328+
329+
# Always collect results that pass threshold, even if already
330+
# visited as a directory starting point. The visited set only
331+
# prevents re-entering directories for child search.
332+
if not any(c.get("uri") == uri for c in collected):
324333
r["_final_score"] = final_score
325334
collected.append(r)
326335
logger.debug(
327336
f"[RecursiveSearch] Added URI: {uri} to candidates with score: {final_score}"
328337
)
338+
339+
if uri not in visited:
329340
if r.get("is_leaf"):
330341
visited.add(uri)
331-
continue
332-
heapq.heappush(dir_queue, (-final_score, uri))
333-
else:
334-
logger.debug(
335-
f"[RecursiveSearch] URI {uri} score {final_score} did not pass threshold {effective_threshold}"
336-
)
342+
else:
343+
heapq.heappush(dir_queue, (-final_score, uri))
337344

338345
# Convergence check
339346
current_topk = sorted(collected, key=lambda x: x.get("_final_score", 0), reverse=True)[

openviking/utils/skill_processor.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,12 @@ async def process_skill(
7474
"source_path": skill_dict.get("source_path", ""),
7575
},
7676
)
77-
context.set_vectorize(Vectorize(text=context.abstract))
78-
7977
overview = await self._generate_overview(skill_dict, config)
8078

79+
# Use overview for vectorization (richer semantic content than abstract alone)
80+
vectorize_text = overview if overview else context.abstract
81+
context.set_vectorize(Vectorize(text=vectorize_text))
82+
8183
skill_dir_uri = f"viking://agent/skills/{context.meta['name']}"
8284

8385
await self._write_skill_content(

0 commit comments

Comments
 (0)