@@ -20,6 +20,8 @@ async def rag_example():
2020 print ("\n 🚀 RAG System Example" )
2121 print ("=" * 50 )
2222
23+ workspace_id = None
24+
2325 # Step 1: Create knowledge base from documents
2426 print ("\n 1. Creating Knowledge Base" )
2527 print ("-" * 30 )
@@ -70,49 +72,55 @@ async def rag_example():
7072 chunk_overlap = 50
7173 )
7274
75+ workspace_id = result ['workspace' ]['id' ]
7376 print (f"✅ Created workspace: { result ['workspace' ]['name' ]} " )
74- print (f" ID: { result [ 'workspace' ][ 'id' ] } " )
77+ print (f" ID: { workspace_id } " )
7578 print (f" Processed { result ['files_processed' ]} files" )
7679 print (f" Created { result ['chunks_created' ]} chunks" )
80+
81+ # Important: Give the system time to index the documents
82+ print ("\n ⏳ Waiting for indexing to complete..." )
83+ await asyncio .sleep (5 ) # Wait 5 seconds for indexing
7784
78- # Step 2: Demonstrate hierarchical search
79- print ("\n 2. Hierarchical Semantic Search " )
85+ # Step 2: Verify documents are indexed by searching
86+ print ("\n 2. Verifying Document Indexing " )
8087 print ("-" * 30 )
8188 async with CZeroEngineClient () as client :
82- # Search with hierarchy support
83- results = await client .semantic_search (
84- query = "How does AI and machine learning work? " ,
89+ # Search for content we just indexed, using workspace filter
90+ test_results = await client .semantic_search (
91+ query = "artificial intelligence " ,
8592 limit = 3 ,
86- include_hierarchy = True ,
87- hierarchy_level = None # Search all levels
93+ similarity_threshold = 0.3 , # Low threshold to ensure we find something
94+ workspace_filter = workspace_id # Search only in our workspace
8895 )
8996
90- print (f"Found { len (results .results )} results with hierarchy:" )
91- for i , res in enumerate (results .results , 1 ):
92- print (f"\n { i } . Score: { res .similarity :.3f} " )
93- print (f" { res .content [:100 ]} ..." )
94- if res .parent_chunk :
95- print (f" ↳ Has parent context" )
97+ if test_results .results :
98+ print (f"✅ Found { len (test_results .results )} indexed documents" )
99+ for i , res in enumerate (test_results .results , 1 ):
100+ print (f" { i } . Score: { res .similarity :.3f} " )
101+ print (f" { res .content [:100 ]} ..." )
102+ else :
103+ print ("⚠️ Documents may still be indexing. Continuing with example..." )
96104
97105 # Step 3: Use RAG for Q&A
98106 print ("\n 3. RAG-Enhanced Q&A" )
99107 print ("-" * 30 )
108+ print ("Note: RAG searches across all workspaces, not just the one we created." )
100109 async with RAGWorkflow () as rag_workflow :
101110
102- # Ask questions with RAG
111+ # Ask questions that match our indexed documents
103112 questions = [
104- "What is CZero Engine and what are its main features ?" ,
113+ "What is artificial intelligence and machine learning ?" ,
105114 "How does semantic search work?" ,
106- "What's the difference between AI, machine learning, and deep learning?" ,
107- "Does CZero Engine support GPU acceleration?"
115+ "What features does CZero Engine provide?"
108116 ]
109117
110118 for i , question in enumerate (questions , 1 ):
111119 print (f"\n 📝 Q{ i } : { question } " )
112120 response = await rag_workflow .ask (
113121 question = question ,
114122 chunk_limit = 3 ,
115- similarity_threshold = 0.5
123+ similarity_threshold = 0.3 # Lower threshold to be more inclusive
116124 )
117125 print (f"💡 A{ i } : { response .response [:250 ]} ..." )
118126
@@ -124,7 +132,7 @@ async def rag_example():
124132 # Step 4: Compare with and without RAG
125133 print ("\n 4. RAG vs Non-RAG Comparison" )
126134 print ("-" * 30 )
127- comparison_q = "What document processing features does CZero Engine provide ?"
135+ comparison_q = "What is machine learning and how does it relate to AI ?"
128136
129137 async with RAGWorkflow () as rag_workflow :
130138 comparison = await rag_workflow .compare_with_without_rag (
@@ -133,28 +141,31 @@ async def rag_example():
133141
134142 print (f"\n 🤔 Question: { comparison_q } " )
135143 print ("\n ❌ Without RAG (generic response):" )
136- print (f" { comparison ['without_rag' ][:200 ]} ..." )
144+ print (f" { comparison ['without_rag' ]. response [:200 ]} ..." )
137145 print ("\n ✅ With RAG (context-aware):" )
138- print (f" { comparison ['with_rag' ][:200 ]} ..." )
146+ print (f" { comparison ['with_rag' ]. response [:200 ]} ..." )
139147 print (f"\n 📊 Statistics:" )
140- print (f" Context chunks used: { comparison ['chunks_used' ]} " )
148+ chunks_used = len (comparison ['with_rag' ].context_used ) if comparison ['with_rag' ].context_used else 0
149+ print (f" Context chunks used: { chunks_used } " )
141150 print (f" Improvement: More specific and accurate with RAG" )
142151
143152 # Step 5: Find similar content
144153 print ("\n 5. Similarity Search" )
145154 print ("-" * 30 )
146155 async with CZeroEngineClient () as client :
147- # Get all chunks first
156+ # Search in our workspace for semantic search content
148157 search_res = await client .semantic_search (
149158 query = "semantic search" ,
150- limit = 1
159+ limit = 1 ,
160+ workspace_filter = workspace_id
151161 )
152162
153163 if search_res .results :
154164 chunk_id = search_res .results [0 ].chunk_id
155165 similar = await client .similarity_search (
156166 chunk_id = chunk_id ,
157- limit = 3
167+ limit = 3 ,
168+ similarity_threshold = 0.3 # Lower threshold
158169 )
159170
160171 print (f"Content similar to chunk '{ chunk_id [:20 ]} ...':\n " )
0 commit comments