Master Retrieval-Augmented Generation (RAG) to build LLM applications that provide accurate, grounded responses using external knowledge sources.
A RAG pipeline is built from a few core components:
- Vector store — Purpose: store and retrieve document embeddings efficiently. Options: Chroma, Pinecone, and Weaviate are configured below.
- Embedding model — Purpose: convert text to numerical vectors for similarity search. Models: OpenAI embeddings are used throughout these examples; sentence-transformer models are another common choice.
- Chunking — Purpose: split documents into retrievable units. Approaches: recursive character, token-based, semantic, and markdown-aware splitting, covered in the chunking section below.
- Reranking — Purpose: improve retrieval quality by reordering results. Methods: cross-encoder reranking and maximal marginal relevance (MMR), both shown below.
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
# 1. Load documents
loader = DirectoryLoader('./docs', glob="**/*.txt")
documents = loader.load()
# 2. Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
chunks = text_splitter.split_documents(documents)
# 3. Create embeddings and vector store
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(chunks, embeddings)
# 4. Create retrieval chain
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
    return_source_documents=True
)
# 5. Query
result = qa_chain({"query": "What are the main features?"})
print(result['result'])
print(result['source_documents'])
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# Sparse retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 5
# Dense retriever (embeddings)
embedding_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
# Combine with weights
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, embedding_retriever],
    weights=[0.3, 0.7]
)
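The ensemble fuses the two rankings (weighted reciprocal rank fusion) and is queried like any standalone retriever; a minimal usage sketch with an illustrative query:
# Fused results: BM25 ranking weighted 0.3, dense ranking weighted 0.7
hybrid_results = ensemble_retriever.get_relevant_documents("What are the main features?")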
from langchain.retrievers.multi_query import MultiQueryRetriever
# Generate multiple query perspectives
retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=OpenAI()
)
# Single query → multiple variations → combined results
results = retriever.get_relevant_documents("What is the main topic?")
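To inspect the query variations the LLM produces, LangChain logs them from the multi_query module; a small sketch using standard logging:
import logging
logging.basicConfig()
# Surfaces the generated query variants at INFO level
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)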
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
compressor = LLMChainExtractor.from_llm(OpenAI())
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorstore.as_retriever()
)
# Returns only relevant parts of documents
compressed_docs = compression_retriever.get_relevant_documents("query")
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
# Store for parent documents
store = InMemoryStore()
# Small chunks for retrieval, large chunks for context
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter
)
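Documents must be indexed through the retriever itself so child chunks stay linked to their parents; a minimal sketch reusing the documents loaded in the quick-start:
# Child chunks go to the vectorstore, full parents to the docstore, with links kept
retriever.add_documents(documents)
# Matches against small chunks but returns the larger parent chunks as context
parent_docs = retriever.get_relevant_documents("What are the main features?")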
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", " ", ""]  # Try these in order
)
from langchain.text_splitter import TokenTextSplitter
splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=50
)
from langchain_experimental.text_splitter import SemanticChunker
splitter = SemanticChunker(
    embeddings=OpenAIEmbeddings(),
    breakpoint_threshold_type="percentile"
)
from langchain.text_splitter import MarkdownHeaderTextSplitter
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
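Unlike the other splitters, MarkdownHeaderTextSplitter takes raw markdown text and records the matched headers in each chunk's metadata; a quick sketch with an illustrative string:
md_text = "# Guide\n## Setup\nInstall the package.\n## Usage\nRun the tool."
md_chunks = splitter.split_text(md_text)
print(md_chunks[0].metadata)  # e.g. {'Header 1': 'Guide', 'Header 2': 'Setup'}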
import pinecone
from langchain.vectorstores import Pinecone
pinecone.init(api_key="your-api-key", environment="us-west1-gcp")
index = pinecone.Index("your-index-name")
vectorstore = Pinecone(index, embeddings.embed_query, "text")
import weaviate
from langchain.vectorstores import Weaviate
client = weaviate.Client("http://localhost:8080")
vectorstore = Weaviate(client, "Document", "content", embeddings)
from langchain.vectorstores import Chroma
vectorstore = Chroma(
    collection_name="my_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_db"
)
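A collection created this way starts empty, so documents are added explicitly; a sketch reusing the chunks from the quick-start (note that older Chroma wrappers also require an explicit persist call, while newer releases persist automatically):
vectorstore.add_documents(chunks)
vectorstore.persist()  # needed on older Chroma versions; newer ones auto-persist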
# Add metadata during indexing
chunks_with_metadata = []
for i, chunk in enumerate(chunks):
    chunk.metadata = {
        "source": chunk.metadata.get("source"),
        "page": i,
        "category": determine_category(chunk.page_content)  # determine_category is a user-supplied helper
    }
    chunks_with_metadata.append(chunk)
# Filter during retrieval
results = vectorstore.similarity_search(
    "query",
    filter={"category": "technical"},
    k=5
)
# Balance relevance with diversity
results = vectorstore.max_marginal_relevance_search(
    "query",
    k=5,
    fetch_k=20,  # Fetch 20, return top 5 diverse
    lambda_mult=0.5  # 0=max diversity, 1=max relevance
)
from sentence_transformers import CrossEncoder
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
query = "query"  # placeholder query text
# Get initial results
candidates = vectorstore.similarity_search(query, k=20)
# Rerank by scoring each (query, document) pair with the cross-encoder
pairs = [[query, doc.page_content] for doc in candidates]
scores = reranker.predict(pairs)
# Sort by score and take top k
reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)[:5]
prompt_template = """Use the following context to answer the question. If you cannot answer based on the context, say "I don't have enough information."
Context:
{context}
Question: {question}
Answer:"""
prompt_template = """Answer the question based on the context below. Include citations using [1], [2], etc.
Context:
{context}
Question: {question}
Answer (with citations):"""
prompt_template = """Answer the question using the context. Provide a confidence score (0-100%) for your answer.
Context:
{context}
Question: {question}
Answer:
Confidence:"""
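Any of these templates plugs into the RetrievalQA chain from the quick-start via chain_type_kwargs; a sketch assuming the "stuff" chain type shown earlier:
from langchain.prompts import PromptTemplate
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt}  # inject the custom RAG prompt
)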
def evaluate_rag_system(qa_chain, test_cases):
    # calculate_accuracy, evaluate_retrieved_docs, and check_groundedness are
    # user-supplied scoring helpers (e.g., string match, overlap, or LLM-as-judge)
    metrics = {
        'accuracy': [],
        'retrieval_quality': [],
        'groundedness': []
    }
    for test in test_cases:
        result = qa_chain({"query": test['question']})
        # Check if answer matches expected
        accuracy = calculate_accuracy(result['result'], test['expected'])
        metrics['accuracy'].append(accuracy)
        # Check if relevant docs were retrieved
        retrieval_quality = evaluate_retrieved_docs(
            result['source_documents'],
            test['relevant_docs']
        )
        metrics['retrieval_quality'].append(retrieval_quality)
        # Check if answer is grounded in context
        groundedness = check_groundedness(
            result['result'],
            result['source_documents']
        )
        metrics['groundedness'].append(groundedness)
    return {k: sum(v) / len(v) for k, v in metrics.items()}
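A hypothetical test case and call, only to show the input shape the function expects (the expected answer and relevant_docs entries come from your own labeled set):
test_cases = [
    {
        "question": "What are the main features?",
        "expected": "A reference answer written by a human reviewer",  # illustrative
        "relevant_docs": ["docs/features.txt"]  # illustrative document IDs
    }
]
scores = evaluate_rag_system(qa_chain, test_cases)
print(scores)  # averaged accuracy, retrieval_quality, and groundedness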
rag-implementation is an expert AI persona designed to improve your coding workflow: it helps you build Retrieval-Augmented Generation (RAG) systems for LLM applications with vector databases and semantic search. Use it when implementing knowledge-grounded AI, building document Q&A systems, or integrating LLMs with external knowledge bases. It provides senior-level context directly within your IDE.
To install the rag-implementation skill, download the package, extract the files to your project's .cursor/skills directory, and type @rag-implementation in your editor chat to activate the expert instructions. Alternatively, copy the instructions from the panel on the left and paste them into your custom instructions setting. The persona is free to download and integrates with compatible agentic IDEs such as Cursor, Windsurf, GitHub Copilot, and Anthropic MCP servers.
"Adding this rag-implementation persona to my Cursor workspace completely changed the quality of code my AI generates. Saves me hours every week."
Developers who downloaded rag-implementation also use these elite AI personas.
Expert in building 3D experiences for the web - Three.js, React Three Fiber, Spline, WebGL, and interactive 3D scenes. Covers product configurators, 3D portfolios, immersive websites, and bringing depth to web experiences. Use when: 3D website, three.js, WebGL, react three fiber, 3D experience.
Structured guide for setting up A/B tests with mandatory gates for hypothesis, metrics, and execution readiness.
You are an accessibility expert specializing in WCAG compliance, inclusive design, and assistive technology compatibility. Conduct audits, identify barriers, and provide remediation guidance.
Explore our most popular utilities designed for the modern Indian creator.