 
  
  
pip install deepeval
from deepeval.metrics.ragas import (
    RAGASContextualPrecisionMetric,
    RAGASFaithfulnessMetric,
    RAGASContextualRecallMetric,
    RAGASAnswerRelevancyMetric,
)
# Build the four RAGAS metrics; each constructor is called with no arguments.
contextual_precision = RAGASContextualPrecisionMetric()
contextual_recall = RAGASContextualRecallMetric()
answer_relevancy = RAGASAnswerRelevancyMetric()
faithfulness = RAGASFaithfulnessMetric()
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams
# Custom G-Eval metric described by a natural-language criteria string.
# Only the test case's actual output is passed as an evaluation parameter.
bias = GEval(
    name="Bias",
    # The criteria label now matches the metric name; it previously read
    # "Coherence" although the text describes a bias check.
    criteria="Bias - determine if the actual output has an inherent bias against Asian culture.",
    evaluation_params=[LLMTestCaseParams.ACTUAL_OUTPUT],
)
from deepeval import evaluate
from deepeval.test_case import LLMTestCase
# Placeholder test case: every field is an empty string (and the retrieval
# context a one-element list holding an empty string) to be replaced with
# real data.
test_case = LLMTestCase(
    input="",
    actual_output="",
    expected_output="",
    retrieval_context=[""],
)
evaluate(
  test_cases=[test_case], 
  metrics=[
    contextual_precision, 
    contextual_recall, 
    answer_relevancy, 
    faithfulness,
    bias
  ]
)
pip install deepeval
deepeval login
touch test_rag.py
from deepeval.metrics.ragas import (
    RAGASContextualPrecisionMetric,
    RAGASFaithfulnessMetric,
    RAGASContextualRecallMetric,
    RAGASAnswerRelevancyMetric,
)
from deepeval.metrics import BiasMetric
# Every metric is constructed with threshold=0.5.
# NOTE(review): presumably the minimum score a test case must reach to pass —
# confirm against the deepeval metric documentation.
bias = BiasMetric(threshold=0.5)
contextual_precision = RAGASContextualPrecisionMetric(threshold=0.5)
contextual_recall = RAGASContextualRecallMetric(threshold=0.5)
answer_relevancy = RAGASAnswerRelevancyMetric(threshold=0.5)
faithfulness = RAGASFaithfulnessMetric(threshold=0.5)
...
# Replace this with your own data.
# Each entry pairs an "input" with its "expected_output".
input_output_pairs = [
    {"input": "...", "expected_output": "..."},
    {"input": "...", "expected_output": "..."},
]
import pytest
from deepeval import assert_test
from deepeval.metrics.ragas import (
    RAGASContextualPrecisionMetric,
    RAGASFaithfulnessMetric,
    RAGASContextualRecallMetric,
    RAGASAnswerRelevancyMetric,
)
from deepeval.metrics import BiasMetric
from deepeval.test_case import LLMTestCase
# --- Initialize metrics with thresholds ---
# All five metrics are built with the same threshold value of 0.5.
# NOTE(review): presumably the minimum passing score — confirm against the
# deepeval metric documentation.
bias = BiasMetric(threshold=0.5)
contextual_precision = RAGASContextualPrecisionMetric(threshold=0.5)
contextual_recall = RAGASContextualRecallMetric(threshold=0.5)
answer_relevancy = RAGASAnswerRelevancyMetric(threshold=0.5)
faithfulness = RAGASFaithfulnessMetric(threshold=0.5)
# --- Specify evaluation metrics to use ---
# The list handed to assert_test for every test case.
evaluation_metrics = [
    bias,
    contextual_precision,
    contextual_recall,
    answer_relevancy,
    faithfulness,
]
# --- Specify inputs to test RAG app on ---
# Two placeholder entries with empty strings; each dict supplies the
# "input" and "expected_output" keys read by the parametrized test.
input_output_pairs = [
    {"input": "", "expected_output": ""},
    {"input": "", "expected_output": ""},
]
#######################################
# Loop through input output pairs #####
#######################################
@pytest.mark.parametrize(
    "input_output_pair",
    input_output_pairs,
)
def test_llamaindex(input_output_pair: dict):
    """Evaluate the RAG application on one input / expected-output pair.

    The ``parametrize`` decorator supplies each entry of
    ``input_output_pairs`` as ``input_output_pair``.

    Args:
        input_output_pair: Mapping read through its ``"input"`` and
            ``"expected_output"`` keys; a missing key yields ``None``.
    """
    # Bound to ``query`` rather than ``input`` so the builtin is not shadowed.
    query = input_output_pair.get("input")
    expected_output = input_output_pair.get("expected_output")
    # Hypothetical RAG application for demonstration only.
    # Replace this with your own RAG implementation.
    # The idea is you'll be generating LLM outputs and
    # getting the retrieval context at evaluation time for each input
    actual_output = rag_application.query(query)
    retrieval_context = rag_application.get_retrieval_context()
    test_case = LLMTestCase(
        input=query,
        actual_output=actual_output,
        retrieval_context=retrieval_context,
        expected_output=expected_output,
    )
    # assert test case
    assert_test(test_case, evaluation_metrics)
deepeval test run test_rag.py
name: RAG Deployment Evaluations
# Run the workflow on every push.
on:
  push:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      # Some extra steps to setup and install dependencies
      # ...

      # Optional Login
      - name: Login to Confident
        env:
          CONFIDENT_API_KEY: ${{ secrets.CONFIDENT_API_KEY }}
        run: poetry run deepeval login --confident-api-key "$CONFIDENT_API_KEY"

      - name: Run deepeval tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: poetry run deepeval test run test_rag.py