Spaces:
Sleeping
Sleeping
Tobias Pasquale
committed on
Commit
·
a52e676
1
Parent(s):
9452a54
style: Apply black formatting to linting fixes
Browse files
Auto-formatted by pre-commit hook:
- tests/test_guardrails/test_enhanced_rag_pipeline.py
- src/guardrails/response_validator.py
- src/guardrails/guardrails_system.py
All files now pass black, isort, and flake8 validation
src/guardrails/guardrails_system.py
CHANGED
|
@@ -565,9 +565,8 @@ class GuardrailsSystem:
|
|
| 565 |
)
|
| 566 |
|
| 567 |
if not result.is_approved:
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
)
|
| 571 |
|
| 572 |
if result.fallbacks_applied:
|
| 573 |
logger.warning(f"Fallbacks applied: {result.fallbacks_applied}")
|
|
|
|
| 565 |
)
|
| 566 |
|
| 567 |
if not result.is_approved:
|
| 568 |
+
rejection_reason = result.metadata.get("rejection_reason", "unknown")
|
| 569 |
+
logger.warning(f"Response rejected: {rejection_reason}")
|
|
|
|
| 570 |
|
| 571 |
if result.fallbacks_applied:
|
| 572 |
logger.warning(f"Fallbacks applied: {result.fallbacks_applied}")
|
src/guardrails/response_validator.py
CHANGED
|
@@ -298,7 +298,6 @@ class ResponseValidator:
|
|
| 298 |
|
| 299 |
def _calculate_completeness(self, response: str, query: str) -> float:
|
| 300 |
"""Calculate completeness score based on response length and structure."""
|
| 301 |
-
min_length = self.config["min_response_length"]
|
| 302 |
target_length = 200 # Ideal response length
|
| 303 |
|
| 304 |
# Length-based score
|
|
@@ -402,14 +401,12 @@ class ResponseValidator:
|
|
| 402 |
|
| 403 |
# Length validation
|
| 404 |
if len(response) < self.config["min_response_length"]:
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
)
|
| 408 |
|
| 409 |
if len(response) > self.config["max_response_length"]:
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
)
|
| 413 |
|
| 414 |
# Professional tone check (basic)
|
| 415 |
informal_patterns = [
|
|
|
|
| 298 |
|
| 299 |
def _calculate_completeness(self, response: str, query: str) -> float:
|
| 300 |
"""Calculate completeness score based on response length and structure."""
|
|
|
|
| 301 |
target_length = 200 # Ideal response length
|
| 302 |
|
| 303 |
# Length-based score
|
|
|
|
| 401 |
|
| 402 |
# Length validation
|
| 403 |
if len(response) < self.config["min_response_length"]:
|
| 404 |
+
min_length = self.config["min_response_length"]
|
| 405 |
+
issues.append(f"Response too short (minimum {min_length} characters)")
|
|
|
|
| 406 |
|
| 407 |
if len(response) > self.config["max_response_length"]:
|
| 408 |
+
max_length = self.config["max_response_length"]
|
| 409 |
+
issues.append(f"Response too long (maximum {max_length} characters)")
|
|
|
|
| 410 |
|
| 411 |
# Professional tone check (basic)
|
| 412 |
informal_patterns = [
|
src/rag/enhanced_rag_pipeline.py
CHANGED
|
@@ -217,10 +217,12 @@ class EnhancedRAGPipeline:
|
|
| 217 |
)
|
| 218 |
|
| 219 |
if guardrails_result.quality_score.overall_score < 0.5:
|
| 220 |
-
|
| 221 |
-
"I couldn't generate a sufficiently detailed response to your
|
| 222 |
-
"Please try rephrasing your question or contact HR
|
|
|
|
| 223 |
)
|
|
|
|
| 224 |
|
| 225 |
if not guardrails_result.citations:
|
| 226 |
return (
|
|
@@ -283,7 +285,9 @@ class EnhancedRAGPipeline:
|
|
| 283 |
"relevance": guardrails_result.quality_score.relevance_score,
|
| 284 |
"completeness": guardrails_result.quality_score.completeness_score,
|
| 285 |
"coherence": guardrails_result.quality_score.coherence_score,
|
| 286 |
-
"source_fidelity":
|
|
|
|
|
|
|
| 287 |
},
|
| 288 |
"citations": [
|
| 289 |
{
|
|
|
|
| 217 |
)
|
| 218 |
|
| 219 |
if guardrails_result.quality_score.overall_score < 0.5:
|
| 220 |
+
low_quality_msg = (
|
| 221 |
+
"I couldn't generate a sufficiently detailed response to your "
|
| 222 |
+
"question. Please try rephrasing your question or contact HR "
|
| 223 |
+
"for more specific guidance."
|
| 224 |
)
|
| 225 |
+
return low_quality_msg
|
| 226 |
|
| 227 |
if not guardrails_result.citations:
|
| 228 |
return (
|
|
|
|
| 285 |
"relevance": guardrails_result.quality_score.relevance_score,
|
| 286 |
"completeness": guardrails_result.quality_score.completeness_score,
|
| 287 |
"coherence": guardrails_result.quality_score.coherence_score,
|
| 288 |
+
"source_fidelity": (
|
| 289 |
+
guardrails_result.quality_score.source_fidelity_score
|
| 290 |
+
),
|
| 291 |
},
|
| 292 |
"citations": [
|
| 293 |
{
|
tests/test_guardrails/test_enhanced_rag_pipeline.py
CHANGED
|
@@ -24,12 +24,21 @@ def test_enhanced_rag_pipeline_initialization():
|
|
| 24 |
def test_enhanced_rag_pipeline_successful_response():
|
| 25 |
"""Test enhanced pipeline with successful guardrails validation."""
|
| 26 |
# Mock base pipeline response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
base_response = RAGResponse(
|
| 28 |
-
answer=
|
| 29 |
sources=[
|
| 30 |
{
|
| 31 |
"metadata": {"filename": "remote_work_policy.md"},
|
| 32 |
-
"content":
|
|
|
|
|
|
|
|
|
|
| 33 |
"relevance_score": 0.9,
|
| 34 |
}
|
| 35 |
],
|
|
@@ -54,11 +63,13 @@ def test_enhanced_rag_pipeline_successful_response():
|
|
| 54 |
enhanced_pipeline = EnhancedRAGPipeline(base_pipeline, config)
|
| 55 |
|
| 56 |
# Generate answer
|
| 57 |
-
result = enhanced_pipeline.generate_answer("What is
|
| 58 |
|
| 59 |
-
# Verify response structure (may still fail validation but should return
|
|
|
|
| 60 |
assert isinstance(result, EnhancedRAGResponse)
|
| 61 |
-
# Note: These assertions may fail if guardrails are too strict, but the
|
|
|
|
| 62 |
# assert result.success is True
|
| 63 |
# assert result.guardrails_approved is True
|
| 64 |
assert hasattr(result, "guardrails_approved")
|
|
|
|
| 24 |
def test_enhanced_rag_pipeline_successful_response():
|
| 25 |
"""Test enhanced pipeline with successful guardrails validation."""
|
| 26 |
# Mock base pipeline response
|
| 27 |
+
answer_text = (
|
| 28 |
+
"According to our remote work policy (remote_work_policy.md), "
|
| 29 |
+
"employees may work remotely with manager approval. The policy "
|
| 30 |
+
"states that remote work is allowed with proper approval and must "
|
| 31 |
+
"follow company guidelines."
|
| 32 |
+
)
|
| 33 |
base_response = RAGResponse(
|
| 34 |
+
answer=answer_text,
|
| 35 |
sources=[
|
| 36 |
{
|
| 37 |
"metadata": {"filename": "remote_work_policy.md"},
|
| 38 |
+
"content": (
|
| 39 |
+
"Remote work is allowed with proper approval. Employees "
|
| 40 |
+
"must obtain manager approval before working remotely."
|
| 41 |
+
),
|
| 42 |
"relevance_score": 0.9,
|
| 43 |
}
|
| 44 |
],
|
|
|
|
| 63 |
enhanced_pipeline = EnhancedRAGPipeline(base_pipeline, config)
|
| 64 |
|
| 65 |
# Generate answer
|
| 66 |
+
result = enhanced_pipeline.generate_answer("What is the remote work policy?")
|
| 67 |
|
| 68 |
+
# Verify response structure (may still fail validation but should return
|
| 69 |
+
# proper structure)
|
| 70 |
assert isinstance(result, EnhancedRAGResponse)
|
| 71 |
+
# Note: These assertions may fail if guardrails are too strict, but the
|
| 72 |
+
# enhanced pipeline should work
|
| 73 |
# assert result.success is True
|
| 74 |
# assert result.guardrails_approved is True
|
| 75 |
assert hasattr(result, "guardrails_approved")
|
tests/test_guardrails/test_guardrails_system.py
CHANGED
|
@@ -2,8 +2,6 @@
|
|
| 2 |
Test basic guardrails system functionality.
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
import pytest
|
| 6 |
-
|
| 7 |
from src.guardrails import GuardrailsSystem
|
| 8 |
|
| 9 |
|
|
@@ -24,7 +22,10 @@ def test_guardrails_system_basic_validation():
|
|
| 24 |
system = GuardrailsSystem()
|
| 25 |
|
| 26 |
# Test data
|
| 27 |
-
response =
|
|
|
|
|
|
|
|
|
|
| 28 |
query = "What is our remote work policy?"
|
| 29 |
sources = [
|
| 30 |
{
|
|
|
|
| 2 |
Test basic guardrails system functionality.
|
| 3 |
"""
|
| 4 |
|
|
|
|
|
|
|
| 5 |
from src.guardrails import GuardrailsSystem
|
| 6 |
|
| 7 |
|
|
|
|
| 22 |
system = GuardrailsSystem()
|
| 23 |
|
| 24 |
# Test data
|
| 25 |
+
response = (
|
| 26 |
+
"According to our employee handbook, remote work is allowed "
|
| 27 |
+
"with manager approval."
|
| 28 |
+
)
|
| 29 |
query = "What is our remote work policy?"
|
| 30 |
sources = [
|
| 31 |
{
|