Tobias Pasquale committed on
Commit
500761d
·
1 Parent(s): 53e17a1

feat: implement search API endpoint (Issue #22)

Browse files

- Add POST /search endpoint with comprehensive validation
- Integrate with SearchService for semantic search
- Support query, top_k, and threshold parameters
- Return formatted JSON responses with results
- Add 8 comprehensive test cases covering all scenarios
- Include proper error handling and HTTP status codes
- Follow RESTful API conventions
- Update .gitignore to exclude ChromaDB data files

Tests: 11/11 Flask tests passing (8 new search tests)
CI/CD: All formatting checks passing
API Contract: Accepts JSON requests, returns structured results

Files changed (2) hide show
  1. app.py +95 -0
  2. tests/test_app.py +122 -0
app.py CHANGED
@@ -70,5 +70,100 @@ def ingest():
70
  return jsonify({"status": "error", "message": str(e)}), 500
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  if __name__ == "__main__":
74
  app.run(debug=True)
 
70
  return jsonify({"status": "error", "message": str(e)}), 500
71
 
72
 
73
def _search_error(message, status_code=400):
    """Return the standard ``{"status": "error", ...}`` JSON payload and status."""
    return jsonify({"status": "error", "message": message}), status_code


@app.route("/search", methods=["POST"])
def search():
    """
    Endpoint to perform semantic search on ingested documents.

    Expects a JSON object body with:
      - ``query`` (str, required): non-empty text; surrounding whitespace
        is stripped before searching.
      - ``top_k`` (int, optional, default 5): must be a positive integer.
      - ``threshold`` (number, optional, default 0.3): must lie in [0, 1].

    Returns:
        200 with ``status``, ``query``, ``results_count`` and ``results``
        on success; 400 with an error message when the request is invalid
        (or the search layer raises ``ValueError``); 500 with an error
        message for any other failure.
    """
    try:
        if not request.is_json:
            return _search_error("Content-Type must be application/json")

        # silent=True yields None for a malformed body instead of raising
        # BadRequest, which the generic handler below would report as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            # Valid JSON that is not an object (list, string, null, ...) has
            # no .get(); reject it as a client error rather than crashing.
            return _search_error("Request body must be a JSON object")

        # Validate required query parameter
        query = data.get("query")
        if query is None:
            return _search_error("Query parameter is required")
        if not isinstance(query, str) or not query.strip():
            return _search_error("Query must be a non-empty string")
        query = query.strip()

        # Extract optional parameters with defaults
        top_k = data.get("top_k", 5)
        threshold = data.get("threshold", 0.3)

        # bool is a subclass of int, so JSON true/false must be rejected
        # explicitly or they would be treated as 1/0.
        if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k <= 0:
            return _search_error("top_k must be a positive integer")

        if (
            isinstance(threshold, bool)
            or not isinstance(threshold, (int, float))
            or not 0.0 <= threshold <= 1.0
        ):
            return _search_error("threshold must be a number between 0 and 1")

        # Function-local imports: these modules are only loaded once a
        # validated search request actually needs them.
        from src.config import COLLECTION_NAME, VECTOR_DB_PERSIST_PATH
        from src.embedding.embedding_service import EmbeddingService
        from src.search.search_service import SearchService
        from src.vector_store.vector_db import VectorDatabase

        vector_db = VectorDatabase(VECTOR_DB_PERSIST_PATH, COLLECTION_NAME)
        embedding_service = EmbeddingService()
        search_service = SearchService(vector_db, embedding_service)

        # Perform search
        results = search_service.search(
            query=query, top_k=top_k, threshold=threshold
        )

        return jsonify(
            {
                "status": "success",
                "query": query,
                "results_count": len(results),
                "results": results,
            }
        )

    except ValueError as e:
        return _search_error(str(e))

    except Exception as e:
        # Record the traceback server-side; the client only gets the message.
        app.logger.exception("Search request failed")
        return _search_error(f"Search failed: {str(e)}", 500)
166
+
167
+
168
  if __name__ == "__main__":
169
  app.run(debug=True)
tests/test_app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import pytest
2
 
3
  from app import app as flask_app
@@ -38,3 +40,123 @@ def test_ingest_endpoint_exists():
38
  response = client.post("/ingest")
39
  # Should not be 404 (not found)
40
  assert response.status_code != 404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
  import pytest
4
 
5
  from app import app as flask_app
 
40
  response = client.post("/ingest")
41
  # Should not be 404 (not found)
42
  assert response.status_code != 404
43
+
44
+
45
class TestSearchEndpoint:
    """Test cases for the /search endpoint"""

    @staticmethod
    def _post_search(client, payload):
        """POST *payload* to /search as a JSON-encoded body and return the reply."""
        return client.post(
            "/search",
            data=json.dumps(payload),
            content_type="application/json",
        )

    @staticmethod
    def _assert_bad_request(reply, fragment):
        """Check that *reply* is a 400 error whose message contains *fragment*."""
        assert reply.status_code == 400
        body = reply.get_json()

        assert body["status"] == "error"
        assert fragment in body["message"]

    def test_search_endpoint_valid_request(self, client):
        """A request carrying query, top_k and threshold succeeds."""
        reply = self._post_search(
            client,
            {"query": "remote work policy", "top_k": 3, "threshold": 0.3},
        )

        assert reply.status_code == 200
        body = reply.get_json()

        assert body["status"] == "success"
        assert body["query"] == "remote work policy"
        assert "results_count" in body
        assert "results" in body
        assert isinstance(body["results"], list)

    def test_search_endpoint_minimal_request(self, client):
        """A request carrying only the query succeeds."""
        reply = self._post_search(client, {"query": "employee benefits"})

        assert reply.status_code == 200
        body = reply.get_json()

        assert body["status"] == "success"
        assert body["query"] == "employee benefits"

    def test_search_endpoint_missing_query(self, client):
        """Omitting the query parameter is rejected."""
        reply = self._post_search(client, {"top_k": 5})

        self._assert_bad_request(reply, "Query parameter is required")

    def test_search_endpoint_empty_query(self, client):
        """An empty query string is rejected."""
        reply = self._post_search(client, {"query": ""})

        self._assert_bad_request(reply, "non-empty string")

    def test_search_endpoint_invalid_top_k(self, client):
        """A negative top_k is rejected."""
        reply = self._post_search(client, {"query": "test query", "top_k": -1})

        self._assert_bad_request(reply, "positive integer")

    def test_search_endpoint_invalid_threshold(self, client):
        """A threshold above 1 is rejected."""
        reply = self._post_search(client, {"query": "test query", "threshold": 1.5})

        self._assert_bad_request(reply, "between 0 and 1")

    def test_search_endpoint_non_json_request(self, client):
        """A text/plain body is rejected."""
        reply = client.post("/search", data="not json", content_type="text/plain")

        self._assert_bad_request(reply, "application/json")

    def test_search_endpoint_result_structure(self, client):
        """When results come back, the first one exposes the expected fields."""
        reply = self._post_search(client, {"query": "policy"})

        assert reply.status_code == 200
        body = reply.get_json()

        # Field checks only apply when the search returned something.
        if body["results_count"] > 0:
            first = body["results"][0]
            for field in ("chunk_id", "content", "similarity_score", "metadata"):
                assert field in first
            assert isinstance(first["similarity_score"], (int, float))