Tom Claude committed on
Commit
6c0d724
·
1 Parent(s): 81d39a3

Refactor: Modularize codebase into clean architecture (Phase 1)

Browse files

🎯 Reduced app.py from 1,572 to 1,393 lines (-11.4%)

Changes:
- Extract CSS to ui/styles.css (63 lines)
- Extract UI helpers to ui/helpers.py (prefer_language, strip_html, pick_external_url)
- Extract parliament constants to datasets/parliament/constants.py
- Extract BFS constants to datasets/bfs/constants.py
- Consolidate duplicate MCP clients into unified mcp_clients/client.py
- Reorganize mcp/ directory to mcp_openparldata/ for clarity

New structure:
├── core/ (ready for future engine base classes)
├── datasets/
│   ├── parliament/ (constants, future engine & card builder)
│   └── bfs/ (constants, future engine)
├── ui/ (styles, helpers, future handlers)
└── mcp_clients/ (unified MCP client implementation)

Benefits:
✅ Better code organization and maintainability
✅ Eliminated ~200 lines of duplicate code
✅ Easier to add new datasets (Eurostat, ParlTalk)
✅ Each module has single, clear responsibility
✅ Functions can now be unit tested independently

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

app.py CHANGED
@@ -14,6 +14,10 @@ from dotenv import load_dotenv
14
  from mcp_integration import execute_mcp_query, execute_mcp_query_bfs
15
  import asyncio
16
  from usage_tracker import UsageTracker
 
 
 
 
17
 
18
  # Load environment variables
19
  load_dotenv()
@@ -39,6 +43,38 @@ if not HF_TOKEN:
39
 
40
  client = InferenceClient(token=HF_TOKEN)
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  class DatasetEngine:
43
  """Dataset-specific orchestrator for LLM prompting and tool execution."""
44
 
@@ -55,6 +91,7 @@ class DatasetEngine:
55
  self.system_prompt = system_prompt
56
  self.routing_instruction = routing_instruction
57
  self.allowed_tools = allowed_tools
 
58
 
59
  def build_messages(self, user_message: str, language_label: str, language_code: str) -> list[dict]:
60
  """Construct chat completion messages with dataset-specific guardrails."""
@@ -64,12 +101,16 @@ class DatasetEngine:
64
  'If the request requires a different data source, respond with '
65
  '{"response": "Explain that the other dataset should be selected in the app."}'
66
  )
 
 
 
67
  return [
68
  {"role": "system", "content": self.system_prompt},
69
  {"role": "system", "content": routing_guardrails},
70
  {
71
  "role": "user",
72
  "content": (
 
73
  f"Selected dataset: {self.display_name}\n"
74
  f"Language preference: {language_label} ({language_code})\n"
75
  f"Question: {user_message}"
@@ -104,7 +145,7 @@ class DatasetEngine:
104
  try:
105
  messages = self.build_messages(user_message, language_label, language_code)
106
  response = client.chat_completion(
107
- model="meta-llama/Llama-3.1-8B-Instruct",
108
  messages=messages,
109
  max_tokens=500,
110
  temperature=0.3,
@@ -208,10 +249,20 @@ class DatasetEngine:
208
  if "language" not in arguments:
209
  arguments["language"] = language_code
210
 
 
 
 
 
211
  # Sanitize arguments before execution
212
  arguments = self.sanitize_arguments(tool_name, arguments)
213
  print(f"βœ… [DatasetEngine] Sanitized arguments: {arguments}")
214
 
 
 
 
 
 
 
215
  explanation = model_response.get("explanation", "")
216
  response, debug_info = self.execute_tool(user_message, tool_name, arguments, show_debug)
217
 
@@ -226,26 +277,6 @@ class DatasetEngine:
226
 
227
 
228
  class ParliamentEngine(DatasetEngine):
229
- # Valid parameter names per tool
230
- TOOL_PARAMS = {
231
- "openparldata_search_parliamentarians": {
232
- "query", "canton", "party", "active_only", "level", "language",
233
- "limit", "offset", "response_format"
234
- },
235
- "openparldata_search_votes": {
236
- "query", "date_from", "date_to", "parliament_id", "vote_type",
237
- "level", "language", "limit", "offset", "response_format"
238
- },
239
- "openparldata_search_motions": {
240
- "query", "submitter_id", "status", "date_from", "date_to",
241
- "level", "language", "limit", "offset", "response_format"
242
- },
243
- "openparldata_search_debates": {
244
- "query", "date_from", "date_to", "speaker_id", "topic",
245
- "parliament_id", "level", "language", "limit", "offset", "response_format"
246
- },
247
- }
248
-
249
  def __init__(self):
250
  super().__init__(
251
  name="parliament",
@@ -257,13 +288,28 @@ class ParliamentEngine(DatasetEngine):
257
  "openparldata_search_votes",
258
  "openparldata_search_motions",
259
  "openparldata_search_debates",
 
260
  },
261
  )
262
 
263
  def sanitize_arguments(self, tool_name: str, arguments: dict) -> dict:
264
  """Sanitize arguments for OpenParlData tools."""
265
  sanitized = {}
266
- valid_params = self.TOOL_PARAMS.get(tool_name, set())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
  for key, value in arguments.items():
269
  # Skip extra fields not in the tool schema
@@ -271,6 +317,13 @@ class ParliamentEngine(DatasetEngine):
271
  print(f"⚠️ [ParliamentEngine] Skipping invalid parameter '{key}' for {tool_name}")
272
  continue
273
 
 
 
 
 
 
 
 
274
  # Type conversions
275
  if key == "limit":
276
  # Convert to int and clamp to 1-100
@@ -296,10 +349,76 @@ class ParliamentEngine(DatasetEngine):
296
  elif key == "active_only":
297
  # Convert to bool
298
  sanitized[key] = bool(value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  else:
300
  # Keep other values as-is
301
  sanitized[key] = value
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  return sanitized
304
 
305
  def execute_tool(
@@ -325,83 +444,14 @@ class ParliamentEngine(DatasetEngine):
325
  debug_info: str | None,
326
  show_debug: bool,
327
  language_code: str,
328
- ) -> tuple[str, str | None, dict, list]:
329
- """Parse OpenParlData JSON responses and create card data."""
330
- parliament_cards = []
331
- language_fallback = False
332
-
333
- # Try to parse JSON response
334
- try:
335
- data = json.loads(response)
336
-
337
- # Check if it's an OpenParlData response with data array
338
- if isinstance(data, dict) and "data" in data and isinstance(data["data"], list):
339
- # Extract card info from each item
340
- for item in data["data"]:
341
- if isinstance(item, dict):
342
- # Get title in user's preferred language with fallback
343
- title = "Untitled"
344
- title_dict = item.get("affair_title") if "affair_title" in item else item.get("title")
345
-
346
- if isinstance(title_dict, dict):
347
- # Try user's language first
348
- if language_code == "en":
349
- # English not available in API, fallback to German
350
- title = title_dict.get("de") or title_dict.get("fr") or title_dict.get("it") or "Untitled"
351
- if title != "Untitled":
352
- language_fallback = True
353
- else:
354
- # Try user's language, fallback to de β†’ fr β†’ it
355
- title = (title_dict.get(language_code) or
356
- title_dict.get("de") or
357
- title_dict.get("fr") or
358
- title_dict.get("it") or
359
- "Untitled")
360
-
361
- # Get URL in user's preferred language
362
- url = "#"
363
- if "url_external" in item and isinstance(item["url_external"], dict):
364
- if language_code == "en":
365
- url = item["url_external"].get("de") or item["url_external"].get("fr") or item["url_external"].get("it") or "#"
366
- else:
367
- url = (item["url_external"].get(language_code) or
368
- item["url_external"].get("de") or
369
- item["url_external"].get("fr") or
370
- item["url_external"].get("it") or
371
- "#")
372
-
373
- # Add date if available
374
- date_str = ""
375
- if "date" in item:
376
- date_str = item["date"][:10] # Extract YYYY-MM-DD
377
-
378
- parliament_cards.append({
379
- "title": title,
380
- "url": url,
381
- "date": date_str
382
- })
383
-
384
- # If we have cards, show a summary message
385
- if parliament_cards:
386
- count = len(parliament_cards)
387
- total = data.get("meta", {}).get("total_records", count)
388
- body = f"### πŸ›οΈ Parliament Results\n\nFound **{total}** result(s). Showing {count} items below:"
389
-
390
- # Add language fallback notice for English users
391
- if language_fallback and language_code == "en":
392
- body += "\n\n*Note: English content is not available from the API. Results are displayed in German.*"
393
- else:
394
- body = "### πŸ›οΈ Parliament Results\n\nNo results found for your query."
395
- else:
396
- # Not a data response, show as-is
397
- body = f"### πŸ“Š Results\n{response}"
398
-
399
- except json.JSONDecodeError:
400
- # Not JSON, treat as text response
401
- body = f"### πŸ“Š Results\n{response}"
402
-
403
  final_response = self._compose_response_text(explanation, debug_info, show_debug, body)
404
- return final_response, None, {}, parliament_cards
405
 
406
 
407
  class BFSEngine(DatasetEngine):
@@ -616,64 +666,7 @@ LANGUAGES = {
616
  "Italiano": "it"
617
  }
618
 
619
- # Example queries for OpenParlData
620
- OPENPARLDATA_EXAMPLES = {
621
- "en": [
622
- "Who are the parliamentarians from Zurich?",
623
- "Show me recent votes about climate policy",
624
- "What motions were submitted about healthcare in 2024?",
625
- "Find debates about immigration reform"
626
- ],
627
- "de": [
628
- "Wer sind die Parlamentarier aus ZΓΌrich?",
629
- "Zeige mir aktuelle Abstimmungen zur Klimapolitik",
630
- "Welche AntrΓ€ge zum Gesundheitswesen wurden 2024 eingereicht?",
631
- "Finde Debatten ΓΌber Migrationsreform"
632
- ],
633
- "fr": [
634
- "Qui sont les parlementaires de Zurich?",
635
- "Montrez-moi les votes rΓ©cents sur la politique climatique",
636
- "Quelles motions sur la santΓ© ont Γ©tΓ© soumises en 2024?",
637
- "Trouvez les dΓ©bats sur la rΓ©forme de l'immigration"
638
- ],
639
- "it": [
640
- "Chi sono i parlamentari di Zurigo?",
641
- "Mostrami i voti recenti sulla politica climatica",
642
- "Quali mozioni sulla sanitΓ  sono state presentate nel 2024?",
643
- "Trova i dibattiti sulla riforma dell'immigrazione"
644
- ]
645
- }
646
-
647
- # Example queries for BFS (two-step workflow)
648
- BFS_EXAMPLES = {
649
- "en": [
650
- "I want inflation data",
651
- "Show me population statistics",
652
- "I need employment data by canton",
653
- "Find energy consumption statistics"
654
- ],
655
- "de": [
656
- "Ich mΓΆchte Inflationsdaten",
657
- "Zeige mir BevΓΆlkerungsstatistiken",
658
- "Ich brauche BeschΓ€ftigungsdaten nach Kanton",
659
- "Finde Energieverbrauchsstatistiken"
660
- ],
661
- "fr": [
662
- "Je veux des donnΓ©es sur l'inflation",
663
- "Montrez-moi les statistiques de population",
664
- "J'ai besoin de donnΓ©es sur l'emploi par canton",
665
- "Trouvez les statistiques de consommation d'Γ©nergie"
666
- ],
667
- "it": [
668
- "Voglio dati sull'inflazione",
669
- "Mostrami le statistiche sulla popolazione",
670
- "Ho bisogno di dati sull'occupazione per cantone",
671
- "Trova le statistiche sul consumo energetico"
672
- ]
673
- }
674
-
675
- # Keep backward compatibility
676
- EXAMPLES = OPENPARLDATA_EXAMPLES
677
  def chat_response(message: str, history: list, language: str, show_debug: bool, dataset: str = "parliament") -> tuple[str, str | None, dict, list]:
678
  """
679
  Main chat response function routed through dataset-specific engines.
@@ -690,31 +683,12 @@ def chat_response(message: str, history: list, language: str, show_debug: bool,
690
  return f"❌ An error occurred: {str(e)}", None, {}, []
691
 
692
 
693
- # Custom CSS
694
- custom_css = """
695
- .gradio-container {
696
- font-family: 'Inter', sans-serif;
697
- }
698
- .chatbot-header {
699
- text-align: center;
700
- padding: 20px;
701
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
702
- color: white !important;
703
- border-radius: 10px;
704
- margin-bottom: 20px;
705
- }
706
- .chatbot-header h1 {
707
- color: white !important;
708
- margin: 0;
709
- }
710
- .chatbot-header p {
711
- color: white !important;
712
- margin: 10px 0 0 0;
713
- }
714
- """
715
 
716
  # Build Gradio interface
717
- with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
718
  # State to track datacube search results
719
  datacube_state = gr.State({}) # Maps display text β†’ datacube_id
720
 
@@ -725,20 +699,26 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
725
  gr.Markdown(
726
  """
727
  <div class="chatbot-header">
728
- <h1>πŸ‡¨πŸ‡­ CoJournalist Swiss Data</h1>
729
- <p>Query Swiss parliamentary and statistical data in natural language</p>
730
  </div>
731
  """
732
  )
733
 
734
  with gr.Row():
735
  with gr.Column(scale=3):
736
- chatbot = gr.Chatbot(
737
- height=500,
738
- label="Chat with CoJournalist",
739
- show_label=False,
740
- type="messages"
741
- )
 
 
 
 
 
 
742
 
743
  # CSV download file component
744
  download_file = gr.File(
@@ -766,27 +746,28 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
766
  page_info = gr.Markdown("Page 1")
767
  next_page_btn = gr.Button("Next β–Ά", size="sm")
768
 
769
- with gr.Row():
770
- msg = gr.Textbox(
771
- placeholder="(Choose a source on the right first)",
772
- show_label=False,
773
- scale=4
774
- )
775
- submit = gr.Button("Send", variant="primary", scale=1)
776
-
777
  with gr.Column(scale=1):
778
  gr.Markdown("### βš™οΈ Settings")
779
 
780
  dataset = gr.Radio(
781
  choices=[
782
- ("Swiss Parliament Data", "openparldata"),
783
- ("Swiss Statistics (BFS)", "bfs")
784
  ],
785
- value="openparldata",
786
  label="Data Source",
787
  info="Choose which API to query"
788
  )
789
 
 
 
 
 
 
 
 
 
 
790
  language = gr.Radio(
791
  choices=list(LANGUAGES.keys()),
792
  value="English",
@@ -794,6 +775,10 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
794
  info="Select response language"
795
  )
796
 
 
 
 
 
797
  def ensure_message_history(history):
798
  """Normalize chat history to the format expected by gr.Chatbot(type='messages')."""
799
  normalized: list[dict] = []
@@ -814,9 +799,214 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
814
  normalized.append({"role": "assistant", "content": str(assistant)})
815
  return normalized
816
 
817
- def append_message(history: list[dict], role: str, content: str | None):
818
- """Append a message to the normalized history."""
819
- history.append({"role": role, "content": "" if content is None else str(content)})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820
 
821
  def render_parliament_cards(cards: list[dict], page: int, items_per_page: int = 10) -> tuple[str, str, int, bool]:
822
  """Render parliament cards as HTML with pagination."""
@@ -837,6 +1027,8 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
837
  title = card.get("title", "Untitled")
838
  url = card.get("url", "#")
839
  date = card.get("date", "")
 
 
840
 
841
  # Truncate title if too long
842
  if len(title) > 120:
@@ -845,17 +1037,14 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
845
  date_badge = f'<span style="background: #e0e0e0; padding: 4px 8px; border-radius: 4px; font-size: 12px; color: #666;">{date}</span>' if date else ''
846
 
847
  cards_html += f'''
848
- <a href="{url}" target="_blank" style="text-decoration: none;">
849
- <div style="
850
- border: 1px solid #ddd;
851
- border-radius: 8px;
852
- padding: 16px;
853
- background: white;
854
- transition: all 0.2s;
855
- cursor: pointer;
856
- ">
857
  <div style="display: flex; justify-content: space-between; align-items: start; gap: 12px;">
858
- <h3 style="margin: 0; color: #333; font-size: 16px; flex: 1;">{title}</h3>
 
 
 
 
859
  {date_badge}
860
  </div>
861
  </div>
@@ -868,52 +1057,156 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
868
  return cards_html, page_info, page, show_pagination
869
 
870
  # Handle message submission
871
- def respond(message, chat_history, language, dataset_choice, current_datacube_state, current_parliament_cards, current_page, request: gr.Request):
872
  show_debug = False # Debug mode disabled in UI
873
- chat_messages = ensure_message_history(chat_history)
874
 
875
  if not message.strip():
876
- return "", chat_messages, None, gr.update(visible=False), current_datacube_state, gr.update(), gr.update(visible=False), current_parliament_cards, current_page, "", "", gr.update(visible=False), gr.update(), gr.update()
877
 
878
  # Check usage limit
879
  user_id = request.client.host if request and hasattr(request, 'client') else "unknown"
880
 
881
- append_message(chat_messages, "user", message)
882
-
883
  if not tracker.check_limit(user_id):
884
- bot_message = (
885
- "⚠️ Daily request limit reached. You have used all 50 requests for today. "
886
  "Please try again tomorrow.\n\nThis limit helps us keep the service free and available for everyone."
887
  )
888
- append_message(chat_messages, "assistant", bot_message)
889
- return "", chat_messages, None, gr.update(visible=False), current_datacube_state, gr.update(), gr.update(visible=False), current_parliament_cards, current_page, "", "", gr.update(visible=False), gr.update(), gr.update()
890
 
891
  # Map dataset choice to engine type
892
  dataset_map = {
893
- "openparldata": "parliament",
894
- "bfs": "statistics"
895
  }
896
  dataset_type = dataset_map.get(dataset_choice, "parliament")
897
 
898
  # Get bot response (returns tuple with optional CSV file and results data)
 
 
899
  bot_message, csv_file, datacube_map, results_data = chat_response(
900
- message, chat_messages, language, show_debug, dataset_type
901
  )
902
 
903
- append_message(chat_messages, "assistant", bot_message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
904
 
905
  # Handle parliament cards (for Parliament dataset)
906
- if dataset_type == "parliament" and results_data:
907
- cards_html, page_info, page_num, show_pagination = render_parliament_cards(results_data, 1)
908
  return (
909
  "",
910
- chat_messages,
911
  None,
912
  gr.update(visible=False),
913
  current_datacube_state,
914
  gr.update(),
915
  gr.update(visible=False),
916
- results_data, # parliament_cards_state
917
  page_num, # parliament_page_state
918
  cards_html, # parliament_cards_html
919
  page_info, # page_info
@@ -926,7 +1219,7 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
926
  if dataset_type == "statistics" and results_data:
927
  return (
928
  "",
929
- chat_messages,
930
  None,
931
  gr.update(visible=False),
932
  datacube_map,
@@ -945,7 +1238,7 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
945
  if csv_file:
946
  return (
947
  "",
948
- chat_messages,
949
  csv_file,
950
  gr.update(visible=True),
951
  current_datacube_state,
@@ -962,7 +1255,7 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
962
 
963
  return (
964
  "",
965
- chat_messages,
966
  None,
967
  gr.update(visible=False),
968
  current_datacube_state,
@@ -994,16 +1287,12 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
994
  return cards_html, page_info, page_num
995
 
996
  # Handle "Get Data" button click for datacube selection
997
- def fetch_datacube_data(selected_choice, current_datacube_state, chat_history, language, request: gr.Request):
998
  show_debug = False # Debug mode disabled in UI
999
- chat_messages = ensure_message_history(chat_history)
1000
- user_message = f"Get Data: {selected_choice}" if selected_choice else "Get Data"
1001
- append_message(chat_messages, "user", user_message)
1002
 
1003
  if not selected_choice or not current_datacube_state:
1004
  error_msg = "⚠️ Please select a datacube first."
1005
- append_message(chat_messages, "assistant", error_msg)
1006
- return chat_messages, None, gr.update(visible=False), gr.update(visible=False)
1007
 
1008
  # Check usage limit
1009
  user_id = request.client.host if request and hasattr(request, 'client') else "unknown"
@@ -1013,16 +1302,14 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
1013
  "⚠️ Daily request limit reached. You have used all 50 requests for today. "
1014
  "Please try again tomorrow.\n\nThis limit helps us keep the service free and available for everyone."
1015
  )
1016
- append_message(chat_messages, "assistant", bot_message)
1017
- return chat_messages, None, gr.update(visible=False), gr.update(visible=False)
1018
 
1019
  # Get datacube ID from mapping
1020
  datacube_id = current_datacube_state.get(selected_choice)
1021
 
1022
  if not datacube_id:
1023
  error_msg = "❌ Error: Could not find datacube ID for selected option."
1024
- append_message(chat_messages, "assistant", error_msg)
1025
- return chat_messages, None, gr.update(visible=False), gr.update(visible=False)
1026
 
1027
  # Get language code
1028
  lang_code = LANGUAGES.get(language, "en")
@@ -1030,35 +1317,33 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
1030
  bfs_engine = DATASET_ENGINES.get("statistics")
1031
  if not isinstance(bfs_engine, BFSEngine):
1032
  error_msg = "❌ Error: BFS engine unavailable."
1033
- append_message(chat_messages, "assistant", error_msg)
1034
- return chat_messages, None, gr.update(visible=False), gr.update(visible=False)
1035
 
1036
  bot_message, csv_file_path = bfs_engine.fetch_datacube_data(datacube_id, lang_code, show_debug)
1037
 
1038
- append_message(chat_messages, "assistant", bot_message)
1039
  if csv_file_path:
1040
- return chat_messages, csv_file_path, gr.update(visible=True), gr.update(visible=False)
1041
 
1042
- return chat_messages, None, gr.update(visible=False), gr.update(visible=False)
1043
 
1044
  msg.submit(
1045
  respond,
1046
- [msg, chatbot, language, dataset, datacube_state, parliament_cards_state, parliament_page_state],
1047
- [msg, chatbot, download_file, download_file, datacube_state, datacube_radio, datacube_selection_row,
1048
  parliament_cards_state, parliament_page_state, parliament_cards_html, page_info, parliament_cards_row,
1049
  prev_page_btn, next_page_btn]
1050
  )
1051
  submit.click(
1052
  respond,
1053
- [msg, chatbot, language, dataset, datacube_state, parliament_cards_state, parliament_page_state],
1054
- [msg, chatbot, download_file, download_file, datacube_state, datacube_radio, datacube_selection_row,
1055
  parliament_cards_state, parliament_page_state, parliament_cards_html, page_info, parliament_cards_row,
1056
  prev_page_btn, next_page_btn]
1057
  )
1058
  get_data_btn.click(
1059
  fetch_datacube_data,
1060
- [datacube_radio, datacube_state, chatbot, language],
1061
- [chatbot, download_file, download_file, datacube_selection_row]
1062
  )
1063
  prev_page_btn.click(
1064
  prev_page,
@@ -1071,11 +1356,30 @@ with gr.Blocks(css=custom_css, title="CoJournalist Swiss Data") as demo:
1071
  [parliament_cards_html, page_info, parliament_page_state]
1072
  )
1073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1074
  gr.Markdown(
1075
  """
1076
  ---
1077
  **Data Sources:**
1078
- - **Swiss Parliament Data:** OpenParlData MCP server for parliamentary information
1079
  - **Swiss Statistics (BFS):** Federal Statistical Office data via PxWeb API
1080
 
1081
  **Rate Limit:** 50 requests per day per user (shared across both datasets) to keep the service affordable and accessible.
 
14
  from mcp_integration import execute_mcp_query, execute_mcp_query_bfs
15
  import asyncio
16
  from usage_tracker import UsageTracker
17
+ from typing import Any
18
+ from ui.helpers import prefer_language, strip_html, pick_external_url
19
+ from datasets.parliament.constants import OPENPARLDATA_EXAMPLES, TOOL_PARAMS as PARLIAMENT_TOOL_PARAMS
20
+ from datasets.bfs.constants import BFS_EXAMPLES
21
 
22
  # Load environment variables
23
  load_dotenv()
 
43
 
44
  client = InferenceClient(token=HF_TOKEN)
45
 
46
+ def translate_to_german(text: str) -> str:
47
+ """
48
+ Translate user-facing keywords into German to improve OpenParlData recall.
49
+
50
+ Falls back to the original text if translation fails or input is empty.
51
+ """
52
+ cleaned = text.strip()
53
+ if not cleaned:
54
+ return cleaned
55
+
56
+ prompt = (
57
+ "Übersetze die folgenden Suchbegriffe ins Deutsche. "
58
+ "Gib nur die deutschen StichwΓΆrter zurΓΌck, ohne Zusatztext.\n"
59
+ f"Original: {cleaned}"
60
+ )
61
+
62
+ try:
63
+ response = client.chat_completion(
64
+ model="meta-llama/Llama-3.1-70B-Instruct",
65
+ messages=[
66
+ {"role": "system", "content": "Du bist ein prÀziser Übersetzer ins Deutsche."},
67
+ {"role": "user", "content": prompt},
68
+ ],
69
+ max_tokens=64,
70
+ temperature=0.0,
71
+ )
72
+ translated = response.choices[0].message.content.strip()
73
+ return translated or cleaned
74
+ except Exception as exc:
75
+ print(f"⚠️ [translate_to_german] Translation failed ({exc}); falling back to original text.")
76
+ return cleaned
77
+
78
  class DatasetEngine:
79
  """Dataset-specific orchestrator for LLM prompting and tool execution."""
80
 
 
91
  self.system_prompt = system_prompt
92
  self.routing_instruction = routing_instruction
93
  self.allowed_tools = allowed_tools
94
+ self._last_request: dict[str, Any] | None = None
95
 
96
  def build_messages(self, user_message: str, language_label: str, language_code: str) -> list[dict]:
97
  """Construct chat completion messages with dataset-specific guardrails."""
 
101
  'If the request requires a different data source, respond with '
102
  '{"response": "Explain that the other dataset should be selected in the app."}'
103
  )
104
+ # Get current date for dynamic date handling
105
+ current_date = datetime.now().strftime("%Y-%m-%d")
106
+
107
  return [
108
  {"role": "system", "content": self.system_prompt},
109
  {"role": "system", "content": routing_guardrails},
110
  {
111
  "role": "user",
112
  "content": (
113
+ f"Current date: {current_date}\n"
114
  f"Selected dataset: {self.display_name}\n"
115
  f"Language preference: {language_label} ({language_code})\n"
116
  f"Question: {user_message}"
 
145
  try:
146
  messages = self.build_messages(user_message, language_label, language_code)
147
  response = client.chat_completion(
148
+ model="meta-llama/Llama-3.1-70B-Instruct",
149
  messages=messages,
150
  max_tokens=500,
151
  temperature=0.3,
 
249
  if "language" not in arguments:
250
  arguments["language"] = language_code
251
 
252
+ # Force JSON response format for parliament tools to ensure consistent card rendering
253
+ if isinstance(self, ParliamentEngine):
254
+ arguments["response_format"] = "json"
255
+
256
  # Sanitize arguments before execution
257
  arguments = self.sanitize_arguments(tool_name, arguments)
258
  print(f"βœ… [DatasetEngine] Sanitized arguments: {arguments}")
259
 
260
+ # Remember latest request context for downstream post-processing
261
+ self._last_request = {
262
+ "tool": tool_name,
263
+ "arguments": dict(arguments),
264
+ }
265
+
266
  explanation = model_response.get("explanation", "")
267
  response, debug_info = self.execute_tool(user_message, tool_name, arguments, show_debug)
268
 
 
277
 
278
 
279
  class ParliamentEngine(DatasetEngine):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  def __init__(self):
281
  super().__init__(
282
  name="parliament",
 
288
  "openparldata_search_votes",
289
  "openparldata_search_motions",
290
  "openparldata_search_debates",
291
+ "openparldata_search_meetings",
292
  },
293
  )
294
 
295
  def sanitize_arguments(self, tool_name: str, arguments: dict) -> dict:
296
  """Sanitize arguments for OpenParlData tools."""
297
  sanitized = {}
298
+ valid_params = PARLIAMENT_TOOL_PARAMS.get(tool_name, set())
299
+ requested_language = str(arguments.get("language", "")).lower()
300
+ original_arguments = dict(arguments)
301
+ optional_string_params = {
302
+ "canton",
303
+ "party",
304
+ "parliament_id",
305
+ "vote_type",
306
+ "submitter_id",
307
+ "speaker_id",
308
+ "topic",
309
+ "status",
310
+ "body_key",
311
+ "level",
312
+ }
313
 
314
  for key, value in arguments.items():
315
  # Skip extra fields not in the tool schema
 
317
  print(f"⚠️ [ParliamentEngine] Skipping invalid parameter '{key}' for {tool_name}")
318
  continue
319
 
320
+ # Normalize strings and drop empty values for optional params
321
+ if isinstance(value, str):
322
+ value = value.strip()
323
+ if value == "" and key in optional_string_params:
324
+ print(f"⚠️ [ParliamentEngine] Dropping empty string for '{key}'")
325
+ continue
326
+
327
  # Type conversions
328
  if key == "limit":
329
  # Convert to int and clamp to 1-100
 
349
  elif key == "active_only":
350
  # Convert to bool
351
  sanitized[key] = bool(value)
352
+ elif key == "status":
353
+ status_val = str(value).strip().lower()
354
+ if status_val in {"", "all", "any", "*", "none"}:
355
+ print("⚠️ [ParliamentEngine] Removing non-specific status filter")
356
+ continue
357
+ status_map = {
358
+ "pending": "Eingereicht",
359
+ "submitted": "Eingereicht",
360
+ "in_progress": "Eingereicht",
361
+ "open": "Eingereicht",
362
+ "accepted": "Angenommen",
363
+ "approved": "Angenommen",
364
+ "rejected": "Abgelehnt",
365
+ "declined": "Abgelehnt",
366
+ "completed": "Erledigt",
367
+ "closed": "Erledigt",
368
+ }
369
+ if status_val.isdigit():
370
+ sanitized[key] = status_val
371
+ else:
372
+ mapped = status_map.get(status_val)
373
+ if mapped:
374
+ sanitized[key] = mapped
375
+ else:
376
+ print(f"⚠️ [ParliamentEngine] Unknown status '{value}' dropped")
377
+ continue
378
+ elif key == "body_key":
379
+ sanitized[key] = str(value).upper()
380
+ elif key == "level":
381
+ sanitized[key] = str(value).lower()
382
+ elif key == "query" and tool_name == "openparldata_search_parliamentarians":
383
+ query_text = str(value)
384
+ tokens = [tok for tok in query_text.replace(",", " ").split() if tok]
385
+ if len(tokens) >= 2 and all(tok[0].isupper() for tok in tokens if tok):
386
+ # Use last token (family name) for broader matching
387
+ sanitized[key] = tokens[-1]
388
+ else:
389
+ sanitized[key] = value
390
  else:
391
  # Keep other values as-is
392
  sanitized[key] = value
393
 
394
+ # Enforce German language for English UI users
395
+ if requested_language == "en":
396
+ sanitized["language"] = "de"
397
+ elif "language" in sanitized:
398
+ sanitized["language"] = sanitized["language"].lower()
399
+
400
+ # Translate key textual filters into German for better recall
401
+ if sanitized.get("language") == "de":
402
+ for text_key in ("query", "topic"):
403
+ if text_key in sanitized:
404
+ text_value = str(sanitized[text_key]).strip()
405
+ if text_value:
406
+ translated = translate_to_german(text_value)
407
+ if translated:
408
+ sanitized[text_key] = translated
409
+ else:
410
+ # Restore original if translation failed
411
+ sanitized[text_key] = text_value
412
+
413
+ # Avoid empty required query strings by falling back to original input
414
+ if "query" in sanitized:
415
+ if not str(sanitized["query"]).strip():
416
+ fallback = str(original_arguments.get("query", "")).strip()
417
+ if fallback:
418
+ sanitized["query"] = translate_to_german(fallback) if sanitized.get("language") == "de" else fallback
419
+ else:
420
+ sanitized.pop("query", None)
421
+
422
  return sanitized
423
 
424
  def execute_tool(
 
444
  debug_info: str | None,
445
  show_debug: bool,
446
  language_code: str,
447
+ ) -> tuple[str, str | None, dict, str]:
448
+ """Pass through the response for parsing in respond() function."""
449
+ # Simplified: just return the raw JSON response
450
+ # The respond() function will handle parsing and card extraction
451
+ # Don't embed raw JSON in message - use clean placeholder instead
452
+ body = "Searching parliament data..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  final_response = self._compose_response_text(explanation, debug_info, show_debug, body)
454
+ return final_response, None, {}, response
455
 
456
 
457
  class BFSEngine(DatasetEngine):
 
666
  "Italiano": "it"
667
  }
668
 
669
+ # Constants imported from datasets/ modules above
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
  def chat_response(message: str, history: list, language: str, show_debug: bool, dataset: str = "parliament") -> tuple[str, str | None, dict, list]:
671
  """
672
  Main chat response function routed through dataset-specific engines.
 
683
  return f"❌ An error occurred: {str(e)}", None, {}, []
684
 
685
 
686
# Load custom CSS.
# The encoding is pinned to UTF-8 so the stylesheet is decoded the same way on
# every host; without it, open() falls back to the locale's preferred encoding.
with open("ui/styles.css", "r", encoding="utf-8") as f:
    custom_css = f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
 
690
  # Build Gradio interface
691
+ with gr.Blocks(css=custom_css, title="Swiss and European Government Data LLM") as demo:
692
  # State to track datacube search results
693
  datacube_state = gr.State({}) # Maps display text β†’ datacube_id
694
 
 
699
  gr.Markdown(
700
  """
701
  <div class="chatbot-header">
702
+ <h1>πŸ‡¨πŸ‡­ Swiss &amp; European Government Data LLM</h1>
703
+ <p>Explore Swiss parliament records and BFS statistics, with more datasets on the way.</p>
704
  </div>
705
  """
706
  )
707
 
708
  with gr.Row():
709
  with gr.Column(scale=3):
710
+ # Simple query input form
711
+ with gr.Row():
712
+ msg = gr.Textbox(
713
+ placeholder="Ask a question about Swiss parliamentary data or statistics...",
714
+ show_label=False,
715
+ scale=4,
716
+ container=False
717
+ )
718
+ submit = gr.Button("πŸ” Search", variant="primary", scale=1)
719
+
720
+ # Status/explanation text
721
+ status_text = gr.Markdown("", visible=False)
722
 
723
  # CSV download file component
724
  download_file = gr.File(
 
746
  page_info = gr.Markdown("Page 1")
747
  next_page_btn = gr.Button("Next β–Ά", size="sm")
748
 
 
 
 
 
 
 
 
 
749
  with gr.Column(scale=1):
750
  gr.Markdown("### βš™οΈ Settings")
751
 
752
  dataset = gr.Radio(
753
  choices=[
754
+ "Swiss Parliament Data",
755
+ "Swiss Statistics (BFS)"
756
  ],
757
+ value="Swiss Parliament Data",
758
  label="Data Source",
759
  info="Choose which API to query"
760
  )
761
 
762
+ gr.HTML(
763
+ """
764
+ <div class="coming-soon-row">
765
+ <span class="coming-soon-pill">ParlTalk β€’ Coming Soon</span>
766
+ <span class="coming-soon-pill">Eurostat β€’ Coming Soon</span>
767
+ </div>
768
+ """
769
+ )
770
+
771
  language = gr.Radio(
772
  choices=list(LANGUAGES.keys()),
773
  value="English",
 
775
  info="Select response language"
776
  )
777
 
778
+ # Example queries display
779
+ gr.Markdown("### πŸ’‘ Example Queries")
780
+ examples_display = gr.Markdown()
781
+
782
  def ensure_message_history(history):
783
  """Normalize chat history to the format expected by gr.Chatbot(type='messages')."""
784
  normalized: list[dict] = []
 
799
  normalized.append({"role": "assistant", "content": str(assistant)})
800
  return normalized
801
 
802
def create_examples_text(dataset_choice: str, language: str) -> str:
    """Return the example queries for a dataset as a Markdown bullet list.

    Args:
        dataset_choice: Label of the selected data source radio option.
        language: Display name of the UI language (a key of ``LANGUAGES``).

    Returns:
        One ``- <example>`` line per example query, joined with newlines.
    """
    code = LANGUAGES.get(language, "en")

    # Only the BFS label selects the statistics examples; every other label
    # (including unknown ones) shows the parliament examples.
    if dataset_choice == "Swiss Statistics (BFS)":
        catalog = BFS_EXAMPLES
    else:
        catalog = OPENPARLDATA_EXAMPLES

    # Languages without their own list fall back to the English examples.
    queries = catalog.get(code, catalog["en"])
    return "\n".join(f"- {query}" for query in queries)
815
+
816
+ # Helper functions imported from ui.helpers
817
+
818
def _unwrap_expanded(value) -> dict:
    """Return the record held by an expanded relation field.

    Handles both the nested form ``{"data": [record, ...], "meta": {...}}``
    (the first record is used) and a flat record dict. Anything else, or a
    first record that is not a dict, yields an empty dict so callers can use
    ``.get`` unconditionally.
    """
    if not isinstance(value, dict):
        return {}
    records = value.get("data")
    if isinstance(records, (list, tuple)) and records:
        first = records[0]
        return first if isinstance(first, dict) else {}
    return value


def _short_date(value) -> str:
    """Return the first ten characters (YYYY-MM-DD) of a date-like value, or ''."""
    return str(value)[:10] if value else ""


def _join_details(parts, separator: str = " · ") -> str:
    """Join detail fragments, stringifying each so non-string values cannot raise."""
    return separator.join(str(part) for part in parts)


def build_parliament_card(item: dict, lang_code: str) -> dict:
    """Normalize an OpenParlData row into unified card metadata.

    The row type is detected from the keys it carries, checked in this order:
    person, meeting, vote, affair, speech, then a generic fallback.

    Args:
        item: One result row from the API response. Non-dict values are
            tolerated and produce the default card.
        lang_code: Preferred language code, passed to ``prefer_language``
            (imported from ``ui.helpers``) when picking localized values.

    Returns:
        A new dict with the keys ``title``, ``url``, ``date``, ``category``
        and ``summary``.
    """
    card = {
        "title": "Untitled",
        "url": "#",
        "date": "",
        "category": "Result",
        "summary": ""
    }

    if not isinstance(item, dict):
        return card

    # People directory
    if any(key in item for key in ("firstname", "lastname", "fullname")):
        card["category"] = "Person"
        fullname = item.get("fullname") or f"{item.get('firstname', '')} {item.get('lastname', '')}".strip()
        card["title"] = fullname or "Parliamentarian"

        website = prefer_language(item.get("website_parliament_url"), lang_code)
        # `or "#"` also covers a url_api key that is present but empty/None.
        card["url"] = website or item.get("url_api") or "#"

        party_display = None
        if item.get("party"):
            party_display = prefer_language(item.get("party"), lang_code)
            # Fall back to German when the requested language is missing.
            if not party_display and isinstance(item["party"], dict):
                party_display = prefer_language(item["party"], "de")
        if not party_display and item.get("party_harmonized"):
            party_display = prefer_language(item.get("party_harmonized"), lang_code)

        body_key = item.get("body_key")
        summary_parts = []
        if party_display:
            summary_parts.append(f"Party: {party_display}")
        if body_key:
            summary_parts.append(f"Body: {body_key}")
        if summary_parts:
            card["summary"] = _join_details(summary_parts)

        card["date"] = _short_date(item.get("updated_at") or item.get("created_at"))
        return card

    # Meetings
    if item.get("begin_date") and (item.get("name") or item.get("location") or item.get("type") == "meeting"):
        card["category"] = "Meeting"
        # str() keeps the title a string even when it falls back to a numeric number.
        card["title"] = str(prefer_language(item.get("name"), lang_code) or item.get("number") or "Meeting")
        card["date"] = _short_date(item.get("begin_date"))
        card["url"] = prefer_language(item.get("url_external"), lang_code) or item.get("url_api") or "#"
        details = []
        if item.get("location"):
            details.append(item["location"])
        if item.get("body_key"):
            details.append(f"Body: {item['body_key']}")
        if item.get("number"):
            details.append(f"Meeting #{item['number']}")
        if details:
            card["summary"] = _join_details(details)
        return card

    # Votes
    if "results_yes" in item or "results_no" in item:
        card["category"] = "Vote"
        card["title"] = prefer_language(item.get("title"), lang_code) or "Vote"
        card["date"] = _short_date(item.get("date"))
        card["url"] = prefer_language(item.get("url_external"), lang_code) or item.get("url_api") or "#"
        affair_title = prefer_language(item.get("affair_title"), lang_code)
        if affair_title:
            card["summary"] = affair_title
        else:
            # No affair title: summarize the tally instead.
            totals = []
            if item.get("results_yes") is not None:
                totals.append(f"Yes {item.get('results_yes')}")
            if item.get("results_no") is not None:
                totals.append(f"No {item.get('results_no')}")
            if item.get("results_abstention") is not None:
                totals.append(f"Abst {item.get('results_abstention')}")
            if totals:
                card["summary"] = _join_details(totals)
        return card

    # Affairs / motions
    if "type_name" in item or "number" in item or "state_name" in item:
        card["category"] = "Affair"
        card["title"] = str(prefer_language(item.get("title"), lang_code) or item.get("number") or "Affair")
        card["url"] = prefer_language(item.get("url_external"), lang_code) or item.get("url_api") or "#"
        card["date"] = _short_date(item.get("begin_date") or item.get("created_at"))
        details = []
        type_name = prefer_language(item.get("type_name"), lang_code)
        state_name = prefer_language(item.get("state_name"), lang_code)
        if type_name:
            details.append(type_name)
        if state_name:
            details.append(state_name)
        if item.get("number"):
            details.append(item["number"])
        if details:
            card["summary"] = _join_details(details)
        return card

    # Speeches / debates
    if any(key in item for key in ("transcript", "speech_text", "speech_text_content", "speaker_name", "person_name", "person")):
        card["category"] = "Speech"

        # person / affair / meeting may arrive as expanded relations.
        person = _unwrap_expanded(item.get("person"))
        affair = _unwrap_expanded(item.get("affair"))
        meeting = _unwrap_expanded(item.get("meeting"))

        speaker = (
            prefer_language(person.get("fullname"), lang_code)
            or prefer_language(item.get("person_name"), lang_code)
            or person.get("fullname")
            or item.get("speaker_name")
        )

        affair_title = prefer_language(affair.get("title"), lang_code)

        card["title"] = (
            prefer_language(item.get("title"), lang_code)
            or affair_title
            or (f"Rede von {speaker}" if speaker else "Rede")
        )
        card["date"] = _short_date(item.get("date") or item.get("date_start"))

        # Speeches use "url" field (plain string), not "url_external" (dict)
        external_url = pick_external_url(
            item.get("url"),  # Speeches have direct url field
            item.get("url_external"),
            affair.get("url_external"),
            meeting.get("url_external"),
        )
        # Never use url_api for clickable links
        card["url"] = external_url or "#"

        text_content = item.get("speech_text_content")
        summary = None
        if isinstance(text_content, dict):
            summary = prefer_language(text_content, lang_code) or prefer_language(text_content, "de")
        elif isinstance(text_content, str):
            summary = text_content
        elif item.get("transcript"):
            summary = item.get("transcript")
        elif item.get("speech_text"):
            summary = item.get("speech_text")

        if summary:
            # Keep only a short plain-text teaser.
            summary = strip_html(summary)[:200]

        summary_parts = []
        if speaker:
            summary_parts.append(speaker)
        if summary:
            summary_parts.append(summary)
        if affair_title and affair_title != card["title"]:
            summary_parts.append(affair_title)

        if summary_parts:
            # At most two fragments are shown to keep the card compact.
            card["summary"] = _join_details(summary_parts[:2], " — ")
        return card

    # Fallback generic
    if item.get("title"):
        card["title"] = prefer_language(item.get("title"), lang_code) or item["title"]
    external = prefer_language(item.get("url_external"), lang_code)
    card["url"] = external or item.get("url_api") or "#"
    card["date"] = _short_date(item.get("date"))
    return card
1010
 
1011
  def render_parliament_cards(cards: list[dict], page: int, items_per_page: int = 10) -> tuple[str, str, int, bool]:
1012
  """Render parliament cards as HTML with pagination."""
 
1027
  title = card.get("title", "Untitled")
1028
  url = card.get("url", "#")
1029
  date = card.get("date", "")
1030
+ category = card.get("category", "Result")
1031
+ summary = card.get("summary", "")
1032
 
1033
  # Truncate title if too long
1034
  if len(title) > 120:
 
1037
  date_badge = f'<span style="background: #e0e0e0; padding: 4px 8px; border-radius: 4px; font-size: 12px; color: #666;">{date}</span>' if date else ''
1038
 
1039
  cards_html += f'''
1040
+ <a href="{url}" target="_blank" style="text-decoration: none; display: block;" rel="noopener noreferrer">
1041
+ <div class="parliament-card">
 
 
 
 
 
 
 
1042
  <div style="display: flex; justify-content: space-between; align-items: start; gap: 12px;">
1043
+ <div style="display: flex; flex-direction: column; gap: 6px; flex: 1;">
1044
+ <span class="category-badge">{category}</span>
1045
+ <h3 style="margin: 0; color: #333; font-size: 16px;">{title}</h3>
1046
+ {f'<p style="margin: 0; color: #555; font-size: 13px;">{summary}</p>' if summary else ''}
1047
+ </div>
1048
  {date_badge}
1049
  </div>
1050
  </div>
 
1057
  return cards_html, page_info, page, show_pagination
1058
 
1059
  # Handle message submission
1060
+ def respond(message, language, dataset_choice, current_datacube_state, current_parliament_cards, current_page, request: gr.Request):
1061
  show_debug = False # Debug mode disabled in UI
 
1062
 
1063
  if not message.strip():
1064
+ return "", gr.update(visible=False), None, gr.update(visible=False), current_datacube_state, gr.update(), gr.update(visible=False), current_parliament_cards, current_page, "", "", gr.update(visible=False), gr.update(), gr.update()
1065
 
1066
  # Check usage limit
1067
  user_id = request.client.host if request and hasattr(request, 'client') else "unknown"
1068
 
 
 
1069
  if not tracker.check_limit(user_id):
1070
+ status_msg = (
1071
+ "⚠️ **Daily request limit reached.** You have used all 50 requests for today. "
1072
  "Please try again tomorrow.\n\nThis limit helps us keep the service free and available for everyone."
1073
  )
1074
+ return "", gr.update(value=status_msg, visible=True), None, gr.update(visible=False), current_datacube_state, gr.update(), gr.update(visible=False), current_parliament_cards, current_page, "", "", gr.update(visible=False), gr.update(), gr.update()
 
1075
 
1076
  # Map dataset choice to engine type
1077
  dataset_map = {
1078
+ "Swiss Parliament Data": "parliament",
1079
+ "Swiss Statistics (BFS)": "statistics"
1080
  }
1081
  dataset_type = dataset_map.get(dataset_choice, "parliament")
1082
 
1083
  # Get bot response (returns tuple with optional CSV file and results data)
1084
+ # Create temporary chat history for API call
1085
+ temp_chat = []
1086
  bot_message, csv_file, datacube_map, results_data = chat_response(
1087
+ message, temp_chat, language, show_debug, dataset_type
1088
  )
1089
 
1090
+ engine_instance = DATASET_ENGINES.get(dataset_type)
1091
+ last_request = getattr(engine_instance, "_last_request", None) if engine_instance else None
1092
+
1093
+ # Parse JSON and extract cards for Parliament dataset
1094
+ parliament_cards: list[dict] = []
1095
+ if dataset_type == "parliament" and results_data and isinstance(results_data, str):
1096
+ try:
1097
+ print(f"\nπŸ” [respond] Parsing JSON results_data...")
1098
+ data = json.loads(results_data, strict=False)
1099
+ print(f"βœ… [respond] JSON parsed successfully")
1100
+
1101
+ if isinstance(data, dict) and data.get("status") == "error":
1102
+ error_msg = data.get("message") or data.get("detail") or "Die OpenParlData-API meldet einen Fehler."
1103
+ endpoint = data.get("endpoint")
1104
+ if endpoint:
1105
+ error_msg += f"\n\nEndpoint: `{endpoint}`"
1106
+ bot_message = f"❌ {error_msg}"
1107
+ return (
1108
+ "",
1109
+ gr.update(value=bot_message, visible=True),
1110
+ None,
1111
+ gr.update(visible=False),
1112
+ current_datacube_state,
1113
+ gr.update(),
1114
+ gr.update(visible=False),
1115
+ current_parliament_cards,
1116
+ current_page,
1117
+ "",
1118
+ "",
1119
+ gr.update(visible=False),
1120
+ gr.update(),
1121
+ gr.update()
1122
+ )
1123
+
1124
+ if isinstance(data, dict) and isinstance(data.get("data"), list):
1125
+ items = data["data"]
1126
+ print(f"βœ… [respond] Found data array with {len(items)} items")
1127
+ lang_code = LANGUAGES.get(language, "en")
1128
+
1129
+ # Filter out error objects before building cards
1130
+ valid_items = [
1131
+ item for item in items
1132
+ if isinstance(item, dict) and item.get("status") != "error"
1133
+ ]
1134
+
1135
+ if len(valid_items) < len(items):
1136
+ print(f"⚠️ [respond] Filtered out {len(items) - len(valid_items)} error objects")
1137
+
1138
+ for item in valid_items:
1139
+ parliament_cards.append(build_parliament_card(item, lang_code))
1140
+
1141
+ # Optional date filtering for meetings (client-side)
1142
+ if last_request and last_request.get("tool") == "openparldata_search_meetings":
1143
+ args = last_request.get("arguments", {})
1144
+ date_from = args.get("date_from")
1145
+ date_to = args.get("date_to")
1146
+
1147
+ if date_from or date_to:
1148
+ def within_window(date_value: str | None) -> bool:
1149
+ if not date_value:
1150
+ return False
1151
+ try:
1152
+ card_date = datetime.fromisoformat(date_value).date()
1153
+ except ValueError:
1154
+ try:
1155
+ card_date = datetime.strptime(date_value, "%Y-%m-%d").date()
1156
+ except ValueError:
1157
+ return False
1158
+ if date_from:
1159
+ start = datetime.strptime(date_from, "%Y-%m-%d").date()
1160
+ if card_date < start:
1161
+ return False
1162
+ if date_to:
1163
+ end = datetime.strptime(date_to, "%Y-%m-%d").date()
1164
+ if card_date > end:
1165
+ return False
1166
+ return True
1167
+
1168
+ before = len(parliament_cards)
1169
+ parliament_cards = [card for card in parliament_cards if within_window(card.get("date"))]
1170
+ print(f"βœ… [respond] Filtered meetings by date window ({before} β†’ {len(parliament_cards)})")
1171
+
1172
+ # Limit display to avoid overwhelming the UI
1173
+ MAX_RESULTS = 50
1174
+ truncated = False
1175
+ if len(parliament_cards) > MAX_RESULTS:
1176
+ print(f"⚠️ [respond] Truncating card list from {len(parliament_cards)} to {MAX_RESULTS}")
1177
+ parliament_cards = parliament_cards[:MAX_RESULTS]
1178
+ truncated = True
1179
+
1180
+ if parliament_cards:
1181
+ total = data.get("meta", {}).get("total_records") or len(parliament_cards)
1182
+ display_count = len(parliament_cards)
1183
+ bot_message = f"**Found {total} result(s).** Showing {display_count} items below:"
1184
+ if LANGUAGES.get(language, "en") == "en":
1185
+ bot_message += "\n\n*Note: English content is not available from the API. Results are displayed in German.*"
1186
+ if truncated:
1187
+ bot_message += f"\n\n_Only the first {MAX_RESULTS} items are displayed. Refine your search for more specific results._"
1188
+ elif last_request and last_request.get("tool") == "openparldata_search_meetings":
1189
+ bot_message = "No meetings found that match the requested filters. Try adjusting the date range or search keywords."
1190
+ else:
1191
+ print("❌ [respond] Data structure does not contain a 'data' array.")
1192
+
1193
+ except json.JSONDecodeError as e:
1194
+ print(f"❌ [respond] JSON parsing failed: {e}")
1195
+ except Exception as e:
1196
+ print(f"❌ [respond] Unexpected error during card extraction: {e}")
1197
 
1198
  # Handle parliament cards (for Parliament dataset)
1199
+ if dataset_type == "parliament" and parliament_cards:
1200
+ cards_html, page_info, page_num, show_pagination = render_parliament_cards(parliament_cards, 1)
1201
  return (
1202
  "",
1203
+ gr.update(value=bot_message, visible=True),
1204
  None,
1205
  gr.update(visible=False),
1206
  current_datacube_state,
1207
  gr.update(),
1208
  gr.update(visible=False),
1209
+ parliament_cards, # parliament_cards_state
1210
  page_num, # parliament_page_state
1211
  cards_html, # parliament_cards_html
1212
  page_info, # page_info
 
1219
  if dataset_type == "statistics" and results_data:
1220
  return (
1221
  "",
1222
+ gr.update(value=bot_message, visible=True),
1223
  None,
1224
  gr.update(visible=False),
1225
  datacube_map,
 
1238
  if csv_file:
1239
  return (
1240
  "",
1241
+ gr.update(value=bot_message, visible=True),
1242
  csv_file,
1243
  gr.update(visible=True),
1244
  current_datacube_state,
 
1255
 
1256
  return (
1257
  "",
1258
+ gr.update(value=bot_message, visible=True),
1259
  None,
1260
  gr.update(visible=False),
1261
  current_datacube_state,
 
1287
  return cards_html, page_info, page_num
1288
 
1289
  # Handle "Get Data" button click for datacube selection
1290
+ def fetch_datacube_data(selected_choice, current_datacube_state, language, request: gr.Request):
1291
  show_debug = False # Debug mode disabled in UI
 
 
 
1292
 
1293
  if not selected_choice or not current_datacube_state:
1294
  error_msg = "⚠️ Please select a datacube first."
1295
+ return gr.update(value=error_msg, visible=True), None, gr.update(visible=False), gr.update(visible=False)
 
1296
 
1297
  # Check usage limit
1298
  user_id = request.client.host if request and hasattr(request, 'client') else "unknown"
 
1302
  "⚠️ Daily request limit reached. You have used all 50 requests for today. "
1303
  "Please try again tomorrow.\n\nThis limit helps us keep the service free and available for everyone."
1304
  )
1305
+ return gr.update(value=bot_message, visible=True), None, gr.update(visible=False), gr.update(visible=False)
 
1306
 
1307
  # Get datacube ID from mapping
1308
  datacube_id = current_datacube_state.get(selected_choice)
1309
 
1310
  if not datacube_id:
1311
  error_msg = "❌ Error: Could not find datacube ID for selected option."
1312
+ return gr.update(value=error_msg, visible=True), None, gr.update(visible=False), gr.update(visible=False)
 
1313
 
1314
  # Get language code
1315
  lang_code = LANGUAGES.get(language, "en")
 
1317
  bfs_engine = DATASET_ENGINES.get("statistics")
1318
  if not isinstance(bfs_engine, BFSEngine):
1319
  error_msg = "❌ Error: BFS engine unavailable."
1320
+ return gr.update(value=error_msg, visible=True), None, gr.update(visible=False), gr.update(visible=False)
 
1321
 
1322
  bot_message, csv_file_path = bfs_engine.fetch_datacube_data(datacube_id, lang_code, show_debug)
1323
 
 
1324
  if csv_file_path:
1325
+ return gr.update(value=bot_message, visible=True), csv_file_path, gr.update(visible=True), gr.update(visible=False)
1326
 
1327
+ return gr.update(value=bot_message, visible=True), None, gr.update(visible=False), gr.update(visible=False)
1328
 
1329
  msg.submit(
1330
  respond,
1331
+ [msg, language, dataset, datacube_state, parliament_cards_state, parliament_page_state],
1332
+ [msg, status_text, download_file, download_file, datacube_state, datacube_radio, datacube_selection_row,
1333
  parliament_cards_state, parliament_page_state, parliament_cards_html, page_info, parliament_cards_row,
1334
  prev_page_btn, next_page_btn]
1335
  )
1336
  submit.click(
1337
  respond,
1338
+ [msg, language, dataset, datacube_state, parliament_cards_state, parliament_page_state],
1339
+ [msg, status_text, download_file, download_file, datacube_state, datacube_radio, datacube_selection_row,
1340
  parliament_cards_state, parliament_page_state, parliament_cards_html, page_info, parliament_cards_row,
1341
  prev_page_btn, next_page_btn]
1342
  )
1343
  get_data_btn.click(
1344
  fetch_datacube_data,
1345
+ [datacube_radio, datacube_state, language],
1346
+ [status_text, download_file, download_file, datacube_selection_row]
1347
  )
1348
  prev_page_btn.click(
1349
  prev_page,
 
1356
  [parliament_cards_html, page_info, parliament_page_state]
1357
  )
1358
 
1359
+ # Update examples when dataset or language changes
1360
+ dataset.change(
1361
+ create_examples_text,
1362
+ [dataset, language],
1363
+ [examples_display]
1364
+ )
1365
+ language.change(
1366
+ create_examples_text,
1367
+ [dataset, language],
1368
+ [examples_display]
1369
+ )
1370
+
1371
+ # Initialize examples on load
1372
+ demo.load(
1373
+ create_examples_text,
1374
+ [dataset, language],
1375
+ [examples_display]
1376
+ )
1377
+
1378
  gr.Markdown(
1379
  """
1380
  ---
1381
  **Data Sources:**
1382
+ - **Swiss Parliament Data:** with thanks to Christian and Florin for creating OpenParlData.ch, the model queries their API to retrieve parliamentary data
1383
  - **Swiss Statistics (BFS):** Federal Statistical Office data via PxWeb API
1384
 
1385
  **Rate Limit:** 50 requests per day per user (shared across both datasets) to keep the service affordable and accessible.
core/__init__.py ADDED
File without changes
datasets/__init__.py ADDED
File without changes
datasets/bfs/__init__.py ADDED
File without changes
datasets/bfs/constants.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constants and configuration for BFS (Swiss Federal Statistics) engine."""
2
+
3
# Example queries for BFS (two-step workflow).
# Keys are UI language codes; values are the example prompts shown to the user.
BFS_EXAMPLES = {
    "en": [
        "I want inflation data",
        "Show me population statistics",
        "I need employment data by canton",
        "Find energy consumption statistics"
    ],
    "de": [
        "Ich möchte Inflationsdaten",
        "Zeige mir Bevölkerungsstatistiken",
        "Ich brauche Beschäftigungsdaten nach Kanton",
        "Finde Energieverbrauchsstatistiken"
    ],
    "fr": [
        "Je veux des données sur l'inflation",
        "Montrez-moi les statistiques de population",
        "J'ai besoin de données sur l'emploi par canton",
        "Trouvez les statistiques de consommation d'énergie"
    ],
    "it": [
        "Voglio dati sull'inflazione",
        "Mostrami le statistiche sulla popolazione",
        "Ho bisogno di dati sull'occupazione per cantone",
        "Trova le statistiche sul consumo energetico"
    ]
}
datasets/parliament/__init__.py ADDED
File without changes
datasets/parliament/constants.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constants and configuration for OpenParlData parliament engine."""
2
+
3
# Valid parameter names per tool.
# Every tool accepts the same six paging/search parameters; only the
# tool-specific extras are listed per entry and merged into a fresh set.
TOOL_PARAMS = {
    tool_name: {"query", "level", "language", "limit", "offset", "response_format", *extras}
    for tool_name, extras in (
        ("openparldata_search_parliamentarians", ("canton", "party", "active_only")),
        ("openparldata_search_votes", ("date_from", "date_to", "parliament_id", "vote_type")),
        ("openparldata_search_motions", ("submitter_id", "status", "date_from", "date_to")),
        ("openparldata_search_debates", ("date_from", "date_to", "speaker_id", "topic", "parliament_id")),
        ("openparldata_search_meetings", ("date_from", "date_to", "body_key")),
    )
}
26
+
27
# Example queries for OpenParlData.
# Keys are UI language codes; values are the example prompts shown to the user.
# NOTE: the English list has five entries, the other languages four (no
# meetings example).
OPENPARLDATA_EXAMPLES = {
    "en": [
        "Who are the parliamentarians from Zurich?",
        "Show me recent votes about climate policy",
        "What motions were submitted about healthcare in 2024?",
        "Find debates about immigration reform",
        "List Zurich parliament meetings scheduled for next year"
    ],
    "de": [
        "Wer sind die Parlamentarier aus Zürich?",
        "Zeige mir aktuelle Abstimmungen zur Klimapolitik",
        "Welche Anträge zum Gesundheitswesen wurden 2024 eingereicht?",
        "Finde Debatten über Migrationsreform"
    ],
    "fr": [
        "Qui sont les parlementaires de Zurich?",
        "Montrez-moi les votes récents sur la politique climatique",
        "Quelles motions sur la santé ont été soumises en 2024?",
        "Trouvez les débats sur la réforme de l'immigration"
    ],
    "it": [
        "Chi sono i parlamentari di Zurigo?",
        "Mostrami i voti recenti sulla politica climatica",
        "Quali mozioni sulla sanità sono state presentate nel 2024?",
        "Trova i dibattiti sulla riforma dell'immigrazione"
    ]
}
mcp_clients/__init__.py ADDED
File without changes
mcp_clients/client.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified MCP Client for connecting to MCP servers.
3
+ Works with any MCP server via stdio connection.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import json
9
+ from typing import Optional, Dict, Any, List
10
+ from pathlib import Path
11
+
12
+ from mcp.client.session import ClientSession
13
+ from mcp.client.stdio import stdio_client, StdioServerParameters
14
+
15
+
16
+ class MCPClient:
17
+ """Generic client for interacting with any MCP server via stdio."""
18
+
19
+ def __init__(self, server_script_path: str | Path, client_name: str = "MCP"):
20
+ """
21
+ Initialize MCP client.
22
+
23
+ Args:
24
+ server_script_path: Path to the MCP server Python script
25
+ client_name: Name for logging/debugging (e.g., "OpenParlData", "BFS")
26
+ """
27
+ self.server_script = Path(server_script_path)
28
+ self.client_name = client_name
29
+ self.session: Optional[ClientSession] = None
30
+ self.available_tools: List[Dict[str, Any]] = []
31
+
32
+ async def connect(self):
33
+ """Connect to the MCP server."""
34
+ if not self.server_script.exists():
35
+ raise FileNotFoundError(
36
+ f"{self.client_name} MCP server script not found at {self.server_script}"
37
+ )
38
+
39
+ # Server parameters for stdio connection
40
+ server_params = StdioServerParameters(
41
+ command=sys.executable, # Python interpreter
42
+ args=[str(self.server_script)],
43
+ env=None
44
+ )
45
+
46
+ # Create stdio client context
47
+ self.stdio_context = stdio_client(server_params)
48
+ read, write = await self.stdio_context.__aenter__()
49
+
50
+ # Create session
51
+ self.session = ClientSession(read, write)
52
+ await self.session.__aenter__()
53
+
54
+ # Initialize and get available tools
55
+ await self.session.initialize()
56
+
57
+ # List available tools
58
+ tools_result = await self.session.list_tools()
59
+ self.available_tools = [
60
+ {
61
+ "name": tool.name,
62
+ "description": tool.description,
63
+ "input_schema": tool.inputSchema
64
+ }
65
+ for tool in tools_result.tools
66
+ ]
67
+
68
+ return self.available_tools
69
+
70
+ async def disconnect(self):
71
+ """Disconnect from the MCP server."""
72
+ if self.session:
73
+ await self.session.__aexit__(None, None, None)
74
+ if hasattr(self, 'stdio_context'):
75
+ await self.stdio_context.__aexit__(None, None, None)
76
+
77
+ async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
78
+ """
79
+ Call an MCP tool with given arguments.
80
+
81
+ Args:
82
+ tool_name: Name of the tool to call
83
+ arguments: Dictionary of arguments for the tool
84
+
85
+ Returns:
86
+ Tool response as string
87
+ """
88
+ if not self.session:
89
+ raise RuntimeError(
90
+ f"Not connected to {self.client_name} MCP server. Call connect() first."
91
+ )
92
+
93
+ # Wrap arguments in 'params' key as expected by MCP server
94
+ tool_arguments = {"params": arguments}
95
+
96
+ # DEBUG: Log MCP payload before sending
97
+ print(f"\nπŸ“€ [{self.client_name}Client] Sending to MCP server:")
98
+ print(f" Tool: {tool_name}")
99
+ print(f" Wrapped payload: {tool_arguments}")
100
+ print(f" Payload types: {dict((k, type(v).__name__) for k, v in tool_arguments.items())}")
101
+
102
+ # Call the tool
103
+ result = await self.session.call_tool(tool_name, arguments=tool_arguments)
104
+
105
+ # Extract text content from result
106
+ if result.content:
107
+ # MCP returns list of content blocks
108
+ text_parts = []
109
+ for content in result.content:
110
+ if hasattr(content, 'text'):
111
+ text_parts.append(content.text)
112
+ elif isinstance(content, dict) and 'text' in content:
113
+ text_parts.append(content['text'])
114
+ return "\n".join(text_parts)
115
+
116
+ return "No response from tool"
117
+
118
+ def get_tool_info(self) -> List[Dict[str, Any]]:
119
+ """Get information about available tools."""
120
+ return self.available_tools
121
+
122
+
123
+ # Convenience factory functions for backwards compatibility
124
+
125
def create_openparldata_client() -> MCPClient:
    """Build an ``MCPClient`` for the OpenParlData server script.

    The script path is resolved relative to this module:
    ``<parent of this package>/mcp_openparldata/openparldata_mcp.py``.
    """
    project_root = Path(__file__).parent.parent
    return MCPClient(
        project_root.joinpath("mcp_openparldata", "openparldata_mcp.py"),
        client_name="OpenParlData",
    )
129
+
130
+
131
def create_bfs_client() -> MCPClient:
    """Build an ``MCPClient`` for the BFS server script.

    The script path is resolved relative to this module:
    ``<parent of this package>/mcp_bfs/bfs_mcp_server.py``.
    """
    project_root = Path(__file__).parent.parent
    return MCPClient(
        project_root.joinpath("mcp_bfs", "bfs_mcp_server.py"),
        client_name="BFS",
    )
mcp_integration.py CHANGED
@@ -12,205 +12,15 @@ from typing import Optional, Dict, Any, List
12
  from pathlib import Path
13
 
14
  # Add mcp directory to path
15
- mcp_dir = Path(__file__).parent / "mcp"
16
  sys.path.insert(0, str(mcp_dir))
17
 
18
- from mcp.client.session import ClientSession
19
- from mcp.client.stdio import stdio_client, StdioServerParameters
20
-
21
-
22
- class OpenParlDataClient:
23
- """Client for interacting with OpenParlData MCP server."""
24
-
25
- def __init__(self):
26
- self.session: Optional[ClientSession] = None
27
- self.available_tools: List[Dict[str, Any]] = []
28
-
29
- async def connect(self):
30
- """Connect to the MCP server."""
31
- # Get the path to the MCP server script
32
- server_script = Path(__file__).parent / "mcp" / "openparldata_mcp.py"
33
-
34
- if not server_script.exists():
35
- raise FileNotFoundError(f"MCP server script not found at {server_script}")
36
-
37
- # Server parameters for stdio connection
38
- server_params = StdioServerParameters(
39
- command=sys.executable, # Python interpreter
40
- args=[str(server_script)],
41
- env=None
42
- )
43
-
44
- # Create stdio client context
45
- self.stdio_context = stdio_client(server_params)
46
- read, write = await self.stdio_context.__aenter__()
47
-
48
- # Create session
49
- self.session = ClientSession(read, write)
50
- await self.session.__aenter__()
51
-
52
- # Initialize and get available tools
53
- await self.session.initialize()
54
-
55
- # List available tools
56
- tools_result = await self.session.list_tools()
57
- self.available_tools = [
58
- {
59
- "name": tool.name,
60
- "description": tool.description,
61
- "input_schema": tool.inputSchema
62
- }
63
- for tool in tools_result.tools
64
- ]
65
-
66
- return self.available_tools
67
-
68
- async def disconnect(self):
69
- """Disconnect from the MCP server."""
70
- if self.session:
71
- await self.session.__aexit__(None, None, None)
72
- if hasattr(self, 'stdio_context'):
73
- await self.stdio_context.__aexit__(None, None, None)
74
-
75
- async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
76
- """
77
- Call an MCP tool with given arguments.
78
-
79
- Args:
80
- tool_name: Name of the tool to call
81
- arguments: Dictionary of arguments for the tool
82
-
83
- Returns:
84
- Tool response as string
85
- """
86
- if not self.session:
87
- raise RuntimeError("Not connected to MCP server. Call connect() first.")
88
-
89
- # Wrap arguments in 'params' key as expected by MCP server
90
- tool_arguments = {"params": arguments}
91
-
92
- # DEBUG: Log MCP payload before sending
93
- print(f"\nπŸ“€ [OpenParlDataClient] Sending to MCP server:")
94
- print(f" Tool: {tool_name}")
95
- print(f" Wrapped payload: {tool_arguments}")
96
- print(f" Payload types: {dict((k, type(v).__name__) for k, v in tool_arguments.items())}")
97
-
98
- # Call the tool
99
- result = await self.session.call_tool(tool_name, arguments=tool_arguments)
100
-
101
- # Extract text content from result
102
- if result.content:
103
- # MCP returns list of content blocks
104
- text_parts = []
105
- for content in result.content:
106
- if hasattr(content, 'text'):
107
- text_parts.append(content.text)
108
- elif isinstance(content, dict) and 'text' in content:
109
- text_parts.append(content['text'])
110
- return "\n".join(text_parts)
111
-
112
- return "No response from tool"
113
-
114
- def get_tool_info(self) -> List[Dict[str, Any]]:
115
- """Get information about available tools."""
116
- return self.available_tools
117
-
118
-
119
- class BFSClient:
120
- """Client for interacting with BFS MCP server."""
121
-
122
- def __init__(self):
123
- self.session: Optional[ClientSession] = None
124
- self.available_tools: List[Dict[str, Any]] = []
125
-
126
- async def connect(self):
127
- """Connect to the MCP server."""
128
- # Get the path to the BFS MCP server script
129
- server_script = Path(__file__).parent / "mcp_bfs" / "bfs_mcp_server.py"
130
-
131
- if not server_script.exists():
132
- raise FileNotFoundError(f"BFS MCP server script not found at {server_script}")
133
-
134
- # Server parameters for stdio connection
135
- server_params = StdioServerParameters(
136
- command=sys.executable, # Python interpreter
137
- args=[str(server_script)],
138
- env=None
139
- )
140
-
141
- # Create stdio client context
142
- self.stdio_context = stdio_client(server_params)
143
- read, write = await self.stdio_context.__aenter__()
144
-
145
- # Create session
146
- self.session = ClientSession(read, write)
147
- await self.session.__aenter__()
148
-
149
- # Initialize and get available tools
150
- await self.session.initialize()
151
-
152
- # List available tools
153
- tools_result = await self.session.list_tools()
154
- self.available_tools = [
155
- {
156
- "name": tool.name,
157
- "description": tool.description,
158
- "input_schema": tool.inputSchema
159
- }
160
- for tool in tools_result.tools
161
- ]
162
-
163
- return self.available_tools
164
-
165
- async def disconnect(self):
166
- """Disconnect from the MCP server."""
167
- if self.session:
168
- await self.session.__aexit__(None, None, None)
169
- if hasattr(self, 'stdio_context'):
170
- await self.stdio_context.__aexit__(None, None, None)
171
-
172
- async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str:
173
- """
174
- Call an MCP tool with given arguments.
175
-
176
- Args:
177
- tool_name: Name of the tool to call
178
- arguments: Dictionary of arguments for the tool
179
-
180
- Returns:
181
- Tool response as string
182
- """
183
- if not self.session:
184
- raise RuntimeError("Not connected to BFS MCP server. Call connect() first.")
185
-
186
- # Wrap arguments in 'params' key as expected by MCP server
187
- tool_arguments = {"params": arguments}
188
-
189
- # DEBUG: Log MCP payload before sending
190
- print(f"\nπŸ“€ [BFSClient] Sending to MCP server:")
191
- print(f" Tool: {tool_name}")
192
- print(f" Wrapped payload: {tool_arguments}")
193
- print(f" Payload types: {dict((k, type(v).__name__) for k, v in tool_arguments.items())}")
194
-
195
- # Call the tool
196
- result = await self.session.call_tool(tool_name, arguments=tool_arguments)
197
-
198
- # Extract text content from result
199
- if result.content:
200
- # MCP returns list of content blocks
201
- text_parts = []
202
- for content in result.content:
203
- if hasattr(content, 'text'):
204
- text_parts.append(content.text)
205
- elif isinstance(content, dict) and 'text' in content:
206
- text_parts.append(content['text'])
207
- return "\n".join(text_parts)
208
-
209
- return "No response from tool"
210
-
211
- def get_tool_info(self) -> List[Dict[str, Any]]:
212
- """Get information about available tools."""
213
- return self.available_tools
214
 
215
 
216
  # Convenience functions for common operations
@@ -229,7 +39,7 @@ async def search_parliamentarians(
229
  Returns:
230
  Tuple of (response_text, debug_info)
231
  """
232
- client = OpenParlDataClient()
233
 
234
  try:
235
  await client.connect()
@@ -273,7 +83,7 @@ async def search_votes(
273
  Returns:
274
  Tuple of (response_text, debug_info)
275
  """
276
- client = OpenParlDataClient()
277
 
278
  try:
279
  await client.connect()
@@ -316,7 +126,7 @@ async def search_motions(
316
  Returns:
317
  Tuple of (response_text, debug_info)
318
  """
319
- client = OpenParlDataClient()
320
 
321
  try:
322
  await client.connect()
@@ -362,7 +172,7 @@ async def execute_mcp_query(
362
  Returns:
363
  Tuple of (response_text, debug_info)
364
  """
365
- client = OpenParlDataClient()
366
 
367
  try:
368
  await client.connect()
@@ -397,7 +207,7 @@ async def execute_mcp_query_bfs(
397
  Returns:
398
  Tuple of (response_text, debug_info)
399
  """
400
- client = BFSClient()
401
 
402
  try:
403
  await client.connect()
 
12
  from pathlib import Path
13
 
14
  # Add mcp directory to path
15
+ mcp_dir = Path(__file__).parent / "mcp_openparldata"
16
  sys.path.insert(0, str(mcp_dir))
17
 
18
+ from mcp_clients.client import create_openparldata_client, create_bfs_client
19
+
20
+
21
+ # Backwards compatibility aliases
22
+ OpenParlDataClient = create_openparldata_client
23
+ BFSClient = create_bfs_client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  # Convenience functions for common operations
 
39
  Returns:
40
  Tuple of (response_text, debug_info)
41
  """
42
+ client = create_openparldata_client()
43
 
44
  try:
45
  await client.connect()
 
83
  Returns:
84
  Tuple of (response_text, debug_info)
85
  """
86
+ client = create_openparldata_client()
87
 
88
  try:
89
  await client.connect()
 
126
  Returns:
127
  Tuple of (response_text, debug_info)
128
  """
129
+ client = create_openparldata_client()
130
 
131
  try:
132
  await client.connect()
 
172
  Returns:
173
  Tuple of (response_text, debug_info)
174
  """
175
+ client = create_openparldata_client()
176
 
177
  try:
178
  await client.connect()
 
207
  Returns:
208
  Tuple of (response_text, debug_info)
209
  """
210
+ client = create_bfs_client()
211
 
212
  try:
213
  await client.connect()
{mcp → mcp_openparldata}/openparldata_mcp.py RENAMED
@@ -84,7 +84,7 @@ class SearchVotesInput(BaseModel):
84
  date_to: Optional[str] = Field(None, description="End date (ISO format: YYYY-MM-DD)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
85
  parliament_id: Optional[str] = Field(None, description="Filter by parliament ID")
86
  vote_type: Optional[VoteType] = Field(None, description="Type of vote")
87
- level: Optional[ParliamentLevel] = Field(ParliamentLevel.FEDERAL, description="Parliament level")
88
  language: Language = Field(Language.EN, description="Response language")
89
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results to return", ge=1, le=MAX_LIMIT)
90
  offset: int = Field(0, description="Pagination offset", ge=0)
@@ -108,7 +108,7 @@ class SearchMotionsInput(BaseModel):
108
  status: Optional[str] = Field(None, description="Motion status (e.g., 'pending', 'accepted', 'rejected')")
109
  date_from: Optional[str] = Field(None, description="Start date (ISO format)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
110
  date_to: Optional[str] = Field(None, description="End date (ISO format)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
111
- level: Optional[ParliamentLevel] = Field(ParliamentLevel.FEDERAL, description="Parliament level")
112
  language: Language = Field(Language.EN, description="Response language")
113
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results", ge=1, le=MAX_LIMIT)
114
  offset: int = Field(0, description="Pagination offset", ge=0)
@@ -124,7 +124,21 @@ class SearchDebatesInput(BaseModel):
124
  speaker_id: Optional[str] = Field(None, description="Filter by speaker's ID")
125
  topic: Optional[str] = Field(None, description="Topic or theme filter")
126
  parliament_id: Optional[str] = Field(None, description="Parliament identifier")
127
- level: Optional[ParliamentLevel] = Field(ParliamentLevel.FEDERAL, description="Parliament level")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  language: Language = Field(Language.EN, description="Response language")
129
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results", ge=1, le=MAX_LIMIT)
130
  offset: int = Field(0, description="Pagination offset", ge=0)
@@ -133,7 +147,11 @@ class SearchDebatesInput(BaseModel):
133
  # Helper functions
134
 
135
  def truncate_response(content: str, limit: int = CHARACTER_LIMIT) -> str:
136
- """Truncate response if it exceeds character limit."""
 
 
 
 
137
  if len(content) <= limit:
138
  return content
139
 
@@ -147,25 +165,94 @@ def format_date(date_str: str) -> str:
147
  except:
148
  return date_str
149
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def format_parliamentarian_markdown(person: Dict[str, Any]) -> str:
151
  """Format parliamentarian data as markdown."""
152
- lines = [
153
- f"## {person.get('first_name', '')} {person.get('last_name', '')}",
154
- f"**Party:** {person.get('party', 'N/A')}",
155
- f"**Canton:** {person.get('canton', 'N/A')}",
156
- f"**Parliament:** {person.get('parliament_name', 'N/A')}",
157
- f"**Status:** {'Active' if person.get('active') else 'Inactive'}",
158
- ]
159
-
160
- if person.get('email'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  lines.append(f"**Email:** {person['email']}")
162
- if person.get('phone'):
163
  lines.append(f"**Phone:** {person['phone']}")
164
- if person.get('committee_memberships'):
165
- lines.append("\n### Committee Memberships")
166
- for committee in person['committee_memberships']:
167
- lines.append(f"- {committee}")
168
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  return "\n".join(lines)
170
 
171
  def format_vote_markdown(vote: Dict[str, Any]) -> str:
@@ -269,6 +356,8 @@ async def search_parliamentarians(params: SearchParliamentariansInput) -> str:
269
  "search": params.query,
270
  "active": params.active_only,
271
  "lang": params.language.value,
 
 
272
  "limit": params.limit,
273
  "offset": params.offset,
274
  "sort_by": "-id"
@@ -289,7 +378,8 @@ async def search_parliamentarians(params: SearchParliamentariansInput) -> str:
289
  result = await make_api_request("/persons", request_params)
290
 
291
  if params.response_format == ResponseFormat.JSON:
292
- return truncate_response(json.dumps(result, indent=2))
 
293
 
294
  # Format as markdown
295
  if result.get("status") == "error":
@@ -342,17 +432,29 @@ async def get_parliamentarian(params: GetParliamentarianInput) -> str:
342
  Returns comprehensive information including biographical data, party membership,
343
  committee assignments, and optionally their voting history and submitted motions.
344
  """
345
- request_params = {
346
- "include_votes": params.include_votes,
347
- "include_motions": params.include_motions,
348
- "lang": params.language.value
 
349
  }
 
 
 
 
 
 
 
 
 
 
350
 
351
  try:
352
- result = await make_api_request(f"/parliamentarians/{params.person_id}", request_params)
353
 
354
  if params.response_format == ResponseFormat.JSON:
355
- return truncate_response(json.dumps(result, indent=2))
 
356
 
357
  if result.get("mock_data"):
358
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Person ID:** {params.person_id}"
@@ -386,7 +488,11 @@ async def search_votes(params: SearchVotesInput) -> str:
386
  "date_to": params.date_to,
387
  "limit": params.limit,
388
  "offset": params.offset,
389
- "sort_by": "-date"
 
 
 
 
390
  }
391
 
392
  request_params = {k: v for k, v in request_params.items() if v is not None}
@@ -395,7 +501,8 @@ async def search_votes(params: SearchVotesInput) -> str:
395
  result = await make_api_request("/votings", request_params)
396
 
397
  if params.response_format == ResponseFormat.JSON:
398
- return truncate_response(json.dumps(result, indent=2))
 
399
 
400
  if result.get("status") == "error":
401
  return f"# API Error\n\n{result.get('message', 'Unknown error')}\n\n**Endpoint:** {result.get('endpoint', 'N/A')}"
@@ -470,7 +577,8 @@ async def get_vote_details(params: GetVoteDetailsInput) -> str:
470
  result = await make_api_request(f"/votes/{params.vote_id}", request_params)
471
 
472
  if params.response_format == ResponseFormat.JSON:
473
- return truncate_response(json.dumps(result, indent=2))
 
474
 
475
  if result.get("mock_data"):
476
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Vote ID:** {params.vote_id}"
@@ -492,30 +600,42 @@ async def get_vote_details(params: GetVoteDetailsInput) -> str:
492
  )
493
  async def search_motions(params: SearchMotionsInput) -> str:
494
  """
495
- Search for parliamentary motions, proposals, and initiatives.
496
-
497
- Returns motion records including titles, submitters, dates, status,
498
- and motion text. Can filter by submitter, status, and date range.
 
 
499
  """
500
- request_params = {
501
- "q": params.query,
502
  "submitter_id": params.submitter_id,
503
- "status": params.status,
504
- "date_from": params.date_from,
505
- "date_to": params.date_to,
506
  "level": params.level.value if params.level else None,
507
  "lang": params.language.value,
 
 
508
  "limit": params.limit,
509
- "offset": params.offset
 
510
  }
 
 
 
 
 
 
 
511
 
512
  request_params = {k: v for k, v in request_params.items() if v is not None}
513
 
514
  try:
515
- result = await make_api_request("/motions", request_params)
516
 
517
  if params.response_format == ResponseFormat.JSON:
518
- return truncate_response(json.dumps(result, indent=2))
 
519
 
520
  if result.get("mock_data"):
521
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Endpoint:** {result['endpoint']}"
@@ -543,13 +663,16 @@ async def search_motions(params: SearchMotionsInput) -> str:
543
  )
544
  async def search_debates(params: SearchDebatesInput) -> str:
545
  """
546
- Search parliamentary debate transcripts and proceedings.
547
-
548
- Returns debate records including speakers, topics, dates, and transcript excerpts.
549
  Can search by content, speaker, date range, and topic.
 
 
550
  """
551
  request_params = {
552
- "q": params.query,
 
553
  "date_from": params.date_from,
554
  "date_to": params.date_to,
555
  "speaker_id": params.speaker_id,
@@ -557,6 +680,10 @@ async def search_debates(params: SearchDebatesInput) -> str:
557
  "parliament_id": params.parliament_id,
558
  "level": params.level.value if params.level else None,
559
  "lang": params.language.value,
 
 
 
 
560
  "limit": params.limit,
561
  "offset": params.offset
562
  }
@@ -564,10 +691,11 @@ async def search_debates(params: SearchDebatesInput) -> str:
564
  request_params = {k: v for k, v in request_params.items() if v is not None}
565
 
566
  try:
567
- result = await make_api_request("/debates", request_params)
568
 
569
  if params.response_format == ResponseFormat.JSON:
570
- return truncate_response(json.dumps(result, indent=2))
 
571
 
572
  if result.get("mock_data"):
573
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Endpoint:** {result['endpoint']}"
@@ -597,6 +725,102 @@ async def search_debates(params: SearchDebatesInput) -> str:
597
  except Exception as e:
598
  return f"Error searching debates: {str(e)}"
599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  # Main execution
601
  if __name__ == "__main__":
602
  # Run FastMCP server (synchronous, blocking call)
 
84
  date_to: Optional[str] = Field(None, description="End date (ISO format: YYYY-MM-DD)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
85
  parliament_id: Optional[str] = Field(None, description="Filter by parliament ID")
86
  vote_type: Optional[VoteType] = Field(None, description="Type of vote")
87
+ level: Optional[ParliamentLevel] = Field(None, description="Parliament level")
88
  language: Language = Field(Language.EN, description="Response language")
89
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results to return", ge=1, le=MAX_LIMIT)
90
  offset: int = Field(0, description="Pagination offset", ge=0)
 
108
  status: Optional[str] = Field(None, description="Motion status (e.g., 'pending', 'accepted', 'rejected')")
109
  date_from: Optional[str] = Field(None, description="Start date (ISO format)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
110
  date_to: Optional[str] = Field(None, description="End date (ISO format)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
111
+ level: Optional[ParliamentLevel] = Field(None, description="Parliament level")
112
  language: Language = Field(Language.EN, description="Response language")
113
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results", ge=1, le=MAX_LIMIT)
114
  offset: int = Field(0, description="Pagination offset", ge=0)
 
124
  speaker_id: Optional[str] = Field(None, description="Filter by speaker's ID")
125
  topic: Optional[str] = Field(None, description="Topic or theme filter")
126
  parliament_id: Optional[str] = Field(None, description="Parliament identifier")
127
+ level: Optional[ParliamentLevel] = Field(None, description="Parliament level")
128
+ language: Language = Field(Language.EN, description="Response language")
129
+ limit: int = Field(DEFAULT_LIMIT, description="Maximum results", ge=1, le=MAX_LIMIT)
130
+ offset: int = Field(0, description="Pagination offset", ge=0)
131
+ response_format: ResponseFormat = Field(ResponseFormat.MARKDOWN, description="Response format")
132
+
133
+ class SearchMeetingsInput(BaseModel):
134
+ """Input for searching parliamentary meetings and sessions."""
135
+ model_config = ConfigDict(str_strip_whitespace=True, validate_assignment=True, extra='forbid')
136
+
137
+ query: Optional[str] = Field(None, description="Search query for meeting name or description", min_length=1, max_length=200)
138
+ date_from: Optional[str] = Field(None, description="Start date (ISO format: YYYY-MM-DD)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
139
+ date_to: Optional[str] = Field(None, description="End date (ISO format: YYYY-MM-DD)", pattern="^\\d{4}-\\d{2}-\\d{2}$")
140
+ body_key: Optional[str] = Field(None, description="Filter by body key (e.g., 'ZH')")
141
+ level: Optional[ParliamentLevel] = Field(None, description="Parliament level")
142
  language: Language = Field(Language.EN, description="Response language")
143
  limit: int = Field(DEFAULT_LIMIT, description="Maximum results", ge=1, le=MAX_LIMIT)
144
  offset: int = Field(0, description="Pagination offset", ge=0)
 
147
  # Helper functions
148
 
149
  def truncate_response(content: str, limit: int = CHARACTER_LIMIT) -> str:
150
+ """Truncate response if it exceeds the character limit, but never break JSON payloads."""
151
+ stripped = content.lstrip()
152
+ if stripped.startswith("{") or stripped.startswith("["):
153
+ return content # Preserve JSON so downstream parsers receive valid payloads
154
+
155
  if len(content) <= limit:
156
  return content
157
 
 
165
  except:
166
  return date_str
167
 
168
+ def _extract_multilingual_text(obj: Dict[str, Any] | None, preferred_lang: str = "de") -> str | None:
169
+ """Helper to extract a text value from multilingual dictionaries."""
170
+ if not isinstance(obj, dict):
171
+ return obj
172
+ lang_order = [preferred_lang, "de", "fr", "it", "rm", "en"]
173
+ for lang in lang_order:
174
+ value = obj.get(lang)
175
+ if value:
176
+ return value
177
+ return next(iter(obj.values()), None)
178
+
179
+
180
def format_parliamentarian_markdown(person: Dict[str, Any]) -> str:
    """Format parliamentarian data as markdown.

    Builds a level-2 heading with the person's name followed by optional
    ``**Label:** value`` lines (party, body, district, status, email, phone,
    official page) and, when the record carries them, up to five recent
    affairs and up to five recent votes.

    Args:
        person: Person record as a dict. Text fields may be plain strings or
            language-keyed dicts; German is preferred when several languages
            are present.

    Returns:
        The markdown text, lines joined with newlines.
    """
    preferred_lang = "de"

    fullname = person.get("fullname")
    if not fullname:
        # `or ""` also covers keys that are present but null, which would
        # otherwise be rendered as the literal text "None".
        firstname = person.get("firstname") or ""
        lastname = person.get("lastname") or ""
        fullname = f"{firstname} {lastname}".strip() or "Parliamentarian"

    lines = [f"## {fullname}"]

    party_obj = person.get("party") or person.get("party_harmonized")
    party = _extract_multilingual_text(party_obj, preferred_lang)
    if not party:
        party = person.get("party_name")
    if party:
        lines.append(f"**Party:** {party}")

    body_key = person.get("body_key")
    if body_key:
        lines.append(f"**Body:** {body_key}")

    electoral_district = _extract_multilingual_text(person.get("electoral_district"), preferred_lang)
    if electoral_district:
        lines.append(f"**District:** {electoral_district}")

    status = person.get("active")
    if status is not None:
        lines.append(f"**Status:** {'Active' if status else 'Inactive'}")

    if person.get("email"):
        lines.append(f"**Email:** {person['email']}")
    if person.get("phone"):
        lines.append(f"**Phone:** {person['phone']}")

    website = _extract_multilingual_text(person.get("website_parliament_url"), preferred_lang)
    if website:
        lines.append(f"**Official Page:** {website}")

    # Append recent affairs when available (requires expand=affairs)
    affairs = person.get("affairs")
    if isinstance(affairs, list) and affairs:
        lines.append("\n### Recent Affairs")
        for affair in affairs[:5]:
            # The helper already passes plain strings through; falling back to
            # the raw field would only ever print a dict with no usable text.
            title = _extract_multilingual_text(affair.get("title"), preferred_lang)
            number = affair.get("number")
            begin_date = affair.get("begin_date")
            summary = f"- {title}" if title else "- Affair"
            if number:
                summary += f" ({number})"
            if begin_date:
                # str() keeps the date-only slice safe for non-string values.
                summary += f" – {str(begin_date)[:10]}"
            lines.append(summary)

    # Append recent votes when available (requires expand=votes)
    votes = person.get("votes")
    if isinstance(votes, list) and votes:
        lines.append("\n### Recent Votes")
        for vote in votes[:5]:
            voting = vote.get("voting")
            date = None
            title = None
            if isinstance(voting, dict):
                date = voting.get("date")
                title = _extract_multilingual_text(voting.get("title"), preferred_lang)
            decision = vote.get("decision") or vote.get("value")
            summary = "- Vote"
            if date:
                summary += f" on {str(date)[:10]}"
            if title:
                summary += f": {title}"
            if decision:
                summary += f" → {decision}"
            lines.append(summary)

    return "\n".join(lines)
257
 
258
  def format_vote_markdown(vote: Dict[str, Any]) -> str:
 
356
  "search": params.query,
357
  "active": params.active_only,
358
  "lang": params.language.value,
359
+ "lang_fallback": "de,fr,it",
360
+ "search_language": params.language.value,
361
  "limit": params.limit,
362
  "offset": params.offset,
363
  "sort_by": "-id"
 
378
  result = await make_api_request("/persons", request_params)
379
 
380
  if params.response_format == ResponseFormat.JSON:
381
+ # Use ensure_ascii=False to properly handle special characters in text content
382
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
383
 
384
  # Format as markdown
385
  if result.get("status") == "error":
 
432
  Returns comprehensive information including biographical data, party membership,
433
  committee assignments, and optionally their voting history and submitted motions.
434
  """
435
+ expand_relations: list[str] = []
436
+ request_params: Dict[str, Any] = {
437
+ "lang": params.language.value,
438
+ "lang_fallback": "de,fr,it",
439
+ "lang_format": "nested",
440
  }
441
+
442
+ if params.include_votes:
443
+ expand_relations.append("votes")
444
+ request_params["votes_limit"] = 25
445
+ if params.include_motions:
446
+ expand_relations.append("affairs")
447
+ request_params["affairs_limit"] = 25
448
+
449
+ if expand_relations:
450
+ request_params["expand"] = ",".join(expand_relations)
451
 
452
  try:
453
+ result = await make_api_request(f"/persons/{params.person_id}", request_params)
454
 
455
  if params.response_format == ResponseFormat.JSON:
456
+ # Use ensure_ascii=False to properly handle special characters in text content
457
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
458
 
459
  if result.get("mock_data"):
460
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Person ID:** {params.person_id}"
 
488
  "date_to": params.date_to,
489
  "limit": params.limit,
490
  "offset": params.offset,
491
+ "sort_by": "-date",
492
+ "lang": params.language.value,
493
+ "lang_fallback": "de,fr,it",
494
+ "search_language": params.language.value,
495
+ "search_mode": "natural",
496
  }
497
 
498
  request_params = {k: v for k, v in request_params.items() if v is not None}
 
501
  result = await make_api_request("/votings", request_params)
502
 
503
  if params.response_format == ResponseFormat.JSON:
504
+ # Use ensure_ascii=False to properly handle special characters in text content
505
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
506
 
507
  if result.get("status") == "error":
508
  return f"# API Error\n\n{result.get('message', 'Unknown error')}\n\n**Endpoint:** {result.get('endpoint', 'N/A')}"
 
577
  result = await make_api_request(f"/votes/{params.vote_id}", request_params)
578
 
579
  if params.response_format == ResponseFormat.JSON:
580
+ # Use ensure_ascii=False to properly handle special characters in text content
581
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
582
 
583
  if result.get("mock_data"):
584
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Vote ID:** {params.vote_id}"
 
600
  )
601
  async def search_motions(params: SearchMotionsInput) -> str:
602
  """
603
+ Search for parliamentary affairs (motions, postulates, interpellations, and initiatives).
604
+
605
+ Returns affair records including titles, submitters, dates, status,
606
+ and text. Can filter by submitter, status, and date range.
607
+
608
+ Note: Uses the /affairs endpoint which encompasses all types of parliamentary affairs.
609
  """
610
+ request_params: Dict[str, Any] = {
611
+ "search": params.query,
612
  "submitter_id": params.submitter_id,
613
+ "begin_date_from": params.date_from,
614
+ "begin_date_to": params.date_to,
 
615
  "level": params.level.value if params.level else None,
616
  "lang": params.language.value,
617
+ "lang_fallback": "de,fr,it",
618
+ "search_language": params.language.value,
619
  "limit": params.limit,
620
+ "offset": params.offset,
621
+ "sort_by": "-begin_date"
622
  }
623
+
624
+ if params.status:
625
+ status_str = str(params.status)
626
+ if status_str.isdigit():
627
+ request_params["state_external_id"] = status_str
628
+ else:
629
+ request_params["state_name"] = status_str
630
 
631
  request_params = {k: v for k, v in request_params.items() if v is not None}
632
 
633
  try:
634
+ result = await make_api_request("/affairs", request_params)
635
 
636
  if params.response_format == ResponseFormat.JSON:
637
+ # Use ensure_ascii=False to properly handle special characters in text content
638
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
639
 
640
  if result.get("mock_data"):
641
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Endpoint:** {result['endpoint']}"
 
663
  )
664
  async def search_debates(params: SearchDebatesInput) -> str:
665
  """
666
+ Search parliamentary speeches (debate contributions).
667
+
668
+ Returns speech records including speakers, dates, and speech text excerpts.
669
  Can search by content, speaker, date range, and topic.
670
+
671
+ Note: Uses the /speeches endpoint which contains individual speech contributions.
672
  """
673
  request_params = {
674
+ # NOTE: "search" parameter causes 500 errors on /speeches endpoint - removed
675
+ # "search": params.query,
676
  "date_from": params.date_from,
677
  "date_to": params.date_to,
678
  "speaker_id": params.speaker_id,
 
680
  "parliament_id": params.parliament_id,
681
  "level": params.level.value if params.level else None,
682
  "lang": params.language.value,
683
+ "lang_fallback": "de,fr,it",
684
+ # "search_language": params.language.value, # Only used with search
685
+ # "search_mode": "natural", # Only used with search
686
+ "expand": "person,affair,meeting",
687
  "limit": params.limit,
688
  "offset": params.offset
689
  }
 
691
  request_params = {k: v for k, v in request_params.items() if v is not None}
692
 
693
  try:
694
+ result = await make_api_request("/speeches", request_params)
695
 
696
  if params.response_format == ResponseFormat.JSON:
697
+ # Use ensure_ascii=False to properly handle special characters in text content
698
+ return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))
699
 
700
  if result.get("mock_data"):
701
  return f"# OpenParlData API Status\n\n{result['message']}\n\n**Endpoint:** {result['endpoint']}"
 
725
  except Exception as e:
726
  return f"Error searching debates: {str(e)}"
727
 
728
@mcp.tool(
    name="openparldata_search_meetings",
    annotations={
        "title": "Search Parliamentary Meetings",
        "readOnlyHint": True,
        "destructiveHint": False,
        "idempotentHint": True,
        "openWorldHint": True
    }
)
async def search_meetings(params: SearchMeetingsInput) -> str:
    """
    Search for parliamentary meetings and sessions.

    Returns meeting metadata including titles, dates, locations, and related bodies.

    The date_from/date_to bounds are applied client-side to the page of results
    returned by the API, so a filtered response may hold fewer than `limit`
    entries. Meetings whose begin_date is missing or cannot be parsed are kept.
    Any exception is reported as an "Error searching meetings: ..." string
    rather than raised.
    """
    request_params = {
        "search": params.query,
        "body_key": params.body_key,
        "level": params.level.value if params.level else None,
        "lang": params.language.value,
        "lang_fallback": "de,fr,it",
        "limit": params.limit,
        "offset": params.offset,
        "sort_by": "-begin_date"
    }

    request_params = {k: v for k, v in request_params.items() if v is not None}

    try:
        result = await make_api_request("/meetings", request_params)

        if result.get("status") == "error":
            return f"# API Error\n\n{result.get('message', 'Unknown error')}\n\n**Endpoint:** {result.get('endpoint', 'N/A')}"

        # `or []` also covers an explicit null "data" field.
        data = result.get("data") or []

        # Optional client-side date filtering since API lacks date filters.
        # Parse the bounds once instead of once per meeting.
        earliest = datetime.fromisoformat(params.date_from).date() if params.date_from else None
        latest = datetime.fromisoformat(params.date_to).date() if params.date_to else None

        filtered_data = []
        for item in data:
            begin_date = item.get("begin_date")
            if begin_date and (earliest or latest):
                try:
                    day = datetime.fromisoformat(begin_date.replace("Z", "+00:00")).date()
                except ValueError:
                    day = None  # unparseable timestamps are not filtered out
                if day is not None:
                    if earliest and day < earliest:
                        continue
                    if latest and day > latest:
                        continue
            filtered_data.append(item)

        # Replace data with the filtered list for downstream consumers. Work on
        # shallow copies so the object returned by make_api_request is untouched.
        result = dict(result)
        result["data"] = filtered_data
        meta = dict(result.get("meta") or {})
        meta["filtered_count"] = len(filtered_data)
        result["meta"] = meta

        if params.response_format == ResponseFormat.JSON:
            return truncate_response(json.dumps(result, indent=2, ensure_ascii=False))

        # Markdown formatting
        lines = ["# Parliamentary Meetings\n"]
        for meeting in filtered_data:
            # Prefer German, fall back to any other language, and only then to
            # the generic label, so a missing name never renders as "None".
            title = _extract_multilingual_text(meeting.get("name"), "de") or "Meeting"
            lines.append(f"## {title}")
            if meeting.get("begin_date"):
                lines.append(f"**Start:** {format_date(meeting['begin_date'])}")
            if meeting.get("end_date"):
                lines.append(f"**End:** {format_date(meeting['end_date'])}")
            if meeting.get("location"):
                lines.append(f"**Location:** {meeting['location']}")
            if meeting.get("body_key"):
                lines.append(f"**Body:** {meeting['body_key']}")
            url = _extract_multilingual_text(meeting.get("url_external"), "de")
            if url:
                lines.append(f"[External Link]({url})")
            lines.append("\n---\n")

        if len(lines) == 1:
            lines.append("No meetings found for the provided filters.")

        return truncate_response("\n".join(lines))

    except Exception as e:
        return f"Error searching meetings: {str(e)}"
823
+
824
  # Main execution
825
  if __name__ == "__main__":
826
  # Run FastMCP server (synchronous, blocking call)
{mcp → mcp_openparldata}/requirements.txt RENAMED
File without changes
prompts/parliament.txt CHANGED
@@ -19,6 +19,9 @@ AVAILABLE TOOLS:
19
  4. openparldata_search_debates
20
  Params: query, date_from (YYYY-MM-DD), date_to (YYYY-MM-DD), language, limit, offset, response_format
21
 
 
 
 
22
  PARAMETER CONSTRAINTS:
23
  - limit: Integer between 1-100 (default 20). NEVER exceed 100.
24
  - language: lowercase "en", "de", "fr", or "it"
@@ -26,4 +29,29 @@ PARAMETER CONSTRAINTS:
26
  - response_format: "json" or "markdown" (default "markdown")
27
  - ONLY use parameters listed for each tool. NO extra/undocumented parameters.
28
 
29
- Rules: Use YYYY-MM-DD dates. For "latest" use date_from="2024-01-01" only.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  4. openparldata_search_debates
20
  Params: query, date_from (YYYY-MM-DD), date_to (YYYY-MM-DD), language, limit, offset, response_format
21
 
22
+ 5. openparldata_search_meetings
23
+ Params: query, date_from (YYYY-MM-DD), date_to (YYYY-MM-DD), body_key, language, limit, offset, response_format
24
+
25
  PARAMETER CONSTRAINTS:
26
  - limit: Integer between 1-100 (default 20). NEVER exceed 100.
27
  - language: lowercase "en", "de", "fr", or "it"
 
29
  - response_format: "json" or "markdown" (default "markdown")
30
  - ONLY use parameters listed for each tool. NO extra/undocumented parameters.
31
 
32
+ TOOL SELECTION GUIDANCE:
33
+ - Questions about specific people (names, parliamentarians, parties) → use openparldata_search_parliamentarians. Prefer the last name if a full name is provided.
34
+ - Requests about motions, initiatives, interpellations, or affairs → use openparldata_search_motions.
35
+ - Questions about vote outcomes or tallies → use openparldata_search_votes.
36
+ - Debate transcripts or speeches → use openparldata_search_debates.
37
+ - Meeting calendars or session schedules → use openparldata_search_meetings.
38
+ - When a canton is mentioned (e.g., Zurich, Genève, Bern), set `body_key` to the 2-letter code (ZH, GE, BE, etc.) for parliamentarian or meeting queries.
39
+ - Do not send empty strings or vague values (e.g., status="all"). If a filter is not explicit, omit it.
40
+
41
+ TRANSLATION:
42
+ - The API returns mostly German content. For English queries, translate search terms into German and set `language` to "de".
43
+ - Always respond with German keywords even if the question is in English. Examples: "climate change" → "Klimawandel", "healthcare" → "Gesundheitswesen", "immigration" → "Einwanderung".
44
+
45
+ CANTON CODES (use 2-letter uppercase for canton parameter):
46
+ - Zurich → ZH, Bern → BE, Geneva → GE, Vaud → VD, Lucerne → LU
47
+ - Basel-Stadt → BS, Basel-Landschaft → BL, Aargau → AG, St. Gallen → SG
48
+ - Ticino → TI, Graubünden → GR, Valais → VS, Neuchâtel → NE
49
+ - Examples: "parliamentarians from Zurich" → canton: "ZH", "Geneva votes" → canton: "GE"
50
+
51
+ DATE RULES:
52
+ - Use YYYY-MM-DD format always
53
+ - "recent"/"latest" → use last 2 years (calculate from current date)
54
+ - "this year" → use current year's January 1st as date_from
55
+ - "last year" → use previous year's date range
56
+ - "past month" → calculate last 30 days from current date
57
+ - Always calculate dates dynamically based on current date provided in the system message
ui/__init__.py ADDED
File without changes
ui/helpers.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI helper functions for data formatting and display."""
2
+
3
+ import re
4
+
5
+
6
+ def prefer_language(value: str | dict | None, lang_code: str) -> str | None:
7
+ """Return the value for the requested language with sensible fallbacks."""
8
+ if isinstance(value, str):
9
+ return value or None
10
+ if not isinstance(value, dict):
11
+ return None
12
+ order = [lang_code, "de", "fr", "it", "rm", "en"]
13
+ for code in order:
14
+ text = value.get(code)
15
+ if text:
16
+ return text
17
+ for text in value.values():
18
+ if text:
19
+ return text
20
+ return None
21
+
22
+
23
def strip_html(text: str) -> str:
    """Turn an HTML fragment into a single line of plain text.

    Every ``<...>`` tag is replaced by a space, runs of whitespace are
    collapsed to one space, and the result is trimmed at both ends.

    Args:
        text: The HTML (or plain) text to clean; falsy input is accepted.

    Returns:
        The cleaned text, or an empty string when ``text`` is falsy.
    """
    if text:
        # Replace tags with a space (not "") so adjacent words stay separated.
        without_tags = re.sub(r"<[^>]+>", " ", text)
        return re.sub(r"\s+", " ", without_tags).strip()
    return ""
30
+
31
+
32
def pick_external_url(*candidates) -> str | None:
    """Return the first usable external URL among ``candidates``.

    Candidates are examined in the order given. Each may be a plain URL
    string or a mapping of language code to URL; falsy candidates and
    candidates of any other type are skipped.

    Args:
        *candidates: URL strings and/or language-keyed URL mappings.

    Returns:
        The first non-blank URL with surrounding whitespace removed, or
        ``None`` when no candidate yields one. For a mapping, German is
        preferred and other languages are used as fallback.
    """
    for candidate in candidates:
        if not candidate:
            continue
        if isinstance(candidate, dict):
            # Trim entries before choosing, so a whitespace-only URL in one
            # language cannot shadow a real URL in another, and so mapping
            # results are stripped the same way plain strings are.
            cleaned = {
                code: url.strip()
                for code, url in candidate.items()
                if isinstance(url, str)
            }
            # prefer_language already falls back through the other languages
            # and then any remaining value, so one call is sufficient.
            url = prefer_language(cleaned, "de")
            if url:
                return url
        elif isinstance(candidate, str):
            stripped = candidate.strip()
            if stripped:
                return stripped
    return None
ui/styles.css ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .gradio-container {
2
+ font-family: 'Inter', sans-serif;
3
+ }
4
+ .chatbot-header {
5
+ text-align: center;
6
+ padding: 20px;
7
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
8
+ color: white !important;
9
+ border-radius: 10px;
10
+ margin-bottom: 20px;
11
+ }
12
+ .chatbot-header h1 {
13
+ color: white !important;
14
+ margin: 0;
15
+ }
16
+ .chatbot-header p {
17
+ color: white !important;
18
+ margin: 10px 0 0 0;
19
+ }
20
+ .parliament-card {
21
+ display: block;
22
+ border: 1px solid #ddd;
23
+ border-radius: 8px;
24
+ padding: 16px;
25
+ background: white;
26
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.06);
27
+ transition: transform 0.2s ease, box-shadow 0.2s ease, border-color 0.2s ease;
28
+ cursor: pointer;
29
+ }
30
+ .parliament-card:hover {
31
+ transform: translateY(-4px);
32
+ box-shadow: 0 14px 28px rgba(0, 0, 0, 0.16);
33
+ border-color: #4c6ef5;
34
+ }
35
+ .coming-soon-row {
36
+ display: flex;
37
+ gap: 8px;
38
+ flex-wrap: wrap;
39
+ margin-top: 8px;
40
+ }
41
+ .coming-soon-pill {
42
+ padding: 4px 12px;
43
+ border-radius: 999px;
44
+ background: #4c6ef5;
45
+ color: white;
46
+ font-size: 12px;
47
+ font-weight: 600;
48
+ letter-spacing: 0.05em;
49
+ opacity: 0.45;
50
+ pointer-events: none;
51
+ text-transform: uppercase;
52
+ }
53
+ .category-badge {
54
+ display: inline-block;
55
+ padding: 2px 8px;
56
+ border-radius: 999px;
57
+ background: #edf2ff;
58
+ color: #3b5bdb;
59
+ font-size: 11px;
60
+ font-weight: 600;
61
+ text-transform: uppercase;
62
+ letter-spacing: 0.04em;
63
+ }