Roger Surf committed on
Commit
100f669
·
1 Parent(s): 5e7da44

feat: integrate real data - app working with embeddings

Browse files
Files changed (4) hide show
  1. app.py +74 -37
  2. data/data_loader.py +43 -0
  3. utils/display.py +71 -121
  4. utils/display_old.py +295 -0
app.py CHANGED
@@ -14,10 +14,9 @@ from pathlib import Path
14
  sys.path.append(str(Path(__file__).parent))
15
 
16
  from config import *
17
- from data.mock_data import (
18
- get_candidate_data,
19
- get_company_matches,
20
- get_network_graph_data
21
  )
22
  from utils.display import (
23
  display_candidate_profile,
@@ -124,14 +123,6 @@ def render_header():
124
 
125
  st.markdown(f'<h1 class="main-title">{APP_TITLE}</h1>', unsafe_allow_html=True)
126
  st.markdown(f'<p class="sub-title">{APP_SUBTITLE}</p>', unsafe_allow_html=True)
127
-
128
- # Demo mode indicator
129
- if DEMO_MODE:
130
- st.info(
131
- "🎭 **Demo Mode Active** - Displaying hardcoded sample data. "
132
- "This will be replaced with real matching when embeddings are loaded.",
133
- icon="ℹ️"
134
- )
135
 
136
 
137
  def render_sidebar():
@@ -170,7 +161,7 @@ def render_sidebar():
170
  st.markdown("### πŸ‘€ View Mode")
171
  view_mode = st.radio(
172
  "Select view:",
173
- ["πŸ“Š Overview", "πŸ“ Detailed Cards", "πŸ“ˆ Table View"],
174
  help="Choose how to display company matches"
175
  )
176
 
@@ -207,14 +198,48 @@ def render_sidebar():
207
  return top_k, min_score, view_mode
208
 
209
 
210
- def render_network_section(candidate_id: int, top_k: int):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  """Render interactive network visualization section."""
212
 
213
  st.markdown('<div class="section-header">πŸ•ΈοΈ Network Visualization</div>', unsafe_allow_html=True)
214
 
215
  with st.spinner("Generating interactive network graph..."):
216
  # Get graph data
217
- graph_data = get_network_graph_data(candidate_id, top_k)
218
 
219
  # Create HTML graph
220
  html_content = create_network_graph(
@@ -252,7 +277,7 @@ def render_matches_section(matches, view_mode: str):
252
  # Table view
253
  display_match_table(matches)
254
 
255
- elif view_mode == "πŸ“ Detailed Cards":
256
  # Card view - detailed
257
  for rank, (comp_id, score, comp_data) in enumerate(matches, 1):
258
  display_company_card(comp_data, score, rank)
@@ -277,12 +302,35 @@ def main():
277
  # Main content area
278
  st.markdown("---")
279
 
 
 
 
 
 
 
 
 
 
 
 
280
  # Load candidate data
281
  candidate_id = DEMO_CANDIDATE_ID
282
- candidate = get_candidate_data(candidate_id)
283
 
284
  # Load company matches
285
- matches = get_company_matches(candidate_id, top_k)
 
 
 
 
 
 
 
 
 
 
 
 
286
 
287
  # Filter by minimum score
288
  matches = [(cid, score, cdata) for cid, score, cdata in matches if score >= min_score]
@@ -309,17 +357,10 @@ def main():
309
  st.markdown("---")
310
 
311
  # Network visualization (full width)
312
- render_network_section(candidate_id, len(matches))
313
 
314
  st.markdown("---")
315
 
316
- # Footer with instructions
317
- st.success(
318
- "βœ… **MVP Demo Ready!** This interface shows the core functionality. "
319
- "Next step: Replace mock data with real embeddings for dynamic matching.",
320
- icon="πŸŽ‰"
321
- )
322
-
323
  # Technical info expander
324
  with st.expander("πŸ”§ Technical Details", expanded=False):
325
  st.markdown(f"""
@@ -328,20 +369,16 @@ def main():
328
  - Similarity Metric: Cosine Similarity
329
  - Top K Matches: {top_k}
330
  - Minimum Score: {min_score:.0%}
331
- - Demo Mode: {'βœ… Enabled' if DEMO_MODE else '❌ Disabled'}
332
-
333
- **Data Sources:**
334
- - Candidates: 9,544 profiles
335
- - Companies: 180,000 entities
336
- - Job Postings: 700 (bridge data)
337
 
338
  **Algorithm:**
339
- 1. Text representation of candidates/companies
340
- 2. Sentence transformer embeddings (384D)
341
- 3. Cosine similarity calculation
342
- 4. Top-K ranking
343
  """)
344
 
345
 
346
  if __name__ == "__main__":
347
- main()
 
14
  sys.path.append(str(Path(__file__).parent))
15
 
16
  from config import *
17
+ from data.data_loader import (
18
+ load_embeddings,
19
+ find_top_matches
 
20
  )
21
  from utils.display import (
22
  display_candidate_profile,
 
123
 
124
  st.markdown(f'<h1 class="main-title">{APP_TITLE}</h1>', unsafe_allow_html=True)
125
  st.markdown(f'<p class="sub-title">{APP_SUBTITLE}</p>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
126
 
127
 
128
  def render_sidebar():
 
161
  st.markdown("### πŸ‘€ View Mode")
162
  view_mode = st.radio(
163
  "Select view:",
164
+ ["πŸ“Š Overview", "πŸ” Detailed Cards", "πŸ“ˆ Table View"],
165
  help="Choose how to display company matches"
166
  )
167
 
 
198
  return top_k, min_score, view_mode
199
 
200
 
201
def get_network_graph_data(candidate_id, matches):
    """Build the node/edge payload for the candidate-company network graph.

    Args:
        candidate_id: Identifier of the candidate; rendered as the hub node
            with id ``C<candidate_id>``.
        matches: Iterable of ``(company_id, score, company_data)`` tuples.
            ``company_data`` only needs a ``.get(key, default)`` method.

    Returns:
        A dict with two lists: ``'nodes'`` (the candidate first, then one
        box per company) and ``'edges'`` (one per company, starting at the
        candidate).
    """
    hub_id = f'C{candidate_id}'
    nodes = [{
        'id': hub_id,
        'label': f'Candidate #{candidate_id}',
        'color': '#4ade80',
        'shape': 'dot',
        'size': 30,
    }]
    edges = []

    for comp_id, score, comp_data in matches:
        fallback_label = f'Company {comp_id}'
        name = comp_data.get('name', fallback_label)
        # A stored name can be None or NaN (a missing cell); slicing either
        # would raise TypeError, so fall back to the generic label instead.
        # NaN is the only float that is not equal to itself.
        if name is None or (isinstance(name, float) and name != name):
            name = fallback_label

        company_node_id = f'COMP{comp_id}'
        nodes.append({
            'id': company_node_id,
            'label': str(name)[:30],  # keep node labels short
            'color': '#ff6b6b',
            'shape': 'box',
            'size': 20,
        })
        edges.append({
            'from': hub_id,
            'to': company_node_id,
            'value': float(score) * 10,  # edge weight derived from the score
            'title': f'{score:.3f}',
        })

    return {'nodes': nodes, 'edges': edges}
233
+
234
+
235
+ def render_network_section(candidate_id: int, matches):
236
  """Render interactive network visualization section."""
237
 
238
  st.markdown('<div class="section-header">πŸ•ΈοΈ Network Visualization</div>', unsafe_allow_html=True)
239
 
240
  with st.spinner("Generating interactive network graph..."):
241
  # Get graph data
242
+ graph_data = get_network_graph_data(candidate_id, matches)
243
 
244
  # Create HTML graph
245
  html_content = create_network_graph(
 
277
  # Table view
278
  display_match_table(matches)
279
 
280
+ elif view_mode == "πŸ” Detailed Cards":
281
  # Card view - detailed
282
  for rank, (comp_id, score, comp_data) in enumerate(matches, 1):
283
  display_company_card(comp_data, score, rank)
 
302
  # Main content area
303
  st.markdown("---")
304
 
305
+ # Load embeddings (cache in session state)
306
+ if 'embeddings_loaded' not in st.session_state:
307
+ with st.spinner("πŸ”„ Loading embeddings and data..."):
308
+ cand_emb, comp_emb, cand_df, comp_df = load_embeddings()
309
+ st.session_state.embeddings_loaded = True
310
+ st.session_state.candidate_embeddings = cand_emb
311
+ st.session_state.company_embeddings = comp_emb
312
+ st.session_state.candidates_df = cand_df
313
+ st.session_state.companies_df = comp_df
314
+ st.success("βœ… Data loaded successfully!")
315
+
316
  # Load candidate data
317
  candidate_id = DEMO_CANDIDATE_ID
318
+ candidate = st.session_state.candidates_df.iloc[candidate_id]
319
 
320
  # Load company matches
321
+ matches_list = find_top_matches(
322
+ candidate_id,
323
+ st.session_state.candidate_embeddings,
324
+ st.session_state.company_embeddings,
325
+ st.session_state.companies_df,
326
+ top_k
327
+ )
328
+
329
+ # Format matches for display
330
+ matches = [
331
+ (m['company_id'], m['score'], st.session_state.companies_df.iloc[m['company_id']])
332
+ for m in matches_list
333
+ ]
334
 
335
  # Filter by minimum score
336
  matches = [(cid, score, cdata) for cid, score, cdata in matches if score >= min_score]
 
357
  st.markdown("---")
358
 
359
  # Network visualization (full width)
360
+ render_network_section(candidate_id, matches)
361
 
362
  st.markdown("---")
363
 
 
 
 
 
 
 
 
364
  # Technical info expander
365
  with st.expander("πŸ”§ Technical Details", expanded=False):
366
  st.markdown(f"""
 
369
  - Similarity Metric: Cosine Similarity
370
  - Top K Matches: {top_k}
371
  - Minimum Score: {min_score:.0%}
372
+ - Candidates Loaded: {len(st.session_state.candidates_df):,}
373
+ - Companies Loaded: {len(st.session_state.companies_df):,}
 
 
 
 
374
 
375
  **Algorithm:**
376
+ 1. Load pre-computed embeddings (.npy files)
377
+ 2. Calculate cosine similarity
378
+ 3. Rank companies by similarity score
379
+ 4. Return top-K matches
380
  """)
381
 
382
 
383
  if __name__ == "__main__":
384
+ main()
data/data_loader.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pickle
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+
5
def load_embeddings(data_dir='data/processed'):
    """Load the pre-computed embeddings and their metadata.

    Args:
        data_dir: Directory that holds ``candidate_embeddings.npy``,
            ``company_embeddings.npy``, ``candidates_processed.pkl`` and
            ``companies_processed.pkl``. The default is the original
            hard-coded location, resolved against the current working
            directory.

    Returns:
        Tuple ``(candidate_embeddings, company_embeddings, candidates_df,
        companies_df)``. The first two are whatever ``np.load`` returns; the
        last two are the unpickled objects (presumably DataFrames, judging
        by the names - confirm against the pipeline that writes them).

    Raises:
        FileNotFoundError: If any of the four files is missing.
    """
    from pathlib import Path  # local import: leaves the module's import block untouched

    base = Path(data_dir)

    # Embedding matrices
    candidate_embeddings = np.load(base / 'candidate_embeddings.npy')
    company_embeddings = np.load(base / 'company_embeddings.npy')

    # Metadata tables.
    # NOTE(security): unpickling executes code embedded in the file. Only
    # load artefacts produced by this project's own processing pipeline.
    with open(base / 'candidates_processed.pkl', 'rb') as handle:
        candidates_df = pickle.load(handle)

    with open(base / 'companies_processed.pkl', 'rb') as handle:
        companies_df = pickle.load(handle)

    return candidate_embeddings, company_embeddings, candidates_df, companies_df
20
+
21
def find_top_matches(candidate_idx, candidate_embeddings, company_embeddings, companies_df, top_k=10):
    """Find the top K company matches for a candidate.

    Args:
        candidate_idx: Row position of the candidate in
            ``candidate_embeddings``.
        candidate_embeddings: Array indexed by candidate position; each row
            is reshaped to ``(1, -1)`` before comparison.
        company_embeddings: Array of company vectors compared against the
            candidate vector.
        companies_df: Table indexed positionally with ``.iloc``; rows must
            support ``.get(key, default)``.
        top_k: Maximum number of matches to return. ``None`` returns every
            company; zero or a negative value returns an empty list.

    Returns:
        List of dicts, best match first, with keys ``company_id`` (plain
        ``int`` row position), ``company_name``, ``job_title`` and ``score``
        (``float`` cosine similarity).
    """
    # Without this guard a negative K slices from the end and would return
    # almost every company instead of none.
    if top_k is not None and top_k <= 0:
        return []

    # Candidate embedding as a single-row matrix
    candidate_vec = candidate_embeddings[candidate_idx].reshape(1, -1)

    # Similarity of the candidate to every company
    similarities = cosine_similarity(candidate_vec, company_embeddings)[0]

    # Positions of the K highest scores, best first
    top_indices = np.argsort(similarities)[::-1][:top_k]

    matches = []
    for raw_idx in top_indices:
        # Plain int rather than numpy.int64 so the id can be serialised and
        # displayed without surprises downstream.
        idx = int(raw_idx)
        row = companies_df.iloc[idx]  # look the row up once, not per field
        matches.append({
            'company_id': idx,
            'company_name': row.get('name', f'Company {idx}'),
            'job_title': row.get('title', 'N/A'),
            'score': float(similarities[idx]),
        })

    return matches
utils/display.py CHANGED
@@ -5,107 +5,94 @@ Contains formatted display components for candidates and companies.
5
 
6
  import streamlit as st
7
  import pandas as pd
 
8
  from typing import Dict, Any, List, Tuple
9
 
10
 
11
- def display_candidate_profile(candidate: Dict[str, Any]):
12
  """
13
  Display comprehensive candidate profile in Streamlit.
14
 
15
  Args:
16
- candidate: Dictionary with candidate data
17
  """
18
 
19
  st.markdown("### πŸ‘€ Candidate Profile")
20
  st.markdown("---")
21
 
22
- # Basic Info
23
- col1, col2 = st.columns([2, 1])
24
-
25
- with col1:
26
- st.markdown(f"**Name:** {candidate.get('name', 'N/A')}")
27
- st.markdown(f"**Desired Position:** {candidate.get('job_position_name', 'N/A')}")
28
-
29
- with col2:
30
- st.metric("Match Score", f"{candidate.get('matched_score', 0):.2%}")
31
-
32
  # Career Objective
33
  with st.expander("🎯 Career Objective", expanded=True):
34
  st.write(candidate.get('career_objective', 'Not provided'))
35
 
36
  # Skills
37
  with st.expander("πŸ’» Skills & Expertise", expanded=True):
38
- skills = candidate.get('skills', [])
39
- if skills:
40
- # Display as tags
41
- skills_html = " ".join([f'<span style="background-color: #0066CC; color: white; padding: 5px 10px; border-radius: 15px; margin: 3px; display: inline-block;">{skill}</span>' for skill in skills[:15]])
42
- st.markdown(skills_html, unsafe_allow_html=True)
43
- else:
44
- st.write("No skills listed")
 
 
 
45
 
46
  # Education
47
  with st.expander("πŸŽ“ Education"):
48
- edu_data = {
49
- 'Institution': candidate.get('educational_institution_name', []),
50
- 'Degree': candidate.get('degree_names', []),
51
- 'Major': candidate.get('major_field_of_studies', []),
52
- 'Year': candidate.get('passing_years', []),
53
- 'GPA': candidate.get('educational_results', [])
54
- }
55
-
56
- if any(edu_data.values()):
57
- df_edu = pd.DataFrame(edu_data)
58
- st.dataframe(df_edu, use_container_width=True, hide_index=True)
59
- else:
 
 
 
 
 
 
 
 
60
  st.write("No education information provided")
61
 
62
  # Work Experience
63
  with st.expander("πŸ’Ό Work Experience"):
64
- exp_data = {
65
- 'Company': candidate.get('professional_company_names', []),
66
- 'Position': candidate.get('positions', []),
67
- 'Location': candidate.get('locations', []),
68
- 'Start': candidate.get('start_dates', []),
69
- 'End': candidate.get('end_dates', [])
70
- }
71
-
72
- if any(exp_data.values()):
73
- df_exp = pd.DataFrame(exp_data)
74
- st.dataframe(df_exp, use_container_width=True, hide_index=True)
75
 
76
- # Show responsibilities
77
- responsibilities = candidate.get('responsibilities', '')
78
- if responsibilities:
79
- st.markdown("**Key Responsibilities:**")
80
- st.text(responsibilities)
81
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  st.write("No work experience listed")
83
-
84
- # Languages
85
- with st.expander("🌍 Languages"):
86
- languages = candidate.get('languages', [])
87
- proficiency = candidate.get('proficiency_levels', [])
88
-
89
- if languages:
90
- for lang, prof in zip(languages, proficiency):
91
- st.write(f"β€’ **{lang}** - {prof}")
92
- else:
93
- st.write("No languages listed")
94
-
95
- # Certifications
96
- with st.expander("πŸ… Certifications"):
97
- providers = candidate.get('certification_providers', [])
98
- skills = candidate.get('certification_skills', [])
99
-
100
- if providers:
101
- for provider, skill in zip(providers, skills):
102
- st.write(f"β€’ **{skill}** by {provider}")
103
- else:
104
- st.write("No certifications listed")
105
 
106
 
107
  def display_company_card(
108
- company_data: Dict[str, Any],
109
  similarity_score: float,
110
  rank: int
111
  ):
@@ -113,7 +100,7 @@ def display_company_card(
113
  Display company information as a card.
114
 
115
  Args:
116
- company_data: Dictionary with company data
117
  similarity_score: Match score
118
  rank: Ranking position
119
  """
@@ -152,44 +139,19 @@ def display_company_card(
152
  )
153
 
154
  # Company details
155
- col1, col2, col3 = st.columns(3)
156
-
157
- with col1:
158
- st.markdown(f"**πŸ“ Location**")
159
- location = f"{company_data.get('city', '')}, {company_data.get('state', '')}, {company_data.get('country', '')}"
160
- st.write(location)
161
-
162
- with col2:
163
- st.markdown(f"**πŸ‘₯ Size**")
164
- st.write(company_data.get('employee_count', 'N/A'))
165
-
166
- with col3:
167
- st.markdown(f"**🏭 Industry**")
168
- industries = company_data.get('industries_list', 'N/A')
169
- st.write(industries.split(',')[0] if ',' in str(industries) else industries)
170
 
171
  # Description
172
- description = company_data.get('description', 'No description available')
 
 
173
  st.markdown(f"**About:** {description}")
174
 
175
- # Required skills
176
- required_skills = company_data.get('required_skills', '')
177
- if required_skills:
178
- st.markdown("**πŸ”§ Required Skills:**")
179
- skills_list = [s.strip() for s in str(required_skills).split('|')[:8]]
180
- skills_html = " ".join([f'<span style="background-color: #CC0000; color: white; padding: 5px 10px; border-radius: 15px; margin: 3px; display: inline-block; font-size: 12px;">{skill}</span>' for skill in skills_list])
181
- st.markdown(skills_html, unsafe_allow_html=True)
182
-
183
- # Job postings
184
- job_titles = company_data.get('posted_job_titles', '')
185
- if job_titles:
186
- st.markdown(f"**πŸ’Ό Open Positions:** {job_titles}")
187
-
188
  st.markdown("---")
189
 
190
 
191
  def display_match_table(
192
- matches: List[Tuple[int, float, Dict[str, Any]]],
193
  show_top_n: int = 10
194
  ):
195
  """
@@ -207,21 +169,11 @@ def display_match_table(
207
  table_data = []
208
 
209
  for rank, (comp_id, score, comp_data) in enumerate(matches[:show_top_n], 1):
210
- # Get key skills (first 3)
211
- skills = comp_data.get('required_skills', 'N/A')
212
- if skills and skills != 'N/A':
213
- skills_list = [s.strip() for s in str(skills).split('|')[:3]]
214
- skills_display = ', '.join(skills_list)
215
- else:
216
- skills_display = 'N/A'
217
-
218
  table_data.append({
219
  'Rank': f"#{rank}",
220
- 'Company': comp_data.get('name', 'N/A'),
221
  'Score': f"{score:.1%}",
222
- 'Location': f"{comp_data.get('city', 'N/A')}, {comp_data.get('state', 'N/A')}",
223
- 'Top Skills': skills_display,
224
- 'Employees': comp_data.get('employee_count', 'N/A')
225
  })
226
 
227
  # Display as dataframe
@@ -235,10 +187,8 @@ def display_match_table(
235
  column_config={
236
  "Rank": st.column_config.TextColumn(width="small"),
237
  "Score": st.column_config.TextColumn(width="small"),
238
- "Company": st.column_config.TextColumn(width="medium"),
239
- "Location": st.column_config.TextColumn(width="medium"),
240
- "Top Skills": st.column_config.TextColumn(width="large"),
241
- "Employees": st.column_config.TextColumn(width="small")
242
  }
243
  )
244
 
@@ -246,8 +196,8 @@ def display_match_table(
246
 
247
 
248
  def display_stats_overview(
249
- candidate_data: Dict[str, Any],
250
- matches: List[Tuple[int, float, Dict[str, Any]]]
251
  ):
252
  """
253
  Display overview statistics about the matching results.
@@ -292,4 +242,4 @@ def display_stats_overview(
292
  help="Highest similarity score"
293
  )
294
 
295
- st.markdown("---")
 
5
 
6
  import streamlit as st
7
  import pandas as pd
8
+ import ast
9
  from typing import Dict, Any, List, Tuple
10
 
11
 
12
def _parse_list_field(raw):
    """Return *raw* as a list, or ``None`` when it cannot be read as one."""
    if isinstance(raw, (list, tuple)):
        return list(raw)
    if not isinstance(raw, str):
        # None, NaN and other scalars carry no list content.
        return None
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # The documented failure modes of literal_eval on malformed input.
        return None
    return list(parsed) if isinstance(parsed, (list, tuple)) else None


def _item_or_na(items, index):
    """Return ``items[index]``, or ``'N/A'`` when the list is too short."""
    return items[index] if index < len(items) else 'N/A'


def display_candidate_profile(candidate):
    """
    Display comprehensive candidate profile in Streamlit.

    List-valued fields may be real lists or their string representation
    (e.g. ``"['Python', 'SQL']"``). A field that cannot be parsed is treated
    as empty, so one bad field no longer hides the rest of its section.

    Args:
        candidate: Pandas Series with candidate data (any object with a
            ``.get(key, default)`` method works)
    """
    import html  # local import: only needed to escape the skill tags

    st.markdown("### πŸ‘€ Candidate Profile")
    st.markdown("---")

    # Career Objective
    with st.expander("🎯 Career Objective", expanded=True):
        st.write(candidate.get('career_objective', 'Not provided'))

    # Skills
    with st.expander("πŸ’» Skills & Expertise", expanded=True):
        raw_skills = candidate.get('skills', '[]')
        skills = _parse_list_field(raw_skills)
        if skills:
            # Display as tags. Values come from candidate data and are
            # rendered with unsafe_allow_html, so escape them first.
            skills_html = " ".join(
                f'<span style="background-color: #0066CC; color: white; padding: 5px 10px; border-radius: 15px; margin: 3px; display: inline-block;">{html.escape(str(skill))}</span>'
                for skill in skills[:15]
            )
            st.markdown(skills_html, unsafe_allow_html=True)
        elif skills is None and isinstance(raw_skills, str) and raw_skills.strip():
            # Free text rather than a list literal: show it as-is.
            st.write(raw_skills)
        else:
            st.write("No skills listed")

    # Education
    with st.expander("πŸŽ“ Education"):
        institutions = _parse_list_field(candidate.get('educational_institution_name', '[]')) or []
        degrees = _parse_list_field(candidate.get('degree_names', '[]')) or []
        majors = _parse_list_field(candidate.get('major_field_of_studies', '[]')) or []
        years = _parse_list_field(candidate.get('passing_years', '[]')) or []

        if institutions and any(institutions):
            last = len(institutions) - 1
            for i, institution in enumerate(institutions):
                degree = _item_or_na(degrees, i)
                major = _item_or_na(majors, i)
                year = _item_or_na(years, i)

                st.write(f"**{degree}** in {major}")
                st.write(f"πŸ“ {institution}")
                st.write(f"πŸ“… {year}")
                if i < last:
                    st.write("---")
        else:
            st.write("No education information provided")

    # Work Experience
    with st.expander("πŸ’Ό Work Experience"):
        companies = _parse_list_field(candidate.get('professional_company_names', '[]')) or []
        positions = _parse_list_field(candidate.get('positions', '[]')) or []
        starts = _parse_list_field(candidate.get('start_dates', '[]')) or []
        ends = _parse_list_field(candidate.get('end_dates', '[]')) or []

        if companies and any(companies):
            last = len(companies) - 1
            for i, company in enumerate(companies):
                position = _item_or_na(positions, i)
                start = _item_or_na(starts, i)
                end = _item_or_na(ends, i)

                st.write(f"**{position}** at {company}")
                st.write(f"πŸ“… {start} - {end}")
                if i < last:
                    st.write("---")

            # Show responsibilities. Require real text: a missing cell is
            # NaN, which is truthy and would otherwise be printed as "nan".
            responsibilities = candidate.get('responsibilities', '')
            if isinstance(responsibilities, str) and responsibilities.strip():
                st.markdown("**Key Responsibilities:**")
                st.text(responsibilities)
        else:
            st.write("No work experience listed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
 
94
  def display_company_card(
95
+ company_data,
96
  similarity_score: float,
97
  rank: int
98
  ):
 
100
  Display company information as a card.
101
 
102
  Args:
103
+ company_data: Pandas Series with company data
104
  similarity_score: Match score
105
  rank: Ranking position
106
  """
 
139
  )
140
 
141
  # Company details
142
+ st.markdown(f"**Company ID:** {company_data.name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
  # Description
145
+ description = company_data.get('description', company_data.get('text', 'No description available'))
146
+ if len(str(description)) > 200:
147
+ description = str(description)[:200] + "..."
148
  st.markdown(f"**About:** {description}")
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  st.markdown("---")
151
 
152
 
153
  def display_match_table(
154
+ matches: List[Tuple[int, float, Any]],
155
  show_top_n: int = 10
156
  ):
157
  """
 
169
  table_data = []
170
 
171
  for rank, (comp_id, score, comp_data) in enumerate(matches[:show_top_n], 1):
 
 
 
 
 
 
 
 
172
  table_data.append({
173
  'Rank': f"#{rank}",
174
+ 'Company ID': comp_id,
175
  'Score': f"{score:.1%}",
176
+ 'Match Quality': 'πŸ”₯ Excellent' if score >= 0.7 else '✨ Very Good' if score >= 0.6 else 'πŸ‘ Good' if score >= 0.5 else '⭐ Fair'
 
 
177
  })
178
 
179
  # Display as dataframe
 
187
  column_config={
188
  "Rank": st.column_config.TextColumn(width="small"),
189
  "Score": st.column_config.TextColumn(width="small"),
190
+ "Company ID": st.column_config.TextColumn(width="medium"),
191
+ "Match Quality": st.column_config.TextColumn(width="medium")
 
 
192
  }
193
  )
194
 
 
196
 
197
 
198
  def display_stats_overview(
199
+ candidate_data,
200
+ matches: List[Tuple[int, float, Any]]
201
  ):
202
  """
203
  Display overview statistics about the matching results.
 
242
  help="Highest similarity score"
243
  )
244
 
245
+ st.markdown("---")
utils/display_old.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Display utilities for HRHUB Streamlit UI.
3
+ Contains formatted display components for candidates and companies.
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ from typing import Dict, Any, List, Tuple
9
+
10
+
11
def display_candidate_profile(candidate: Dict[str, Any]):
    """
    Display comprehensive candidate profile in Streamlit.

    Education and work-experience columns of unequal length are padded
    instead of raising, and paired lists (languages/proficiency,
    certification providers/skills) show ``N/A`` for a missing partner
    instead of silently dropping the entry.

    Args:
        candidate: Dictionary with candidate data
    """
    import html  # local imports: only this function needs them
    from itertools import zip_longest

    def _as_table(columns):
        # Wrapping each column in a Series lets pandas pad short columns
        # with NaN; plain lists of unequal length make DataFrame raise.
        return pd.DataFrame(
            {header: pd.Series(values, dtype=object) for header, values in columns.items()}
        )

    st.markdown("### πŸ‘€ Candidate Profile")
    st.markdown("---")

    # Basic Info
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown(f"**Name:** {candidate.get('name', 'N/A')}")
        st.markdown(f"**Desired Position:** {candidate.get('job_position_name', 'N/A')}")

    with col2:
        st.metric("Match Score", f"{candidate.get('matched_score', 0):.2%}")

    # Career Objective
    with st.expander("🎯 Career Objective", expanded=True):
        st.write(candidate.get('career_objective', 'Not provided'))

    # Skills
    with st.expander("πŸ’» Skills & Expertise", expanded=True):
        skills = candidate.get('skills', [])
        if skills:
            # Display as tags; escape because the markup is rendered with
            # unsafe_allow_html and the values come from candidate data.
            skills_html = " ".join(
                f'<span style="background-color: #0066CC; color: white; padding: 5px 10px; border-radius: 15px; margin: 3px; display: inline-block;">{html.escape(str(skill))}</span>'
                for skill in skills[:15]
            )
            st.markdown(skills_html, unsafe_allow_html=True)
        else:
            st.write("No skills listed")

    # Education
    with st.expander("πŸŽ“ Education"):
        edu_data = {
            'Institution': candidate.get('educational_institution_name', []),
            'Degree': candidate.get('degree_names', []),
            'Major': candidate.get('major_field_of_studies', []),
            'Year': candidate.get('passing_years', []),
            'GPA': candidate.get('educational_results', []),
        }

        if any(edu_data.values()):
            st.dataframe(_as_table(edu_data), use_container_width=True, hide_index=True)
        else:
            st.write("No education information provided")

    # Work Experience
    with st.expander("πŸ’Ό Work Experience"):
        exp_data = {
            'Company': candidate.get('professional_company_names', []),
            'Position': candidate.get('positions', []),
            'Location': candidate.get('locations', []),
            'Start': candidate.get('start_dates', []),
            'End': candidate.get('end_dates', []),
        }

        if any(exp_data.values()):
            st.dataframe(_as_table(exp_data), use_container_width=True, hide_index=True)

            # Show responsibilities
            responsibilities = candidate.get('responsibilities', '')
            if responsibilities:
                st.markdown("**Key Responsibilities:**")
                st.text(responsibilities)
        else:
            st.write("No work experience listed")

    # Languages
    with st.expander("🌍 Languages"):
        languages = candidate.get('languages', [])
        proficiency = candidate.get('proficiency_levels', [])

        if languages:
            # Trim the partner list so extra proficiency values never invent
            # a language; pad it so no language is dropped.
            for lang, prof in zip_longest(languages, proficiency[:len(languages)], fillvalue='N/A'):
                st.write(f"β€’ **{lang}** - {prof}")
        else:
            st.write("No languages listed")

    # Certifications
    with st.expander("πŸ… Certifications"):
        providers = candidate.get('certification_providers', [])
        cert_skills = candidate.get('certification_skills', [])

        if providers:
            for provider, skill in zip_longest(providers, cert_skills[:len(providers)], fillvalue='N/A'):
                st.write(f"β€’ **{skill}** by {provider}")
        else:
            st.write("No certifications listed")
105
+
106
+
107
def display_company_card(
    company_data: Dict[str, Any],
    similarity_score: float,
    rank: int
):
    """
    Display company information as a card.

    Args:
        company_data: Dictionary with company data
        similarity_score: Match score
        rank: Ranking position
    """
    import html  # local import: only needed to escape the skill tags

    # (minimum score, colour, label), checked from the best tier down.
    score_tiers = (
        (0.7, "#00FF00", "Excellent"),  # Green
        (0.6, "#FFD700", "Very Good"),  # Gold
        (0.5, "#FFA500", "Good"),       # Orange
    )

    with st.container():
        # Header with rank and score
        col1, col2, col3 = st.columns([1, 4, 2])

        with col1:
            st.markdown(f"### #{rank}")

        with col2:
            st.markdown(f"### 🏒 {company_data.get('name', 'Unknown Company')}")

        with col3:
            # Color-coded score; anything below the lowest tier is "Fair".
            color, label = "#FF6347", "Fair"  # Red
            for minimum, tier_color, tier_label in score_tiers:
                if similarity_score >= minimum:
                    color, label = tier_color, tier_label
                    break

            st.markdown(
                f'<div style="text-align: center; padding: 10px; background-color: {color}20; border: 2px solid {color}; border-radius: 10px;">'
                f'<span style="font-size: 24px; font-weight: bold; color: {color};">{similarity_score:.1%}</span><br>'
                f'<span style="font-size: 12px;">{label} Match</span>'
                f'</div>',
                unsafe_allow_html=True
            )

        # Company details
        col1, col2, col3 = st.columns(3)

        with col1:
            st.markdown("**πŸ“ Location**")
            # Join only the parts that are present so a missing city or
            # state does not render as ", , ".
            location_parts = []
            for key in ('city', 'state', 'country'):
                value = company_data.get(key, '')
                if value is not None and str(value).strip():
                    location_parts.append(str(value))
            st.write(", ".join(location_parts) or 'N/A')

        with col2:
            st.markdown("**πŸ‘₯ Size**")
            st.write(company_data.get('employee_count', 'N/A'))

        with col3:
            st.markdown("**🏭 Industry**")
            industries = company_data.get('industries_list', 'N/A')
            # Split the text form: the value may not be a str, and calling
            # .split on it directly would raise AttributeError.
            industries_text = str(industries)
            st.write(industries_text.split(',')[0] if ',' in industries_text else industries)

        # Description
        description = company_data.get('description', 'No description available')
        st.markdown(f"**About:** {description}")

        # Required skills
        required_skills = company_data.get('required_skills', '')
        if required_skills:
            st.markdown("**πŸ”§ Required Skills:**")
            skills_list = [s.strip() for s in str(required_skills).split('|')[:8]]
            # Escape each skill: the tags are rendered with unsafe_allow_html.
            skills_html = " ".join(
                f'<span style="background-color: #CC0000; color: white; padding: 5px 10px; border-radius: 15px; margin: 3px; display: inline-block; font-size: 12px;">{html.escape(skill)}</span>'
                for skill in skills_list
            )
            st.markdown(skills_html, unsafe_allow_html=True)

        # Job postings
        job_titles = company_data.get('posted_job_titles', '')
        if job_titles:
            st.markdown(f"**πŸ’Ό Open Positions:** {job_titles}")

        st.markdown("---")
189
+
190
+
191
def display_match_table(
    matches: List[Tuple[int, float, Dict[str, Any]]],
    show_top_n: int = 10
):
    """
    Render the best matches as a ranked table.

    Args:
        matches: List of (company_id, score, company_data) tuples
        show_top_n: Number of matches to display
    """

    def _top_skills(raw_skills) -> str:
        # First three '|'-separated skills, or 'N/A' when nothing is listed.
        if not raw_skills or raw_skills == 'N/A':
            return 'N/A'
        leading = str(raw_skills).split('|')[:3]
        return ', '.join(part.strip() for part in leading)

    st.markdown(f"### 🎯 Top {show_top_n} Company Matches")
    st.markdown("---")

    # One row per displayed match; the company id itself is not shown.
    rows = [
        {
            'Rank': f"#{position}",
            'Company': info.get('name', 'N/A'),
            'Score': f"{match_score:.1%}",
            'Location': f"{info.get('city', 'N/A')}, {info.get('state', 'N/A')}",
            'Top Skills': _top_skills(info.get('required_skills', 'N/A')),
            'Employees': info.get('employee_count', 'N/A'),
        }
        for position, (_company_id, match_score, info) in enumerate(matches[:show_top_n], 1)
    ]

    # Column widths for the rendered table
    text_column = st.column_config.TextColumn
    widths = {
        "Rank": text_column(width="small"),
        "Score": text_column(width="small"),
        "Company": text_column(width="medium"),
        "Location": text_column(width="medium"),
        "Top Skills": text_column(width="large"),
        "Employees": text_column(width="small"),
    }

    st.dataframe(
        pd.DataFrame(rows),
        width='stretch',
        hide_index=True,
        column_config=widths,
    )

    st.info("πŸ’‘ **Tip:** Scores above 0.6 indicate strong alignment between candidate skills and company requirements!")
246
+
247
+
248
def display_stats_overview(
    candidate_data: Dict[str, Any],
    matches: List[Tuple[int, float, Dict[str, Any]]]
):
    """
    Show four headline metrics summarising the match results.

    Args:
        candidate_data: Candidate information (not read by this function)
        matches: List of (company_id, score, company_data) tuples
    """

    st.markdown("### πŸ“Š Matching Overview")

    total_col, average_col, excellent_col, best_col = st.columns(4)

    # Every metric below is derived from the scores alone.
    scores = [match_score for _, match_score, _ in matches]

    with total_col:
        st.metric("Total Matches", len(matches), help="Number of companies analyzed")

    with average_col:
        if scores:
            mean_score = sum(scores) / len(scores)
        else:
            mean_score = 0
        st.metric("Average Score", f"{mean_score:.1%}", help="Average similarity score")

    with excellent_col:
        excellent_count = len([value for value in scores if value >= 0.7])
        st.metric("Excellent Matches", excellent_count, help="Matches with score β‰₯ 70%")

    with best_col:
        top_score = max(scores, default=0)
        st.metric("Best Match", f"{top_score:.1%}", help="Highest similarity score")

    st.markdown("---")