"""Gradio app that turns CV text into structured JSON or a cover letter.

The app downloads a GGUF build of Llama 3 70B Instruct from the Hugging Face
Hub at start-up and serves a small Gradio interface around it.
"""

# NOTE: the import order below is kept exactly as in the original script;
# `spaces` may be sensitive to being imported relative to GPU libraries, so
# the order is deliberately not re-sorted.
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer
from tqdm import tqdm
import re
import gradio as gr
import os
import accelerate
import spaces
import subprocess
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from huggingface_hub import login

# Only log in when a token is actually configured: calling login() without a
# token falls back to an interactive prompt, which cannot be answered when the
# script runs unattended.
_hf_token = os.getenv('HF_TOKEN')
if _hf_token:
    login(token=_hf_token)

repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
local_dir = "models"

# Keep the path reported by the download call so the loader below does not
# have to rebuild it by hand from `local_dir` and `model_id`.
MODEL_PATH = hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)

# System prompt for the "Convert to JSON" mode. The JSON skeleton is the
# schema the model is asked to fill in.
_JSON_SYSTEM_PROMPT = """
You are an expert at structuring resumes in JSON format. Given a modified resume text, extract the relevant details and convert them into the following structured JSON format:

{
  "profileDetails": {
    "firstName": "",
    "lastName": "",
    "email": "",
    "contact": "",
    "country": "",
    "jobTitle": "",
    "social": "",
    "profileDesc": "",
    "address": "",
    "city": "",
    "state": "",
    "zipCode": ""
  },
  "professionalExperience": [
    {
      "positionTitle": "",
      "location": "",
      "company": "",
      "description": "",
      "startDate": "",
      "endDate": ""
    }
  ],
  "education": [
    {
      "institute": "",
      "schoolLocation": "",
      "degree": "",
      "field": "",
      "grade": "",
      "startDate": "",
      "endDate": ""
    }
  ],
  "skills": [""],
  "hobbies": [""],
  "languages": [""],
  "certifications": [""],
  "projects": [
    {
      "title": "",
      "description": ""
    }
  ],
  "jobPreferences": {
    "compTarget": "",
    "strength": "",
    "roleTarget": ""
  },
  "jobDescription": ""
}

Instructions:
- Extract details accurately from the given resume.
- Ensure proper structuring of dates, responsibilities, and projects.
- If a field is missing in the input, leave it as an empty string or an empty list where applicable.
- Maintain proper formatting and avoid unnecessary additions.

Provide the response in a valid JSON format with no additional explanations.
"""

# System prompt for the "Cover Letter" mode.
_COVER_LETTER_SYSTEM_PROMPT = (
    "Given input CV and job description. Please prepare cover letter "
    "according to the given job description and give as an output."
)


def _chat(llm, system_prompt, user_text, maxtokens, temperature, top_probability):
    """Run one system+user chat completion and return the reply text.

    Args:
        llm: Object exposing ``create_chat_completion`` (a ``Llama`` instance
            in this script).
        system_prompt: Instructions sent with the ``system`` role.
        user_text: Content sent with the ``user`` role.
        maxtokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_probability: Forwarded as ``top_p``.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = llm.create_chat_completion(
        messages=[
            # Instructions go in the system turn; an "assistant" turn would
            # present them to the model as something it had said itself.
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        # Previously accepted by the callers but never forwarded.
        top_p=top_probability,
    )
    return response['choices'][0]['message']['content']


def convert_to_json(llm, cv_text, maxtokens, temperature, top_probability):
    """Ask the model to restructure resume text into the JSON schema.

    Args:
        llm: Model object used for the chat completion.
        cv_text: Resume text sent as the user message.
        maxtokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling value passed as ``top_p``.

    Returns:
        The model's reply text. It is not parsed or validated as JSON here.
    """
    return _chat(
        llm, _JSON_SYSTEM_PROMPT, cv_text, maxtokens, temperature, top_probability
    )


def craft_cover_letter(llm, cv_text, job_description, maxtokens, temperature, top_probability):
    """Ask the model to write a cover letter for a CV and job description.

    Args:
        llm: Model object used for the chat completion.
        cv_text: CV text.
        job_description: Job description the letter should target.
        maxtokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling value passed as ``top_p``.

    Returns:
        A ``(cv_text, cover_letter)`` tuple: the unchanged input CV text and
        the model's reply text.
    """
    user_text = ' Input CV: ' + cv_text + ' , Job Description: ' + job_description
    letter = _chat(
        llm,
        _COVER_LETTER_SYSTEM_PROMPT,
        user_text,
        maxtokens,
        temperature,
        top_probability,
    )
    return cv_text, letter


@spaces.GPU(duration=150)
def pdf_to_text(cv_text, job_description="", function="Convert to JSON", maxtokens=2048, temperature=0, top_probability=0.95):
    """Gradio handler: load the model and run the selected task on the CV.

    Despite the name, this function receives the CV as text; no PDF parsing
    happens here.

    Args:
        cv_text: CV text.
        job_description: Job description; only used for ``'Cover Letter'``.
        function: ``'Cover Letter'`` selects the cover-letter task; any other
            value (including ``None``) selects JSON conversion.
        maxtokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling value passed as ``top_p``.

    Returns:
        The generated text (cover letter or JSON string).
    """
    # The model is instantiated on every call, inside the GPU-decorated
    # function, exactly as in the original script.
    llm = Llama(
        model_path=MODEL_PATH,
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )

    if function == 'Cover Letter':
        _, crafted_cv = craft_cover_letter(
            llm, cv_text, job_description, maxtokens, temperature, top_probability
        )
    else:
        crafted_cv = convert_to_json(
            llm, cv_text, maxtokens, temperature, top_probability
        )

    return crafted_cv


# NOTE(review): the five widgets below are created but are not part of the
# interface's inputs/outputs, so `pdf_to_text` always runs with its own
# default maxtokens / temperature / top_probability. They are kept so the
# module-level names stay available; confirm whether they should be wired in.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
prob_slider = gr.Slider(minimum=0, maximum=1, value=0.95, label="Max Probability Value")
max_tokens = gr.Number(value=600, label="Max Tokens")
cv_file = gr.File(label='Upload the CV')
output_text = gr.Textbox()

function = gr.Radio(["Convert to JSON", "Cover Letter"])
prompt_text = gr.Textbox(label='Enter the job description')

iface = gr.Interface(
    fn=pdf_to_text,
    inputs=['text', prompt_text, function],
    outputs=['text'],
    title='Create a Cover Letter or convert PDF to JSON',
    description="This application assists to create a cover letter based on input job description",
    theme=gr.themes.Soft(),
)

iface.launch()