Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	| import json | |
| import os | |
| from io import BytesIO | |
| import gradio as gr | |
| from huggingface_hub import upload_file | |
# Generic application question (HTML). It is the value of several entries in
# the `internships` mapping and is rendered through a `gr.HTML` component, so
# the markup must be well-formed: every <code>/<a> tag is closed in the
# reverse order it was opened.
default_question = """
We're going to use the <code><a href="https://huggingface.co/datasets/wikitext" target="_blank">wikitext (link)</a></code> dataset with the <code><a href="https://huggingface.co/distilbert-base-cased" target="_blank">distilbert-base-cased (link)</a></code> model checkpoint.
<br/><br/>
Start by loading the <code>wikitext-2-raw-v1</code> version of that dataset, and take the 11th example (index 10) of the <code>train</code> split.<br/>
We'll tokenize this using the appropriate tokenizer, and we'll mask the sixth token (index 5) of the sequence.
<br/><br/>
When using the <code>distilbert-base-cased</code> checkpoint to unmask that (sixth token, index 5) token, what is the most probable predicted token (please provide the decoded token, and not the ID)?
<br/>
<br/>
Tips:
<br/>
- You might find the <a href="https://huggingface.co/docs/transformers/index" target="_blank">transformers docs (link)</a> useful.
<br/>
- You might find the <a href="https://huggingface.co/docs/datasets/index" target="_blank">datasets docs (link)</a> useful.
<br/>
- You might also be interested in the <a href="https://huggingface.co/course" target="_blank">Hugging Face course (link)</a>.
"""
# Question for the "Skops & Scikit-Learn" track (that entry is commented out in
# the `internships` mapping). Bracketed numbers refer to the reference list at
# the end of the text.
skops_question = """
1. Create a python environment[1] and install `scikit-learn` version `1.0` in that environment.
<br/>
2. Using that environment, create a `LogisticRegression` model[2] and fit it on the Iris dataset[3].
<br/>
3. Save the trained model using `pickle`[4] or `joblib`[5].
<br/>
4. Create a second environment, and install `scikit-learn` version `1.1` in it.
<br/>
5. Try loading the model you saved in step 3 in this second environment.
<br/>
<br/>
Question:
<br/>
Is there a warning or error you receive while trying to load the model? If yes, what exactly is it?
<br/>
<br/>
References
<br/>
- [1] You can use any tool you want to create the environment. Two of the options are:
<br/>
- `venv`: https://docs.python.org/3/library/venv.html
<br/>
- `mamba`: https://github.com/mamba-org/mamba
<br/>
- [2] `LogisticRegression` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.linear_model.LogisticRegression.html
<br/>
- [3] `load_iris` API guide: https://scikit-learn.org/dev/modules/generated/sklearn.datasets.load_iris.html
<br/>
- [4] `pickle`: https://docs.python.org/3/library/pickle.html
<br/>
- [5] `joblib`: https://joblib.readthedocs.io/en/latest/
"""
# Question for the "ML for Code/Code Generation" track (that entry is
# commented out in the `internships` mapping).
code_question = """
You are probing your code generation model on a program synthesis benchmark and
1 out of the 4 candidate solutions produced by your model pass the unit tests of a coding challenge.
<br/>
<br/>
What’s the pass@2 metric (in percent) as introduced in the
Codex paper (see section 2.1)?
<br/>
<br/>
References
<br/>
- Codex paper: https://arxiv.org/abs/2107.03374
"""
# Question for the "Evaluate" track (that entry is commented out in the
# `internships` mapping). The text is HTML, so each reference needs its own
# <br/> to appear on a separate line.
evaluate_question = """
Use the `evaluate` library to compute the BLEU score of the model generation `"Evaluate is a library to evaluate Machine Learning models"` and the reference solution `"Evaluate is a library to evaluate ML models"`. Round the result to two digits after the comma.
<br/>
<br/>
References
<br/>
- `evaluate` library: https://huggingface.co/docs/evaluate/index
<br/>
- BLEU score: https://en.wikipedia.org/wiki/BLEU
"""
# Question for the "Embodied AI" track (HTML). Every <a> tag must be closed,
# otherwise the rest of the page is swallowed into the link; list items are
# separated with <br/> because plain newlines collapse in HTML.
embodied_question = """
We are going to use <a href="https://github.com/huggingface/simulate"> Simulate </a> to create a basic RL environment.
<br/><br/>
Instructions:
<br/>
pip install simulate
<br/>
create a scene with the unity engine
<br/>
add a box to the scene at position [0, 0, 1], add a camera named "cam" at default position
<br/>
show the scene, step the scene once
<br/>
what is the mean pixel value from the frames from "cam".
<br/><br/>
For some resources, you may want to check out:
<br/>
* <a href="https://huggingface.co/docs/simulate/main/en/quicktour"> Simulate quick start </a> for installation,
<br/>
* <a href="https://huggingface.co/docs/simulate/main/en/tutorials/running_the_simulation#running-the-simulation"> simulation stepping </a> for running the simulation.
"""
# Question for the "Fast Distributed Training Framework" track (HTML). The
# curly-brace placeholders are literal text for the applicant to replace; this
# is a plain string, not an f-string.
fast_distributed_framework_question = """
We are going to work out how many operations a matrix multiplication takes when using the simplest algorithm.
<br/>
<br/>
Let A and B be two matrices of size 256x64 and 128x64 respectively. When computing the matrix multiplication of A and (B^T), how many scalar multiplications are done? How many scalar additions are done? Please answer in the following format:
<br/>
multiplications: {YOUR_ANSWER_AS_A_SINGLE_NUMBER}
<br/>
additions: {YOUR_ANSWER_AS_A_SINGLE_NUMBER}
<br/>
"""
# Maps each internship track (used as the dropdown label) to the HTML question
# shown to applicants. The dropdown choices are built from this dict's keys,
# so insertion order here is the display order. Tracks that are not currently
# offered are kept as commented-out entries.
internships = {
    "Accelerate": default_question,
    # "Skops & Scikit-Learn": skops_question,
    # "Evaluate": evaluate_question,
    "Speech": default_question,
    # "ML for Code/Code Generation": code_question,
    # "Model forgetting": default_question,
    # "Multimodal AI": default_question,
    # "OCR": default_question,
    # "Efficient video pretraining": default_question,
    # "Retrieval augmentation as prompting": default_question,
    "Embodied AI": embodied_question,
    # "Toolkit for detecting distribution shift/Robustness": default_question,
    "Social impact evaluations": default_question,
    # "Gradio as an ecosystem": default_question,
    # "Benchmarking transformers on various AI hardware accelerators": default_question,
    "AI Art Tooling Residency": default_question,
    "Datasets for Large Language Models": default_question,
    "Fast Distributed Training Framework": fast_distributed_framework_question,
}
# Application form: pick an internship, read its question, submit an answer.
# The event listeners are registered inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown(
        "# Internship introduction\n"
        "Please select the internship you would like to apply to and answer the question asked in the Answer box.\n"
    )
    internship_choice = gr.Dropdown(label="Internship", choices=list(internships.keys()))

    # Hidden until an internship is selected.
    # NOTE(review): the original indentation was lost; all form widgets are
    # assumed to live inside this column — confirm against the deployed app.
    with gr.Column(visible=False) as details_col:
        summary = gr.HTML(label="Question")
        details = gr.Textbox(label="Answer")
        username = gr.Textbox(label="Hugging Face Username")
        comment = gr.Textbox(label="Any comment?")
        generate_btn = gr.Button("Submit")
        output = gr.Label()

    def filter_species(species):
        """Return the updates that show the question for the chosen internship.

        Args:
            species: The value of the internship dropdown (a key of
                ``internships``), or ``None``/unknown when nothing is selected.

        Returns:
            A 2-tuple of Gradio updates: the HTML for the question component
            and the visibility of the details column.
        """
        question = internships.get(species)
        if question is None:
            # Cleared dropdown or unexpected value: hide the form rather than
            # failing with a KeyError.
            return gr.update(value=""), gr.update(visible=False)
        # `gr.update` is component-agnostic; the target here is a gr.HTML
        # component, so a Label-specific update is not appropriate.
        return gr.update(value=question), gr.update(visible=True)

    internship_choice.change(filter_species, internship_choice, [summary, details_col])

    def on_click(_details, _username, _internship_choice, _comment):
        """Validate the form and upload the answer as JSON to the dataset repo.

        Args:
            _details: The applicant's answer text.
            _username: The applicant's Hugging Face username; used as the
                file path inside the dataset repository.
            _internship_choice: The selected internship label.
            _comment: Optional free-form comment.

        Returns:
            A status message shown in the output label: either a validation
            hint or a confirmation of the submission.

        Raises:
            KeyError: If the ``HF_TOKEN`` environment variable is not set.
        """
        clean_username = (_username or "").strip()
        if not clean_username:
            return "Please enter your Hugging Face username before submitting."
        if "/" in clean_username or "\\" in clean_username:
            # The username becomes a path in the repo; do not let it address
            # sub-directories.
            return "The Hugging Face username must not contain slashes."
        if not (_details or "").strip():
            return "Please provide an answer before submitting."

        response = {"response": _details, "internship": _internship_choice, "comment": _comment}
        payload = json.dumps(response).encode("utf-8")
        # NOTE(review): the path is just the username, so a later submission by
        # the same user presumably replaces the earlier one, even for a
        # different internship — confirm this is intended.
        upload_file(
            path_or_fileobj=BytesIO(payload),
            path_in_repo=clean_username,
            repo_id="internships/internships-2023",
            repo_type="dataset",
            token=os.environ["HF_TOKEN"],
        )
        return f"Submitted: '{_details}' for user '{clean_username}'"

    generate_btn.click(on_click, inputs=[details, username, internship_choice, comment], outputs=[output])

if __name__ == "__main__":
    demo.launch()
