Pushing LoRA adapter to Hugging Face Hub

Browse files

Files changed (7)

README.md +12 -48
adapter_config.json +6 -11
adapter_model.safetensors +2 -2
runs/May18_01-31-30_default/events.out.tfevents.1747531900.default.1325.0 +3 -0
runs/May18_03-51-07_default/events.out.tfevents.1747540269.default.2105.0 +3 -0
runs/May18_21-40-06_default/events.out.tfevents.1747604414.default.18508.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -7,67 +7,29 @@ tags:
 - trl
 - sft
 licence: license
-license: apache-2.0
-datasets:
-- garystafford/fine-tune-nvidia-blackwell
-language:
-- en
-pipeline_tag: text-generation
 ---
 # Model Card for Llama-3.2-3B-Instruct-lora-nvidia-blackwell-vllm
-This model is a fine-tuned version of meta-llama/Llama-3.2-3B-Instruct. It has been trained using TRL.
-It was trained with Low-Rank Adaptation (LoRA) and a supervised fine-tuning method within the
-Parameter-Efficient Fine-Tuning (PEFT) framework. This LoRA was optimized for use with vLLM.
 ## Quick start
 ```python
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-HF_TOKEN = "<YOUR_HF_TOKEN_GOES_HERE>"
-base_model_id = "meta-llama/Llama-3.2-3B-Instruct"
-peft_model_id = "garystafford/Llama-3.2-3B-Instruct-lora-nvidia-blackwell"
-base_model = AutoModelForCausalLM.from_pretrained(base_model_id, token=HF_TOKEN)
-model = PeftModel.from_pretrained(base_model, peft_model_id).to(device)
-tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=HF_TOKEN)
-test_prompt = [
-  {
-      "role": "user",
-      "content": "Describe the NVIDIA Blackwell architecture.",
-  }
-]
-inputs = tokenizer.apply_chat_template(
-    test_prompt,
-    tokenize=True,
-    add_generation_prompt=True,
-    return_tensors="pt",
-).to(device)
-output = model.generate(
-    input_ids=inputs,
-    max_new_tokens=128,
-    temperature=0.1,
-    pad_token_id=tokenizer.pad_token_id,
-).to(device)
-output = tokenizer.decode(output[0], skip_special_tokens=True)
-output = output.split('assistant\n\n')[1].strip()
-print(output)
 ```
 ## Training procedure
 This model was trained with SFT.
 ### Framework versions
@@ -80,6 +42,8 @@ This model was trained with SFT.
 ## Citations
 Cite TRL as:
 ```bibtex

 - trl
 - sft
 licence: license
 ---
 # Model Card for Llama-3.2-3B-Instruct-lora-nvidia-blackwell-vllm
+This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="garystafford/Llama-3.2-3B-Instruct-lora-nvidia-blackwell-vllm", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
 ```
 ## Training procedure
 This model was trained with SFT.
 ### Framework versions
 ## Citations
 Cite TRL as:
 ```bibtex

adapter_config.json CHANGED Viewed

@@ -8,32 +8,27 @@
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
-  "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "o_proj",
-    "down_proj",
-    "up_proj",
-    "k_proj",
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
-  "use_rslora": false
 }

   "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
+  "init_lora_weights": "gaussian",
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 16,
   "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_rslora": true
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29051ad9e6f2241651ac9fc27845fbd701c03b60d6f0d44533a52f206f739fa7
-size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:653ee47d9f39146ae607b9b7d5a92a1f90184be0ae38b46d96d95c9baaf10d00
+size 73415408

runs/May18_01-31-30_default/events.out.tfevents.1747531900.default.1325.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f40d841017fae947778587574d9ba61cf4273b5983038c9b8916599006c2ae9
+size 43036

runs/May18_03-51-07_default/events.out.tfevents.1747540269.default.2105.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a71c645368cf3f720fdaa8a9079134a3209874b347e0e4c5705330e9d587205
+size 43036

runs/May18_21-40-06_default/events.out.tfevents.1747604414.default.18508.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0260a9f2696d68e11bac2f476ba338b9ae8de64f7fa50e83ed81817a3b8b2022
+size 43041

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bde91c7cc438931dce25564e3184cf95163b292676333c944ae57ab90033898
 size 6161

 version https://git-lfs.github.com/spec/v1
+oid sha256:066b86d7b059120c0663688df28e0fe88d23a92db35a313e053416f37147b2ea
 size 6161