Sandeep Chowdhary committed
Commit f2690f9 · verified · 1 Parent(s): 50f0d1d

Update model card

Files changed (1)
  1. README.md +152 -42
README.md CHANGED
@@ -50,45 +50,166 @@ Note: Label order in predictions matches the order above.
  ## Usage

  ```python
- import torch
  from transformers import DistilBertTokenizer
- import tempfile
  from huggingface_hub import snapshot_download

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

- # Download and load model
- model_link = "sanchow/veganism_and_vegetarianism-distilbert-classifier"
  with tempfile.TemporaryDirectory() as temp_dir:
-     snapshot_download(repo_id=model_link, local_dir=temp_dir, local_dir_use_symlinks=False)
-
-     sys.path.insert(0, temp_dir)
-     from model_class import MultilabelClassifier
-
-     tokenizer = DistilBertTokenizer.from_pretrained(temp_dir)
-     checkpoint = torch.load(os.path.join(temp_dir, 'model.pt'), map_location='cpu')
-     model = MultilabelClassifier(checkpoint['model_name'], len(checkpoint['label_names']))
-     model.load_state_dict(checkpoint['model_state_dict'])
-     model.to(device)
-     model.eval()
-
-     # Predict
-     text = "Your text here"
-     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
-     with torch.no_grad():
-         predictions = model(**inputs).cpu().numpy()
-
-     # Get scores
-     label_scores = {label: float(score) for label, score in zip(checkpoint['label_names'], predictions[0])}
  ```

- ## Applications
-
- - Content analysis of social media discussions
- - Research on public sentiment and discourse
- - Policy analysis of key topics and concerns
- - Market research on trends and interests
-

  ## Performance

@@ -104,17 +225,6 @@ Dataset: ~900 GPT-labeled samples per sector (600 train, 150 validation, 150 tes

  ## Optimal Thresholds

- Use these thresholds for best performance:
-
- - Animal Welfare: 0.481
- - Environmental Impact: 0.459
- - Health: 0.201
- - Lab Grown And Alt Proteins: 0.341
- - Psychology And Identity: 0.525
- - Systemic Vs Individual Action: 0.375
- - Taste And Convenience: 0.664
-
- Usage:
  ```python
  optimal_thresholds = {'Animal Welfare': 0.48107979620047003, 'Environmental Impact': 0.45919171852850427, 'Health': 0.20115313966833437, 'Lab Grown And Alt Proteins': 0.3414601502146817, 'Psychology And Identity': 0.5246278637433214, 'Systemic Vs Individual Action': 0.37517437676211585, 'Taste And Convenience': 0.6635140143644325}
  for label, score in zip(label_names, predictions[0]):
 
  ## Usage

  ```python
+ import torch, sys, os, tempfile
  from transformers import DistilBertTokenizer
  from huggingface_hub import snapshot_download

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

+ def print_sorted_label_scores(label_scores):
+     # Sort label_scores dict by score descending
+     sorted_items = sorted(label_scores.items(), key=lambda x: x[1], reverse=True)
+     for label, score in sorted_items:
+         print(f" {label}: {score:.6f}")
+
+ # ------------------ TRANSPORT ------------------
+ transport_model_link = 'sanchow/electric_vehicles-distilbert-classifier'
+ transport_examples = [
+     "Switching to electric cars can cut down on smog and carbon output."
+ ]
+
+ print(f"\n{'='*60}")
+ print("MODEL: TRANSPORT SECTOR")
+ print(f"{'='*60}")
+ print(f"Downloading model: {transport_model_link}")
  with tempfile.TemporaryDirectory() as temp_dir:
+     snapshot_download(
+         repo_id=transport_model_link,
+         local_dir=temp_dir,
+         local_dir_use_symlinks=False
+     )
+     model_class_path = os.path.join(temp_dir, 'model_class.py')
+     if not os.path.exists(model_class_path):
+         print(f"model_class.py not found in downloaded files")
+         print(f" Available files: {os.listdir(temp_dir)}")
+     else:
+         sys.path.insert(0, temp_dir)
+         from model_class import MultilabelClassifier
+         tokenizer = DistilBertTokenizer.from_pretrained(temp_dir)
+         checkpoint = torch.load(os.path.join(temp_dir, 'model.pt'), map_location='cpu', weights_only=False)
+         model = MultilabelClassifier(checkpoint['model_name'], len(checkpoint['label_names']))
+         model.load_state_dict(checkpoint['model_state_dict'])
+         model.to(device)
+         model.eval()
+         print("Model loaded successfully")
+         print(f" Labels: {checkpoint['label_names']}")
+         print("\nTransport classifier results for transport_examples:\n")
+         for i, test_text in enumerate(transport_examples):
+             inputs = tokenizer(
+                 test_text,
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=512,
+                 padding=True
+             ).to(device)
+             with torch.no_grad():
+                 outputs = model(**inputs)
+             predictions = outputs[0].cpu().numpy() if isinstance(outputs, (tuple, list)) else outputs.cpu().numpy()
+             label_scores = {label: float(score) for label, score in zip(checkpoint['label_names'], predictions[0])}
+             print(f"Example {i+1}: '{test_text}'")
+             print("Predictions (all label scores, highest first):")
+             print_sorted_label_scores(label_scores)
+             print("-" * 40)
+
+ # ------------------ HOUSING ------------------
+ housing_model_link = 'sanchow/solar_energy-distilbert-classifier'
+ housing_examples = [
+     "Solar panels on rooftops can significantly reduce electricity bills."
+ ]
+
+ print(f"\n{'='*60}")
+ print("MODEL: HOUSING SECTOR")
+ print(f"{'='*60}")
+ print(f"Downloading model: {housing_model_link}")
+ with tempfile.TemporaryDirectory() as temp_dir:
+     snapshot_download(
+         repo_id=housing_model_link,
+         local_dir=temp_dir,
+         local_dir_use_symlinks=False
+     )
+     model_class_path = os.path.join(temp_dir, 'model_class.py')
+     if not os.path.exists(model_class_path):
+         print(f"model_class.py not found in downloaded files")
+         print(f" Available files: {os.listdir(temp_dir)}")
+     else:
+         sys.path.insert(0, temp_dir)
+         from model_class import MultilabelClassifier
+         tokenizer = DistilBertTokenizer.from_pretrained(temp_dir)
+         checkpoint = torch.load(os.path.join(temp_dir, 'model.pt'), map_location='cpu', weights_only=False)
+         model = MultilabelClassifier(checkpoint['model_name'], len(checkpoint['label_names']))
+         model.load_state_dict(checkpoint['model_state_dict'])
+         model.to(device)
+         model.eval()
+         print("Model loaded successfully")
+         print(f" Labels: {checkpoint['label_names']}")
+         print("\nHousing classifier results for housing_examples:\n")
+         for i, test_text in enumerate(housing_examples):
+             inputs = tokenizer(
+                 test_text,
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=512,
+                 padding=True
+             ).to(device)
+             with torch.no_grad():
+                 outputs = model(**inputs)
+             predictions = outputs[0].cpu().numpy() if isinstance(outputs, (tuple, list)) else outputs.cpu().numpy()
+             label_scores = {label: float(score) for label, score in zip(checkpoint['label_names'], predictions[0])}
+             print(f"Example {i+1}: '{test_text}'")
+             print("Predictions (all label scores, highest first):")
+             print_sorted_label_scores(label_scores)
+             print("-" * 40)
+
+ # ------------------ FOOD ------------------
+ food_model_link = 'sanchow/veganism_and_vegetarianism-distilbert-classifier'
+ food_examples = [
+     "Plant-based diets can help reduce environmental impact of food production."
+ ]
+
+ print(f"\n{'='*60}")
+ print("MODEL: FOOD SECTOR")
+ print(f"{'='*60}")
+ print(f"Downloading model: {food_model_link}")
+ with tempfile.TemporaryDirectory() as temp_dir:
+     snapshot_download(
+         repo_id=food_model_link,
+         local_dir=temp_dir,
+         local_dir_use_symlinks=False
+     )
+     model_class_path = os.path.join(temp_dir, 'model_class.py')
+     if not os.path.exists(model_class_path):
+         print(f"model_class.py not found in downloaded files")
+         print(f" Available files: {os.listdir(temp_dir)}")
+     else:
+         sys.path.insert(0, temp_dir)
+         from model_class import MultilabelClassifier
+         tokenizer = DistilBertTokenizer.from_pretrained(temp_dir)
+         checkpoint = torch.load(os.path.join(temp_dir, 'model.pt'), map_location='cpu', weights_only=False)
+         model = MultilabelClassifier(checkpoint['model_name'], len(checkpoint['label_names']))
+         model.load_state_dict(checkpoint['model_state_dict'])
+         model.to(device)
+         model.eval()
+         print("Model loaded successfully")
+         print(f" Labels: {checkpoint['label_names']}")
+         print("\nFood classifier results for food_examples:\n")
+         for i, test_text in enumerate(food_examples):
+             inputs = tokenizer(
+                 test_text,
+                 return_tensors="pt",
+                 truncation=True,
+                 max_length=512,
+                 padding=True
+             ).to(device)
+             with torch.no_grad():
+                 outputs = model(**inputs)
+             predictions = outputs[0].cpu().numpy() if isinstance(outputs, (tuple, list)) else outputs.cpu().numpy()
+             label_scores = {label: float(score) for label, score in zip(checkpoint['label_names'], predictions[0])}
+             print(f"Example {i+1}: '{test_text}'")
+             print("Predictions (all label scores, highest first):")
+             print_sorted_label_scores(label_scores)
+             print("-" * 40)
  ```
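The three sector blocks in the added snippet repeat the same download-and-load steps. As an illustration only (not part of the model card's code; the `load_sector_classifier` helper, its name, and its return values are assumptions based on the snippet above), the shared logic could be factored into a small function:

```python
import os
import sys
import tempfile

import torch
from huggingface_hub import snapshot_download
from transformers import DistilBertTokenizer


def load_sector_classifier(repo_id, device="cpu"):
    """Download one of the sector classifier repos and return (model, tokenizer, label_names)."""
    # mkdtemp (rather than a context manager) keeps the downloaded files around for the model's lifetime
    local_dir = tempfile.mkdtemp()
    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)
    sys.path.insert(0, local_dir)
    from model_class import MultilabelClassifier  # model_class.py ships with each repo (Python caches the first one imported)
    tokenizer = DistilBertTokenizer.from_pretrained(local_dir)
    checkpoint = torch.load(os.path.join(local_dir, "model.pt"), map_location="cpu", weights_only=False)
    model = MultilabelClassifier(checkpoint["model_name"], len(checkpoint["label_names"]))
    model.load_state_dict(checkpoint["model_state_dict"])
    model.to(device).eval()
    return model, tokenizer, checkpoint["label_names"]
```

Each sector block would then reduce to a call such as `model, tokenizer, labels = load_sector_classifier('sanchow/solar_energy-distilbert-classifier', device)` followed by the same tokenize-and-predict loop shown above.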

  ## Performance


  ## Optimal Thresholds

  ```python
  optimal_thresholds = {'Animal Welfare': 0.48107979620047003, 'Environmental Impact': 0.45919171852850427, 'Health': 0.20115313966833437, 'Lab Grown And Alt Proteins': 0.3414601502146817, 'Psychology And Identity': 0.5246278637433214, 'Systemic Vs Individual Action': 0.37517437676211585, 'Taste And Convenience': 0.6635140143644325}
  for label, score in zip(label_names, predictions[0]):
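      # (Sketch only: the hunk shown here is truncated at the loop header, so this body is
      #  not part of the original diff. A typical body compares each label's score against
      #  its tuned threshold to decide whether the label applies.)
      if score >= optimal_thresholds[label]:
          print(f"{label}: {score:.3f} (above threshold)")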