Upload folder using huggingface_hub
Browse files
- rag_fns/setup_load.py +9 -2
rag_fns/setup_load.py
CHANGED
|
@@ -7,6 +7,8 @@ import numpy as np
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
from openai import OpenAI
|
| 9 |
from pyprojroot import here
|
|
|
|
|
|
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
|
@@ -38,8 +40,13 @@ def import_data() -> tuple[list[str], np.ndarray, dict[str, Any]]:
|
|
| 38 |
tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
|
| 39 |
"""
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
talk_ids = data2load["talk_ids"]
|
| 45 |
embeds = data2load["embeds"]
|
|
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
from openai import OpenAI
|
| 9 |
from pyprojroot import here
|
| 10 |
+
import requests
|
| 11 |
+
import io
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
|
|
| 40 |
tuple[pd.DataFrame, dict, dict]: A tuple containing the talks dataframe, transcript dictionaries, and full embeddings.
|
| 41 |
"""
|
| 42 |
|
| 43 |
+
target_file_url = "https://raw.githubusercontent.com/AlanFeder/dcr-multi-frameworks/main/data/interim/embeds_talks_dcr.pkl"
|
| 44 |
+
|
| 45 |
+
response = requests.get(target_file_url)
|
| 46 |
+
response.raise_for_status() # Ensure we got a successful response
|
| 47 |
+
|
| 48 |
+
# Load the .pkl file into a Python object
|
| 49 |
+
data2load = pickle.load(io.BytesIO(response.content))
|
| 50 |
|
| 51 |
talk_ids = data2load["talk_ids"]
|
| 52 |
embeds = data2load["embeds"]
|