Spaces:
Running
Running
Commit
·
bd72568
1
Parent(s):
28ebe52
add mammal prompt, fix bug
Browse files
api_cost/api_cost.yaml
CHANGED
|
@@ -102,6 +102,10 @@ MISTRAL_SMALL:
|
|
| 102 |
################
|
| 103 |
# Local Models
|
| 104 |
################
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
|
| 106 |
in: 0.0
|
| 107 |
out: 0.0
|
|
@@ -113,4 +117,4 @@ LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF:
|
|
| 113 |
out: 0.0
|
| 114 |
phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
|
| 115 |
in: 0.0
|
| 116 |
-
out: 0.0
|
|
|
|
| 102 |
################
|
| 103 |
# Local Models
|
| 104 |
################
|
| 105 |
+
# mistralai/Mistral-Nemo-Instruct-2407
|
| 106 |
+
LOCAL_MISTRAL_NEMO_INSTRUCT_2407:
|
| 107 |
+
in: 0.0
|
| 108 |
+
out: 0.0
|
| 109 |
LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
|
| 110 |
in: 0.0
|
| 111 |
out: 0.0
|
|
|
|
| 117 |
out: 0.0
|
| 118 |
phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
|
| 119 |
in: 0.0
|
| 120 |
+
out: 0.0
|
app.py
CHANGED
|
@@ -2226,13 +2226,13 @@ def content_collage_overlay():
|
|
| 2226 |
# Set the options for the radio button with corresponding indices
|
| 2227 |
# Set the options for the transcription method radio button
|
| 2228 |
options = {
|
| 2229 |
-
0: "Use
|
| 2230 |
-
1: "Use
|
| 2231 |
2: "Use specimen collage for transcriptions"
|
| 2232 |
}
|
| 2233 |
|
| 2234 |
# Determine the default index based on the current configuration
|
| 2235 |
-
default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images',
|
| 2236 |
|
| 2237 |
# Create the radio button for transcription method selection
|
| 2238 |
selected_option = st.radio(
|
|
|
|
| 2226 |
# Set the options for the radio button with corresponding indices
|
| 2227 |
# Set the options for the transcription method radio button
|
| 2228 |
options = {
|
| 2229 |
+
0: "Use original images for transcriptions",
|
| 2230 |
+
1: "Use LeafMachine2 label collage for transcriptions",
|
| 2231 |
2: "Use specimen collage for transcriptions"
|
| 2232 |
}
|
| 2233 |
|
| 2234 |
# Determine the default index based on the current configuration
|
| 2235 |
+
default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images', 1)
|
| 2236 |
|
| 2237 |
# Create the radio button for transcription method selection
|
| 2238 |
selected_option = st.radio(
|
pages/prompt_builder.py
CHANGED
|
@@ -19,6 +19,20 @@ def create_download_button_yaml(file_path, selected_yaml_file, key_val):
|
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def upload_local_prompt_to_server(dir_prompt):
|
| 23 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
| 24 |
if uploaded_file is not None:
|
|
@@ -31,10 +45,14 @@ def upload_local_prompt_to_server(dir_prompt):
|
|
| 31 |
with open(file_path, 'wb') as f:
|
| 32 |
f.write(uploaded_file.getbuffer())
|
| 33 |
st.success(f"Saved file {file_name} in {dir_prompt}")
|
|
|
|
|
|
|
|
|
|
| 34 |
else:
|
| 35 |
st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
| 36 |
|
| 37 |
|
|
|
|
| 38 |
def save_prompt_yaml(filename, col):
|
| 39 |
yaml_content = {
|
| 40 |
'prompt_author': st.session_state['prompt_author'],
|
|
@@ -207,6 +225,9 @@ def build_LLM_prompt_config():
|
|
| 207 |
st.write('##')
|
| 208 |
create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
|
| 209 |
|
|
|
|
|
|
|
|
|
|
| 210 |
# Prompt Author Information
|
| 211 |
st.write("---")
|
| 212 |
st.header("Prompt Author Information")
|
|
|
|
| 19 |
)
|
| 20 |
|
| 21 |
|
| 22 |
+
# def upload_local_prompt_to_server(dir_prompt):
|
| 23 |
+
# uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
| 24 |
+
# if uploaded_file is not None:
|
| 25 |
+
# # Check the file extension
|
| 26 |
+
# file_name = uploaded_file.name
|
| 27 |
+
# if file_name.endswith('.yaml'):
|
| 28 |
+
# file_path = os.path.join(dir_prompt, file_name)
|
| 29 |
+
|
| 30 |
+
# # Save the file
|
| 31 |
+
# with open(file_path, 'wb') as f:
|
| 32 |
+
# f.write(uploaded_file.getbuffer())
|
| 33 |
+
# st.success(f"Saved file {file_name} in {dir_prompt}")
|
| 34 |
+
# else:
|
| 35 |
+
# st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
| 36 |
def upload_local_prompt_to_server(dir_prompt):
|
| 37 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
| 38 |
if uploaded_file is not None:
|
|
|
|
| 45 |
with open(file_path, 'wb') as f:
|
| 46 |
f.write(uploaded_file.getbuffer())
|
| 47 |
st.success(f"Saved file {file_name} in {dir_prompt}")
|
| 48 |
+
|
| 49 |
+
# Update the prompt list
|
| 50 |
+
st.session_state['yaml_files'] = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
|
| 51 |
else:
|
| 52 |
st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
| 53 |
|
| 54 |
|
| 55 |
+
|
| 56 |
def save_prompt_yaml(filename, col):
|
| 57 |
yaml_content = {
|
| 58 |
'prompt_author': st.session_state['prompt_author'],
|
|
|
|
| 225 |
st.write('##')
|
| 226 |
create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
|
| 227 |
|
| 228 |
+
|
| 229 |
+
upload_local_prompt_to_server(dir_prompt)
|
| 230 |
+
|
| 231 |
# Prompt Author Information
|
| 232 |
st.write("---")
|
| 233 |
st.header("Prompt Author Information")
|
vouchervision/general_utils.py
CHANGED
|
@@ -1311,12 +1311,12 @@ def create_specimen_collage(cfg, logger, dir_home, Project, Dirs):
|
|
| 1311 |
|
| 1312 |
# After processing, delete the original images, leaving only the _collage images
|
| 1313 |
# This is used just in case the HF version puts them there
|
| 1314 |
-
for filename in filenames:
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
|
| 1321 |
def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
|
| 1322 |
height = analysis['height']
|
|
|
|
| 1311 |
|
| 1312 |
# After processing, delete the original images, leaving only the _collage images
|
| 1313 |
# This was used just in case the HF version puts them there; the cleanup loop below is currently commented out (disabled)
|
| 1314 |
+
# for filename in filenames:
|
| 1315 |
+
# if not filename.endswith('_collage.jpg'):
|
| 1316 |
+
# file_path = os.path.join(Dirs.save_original, filename)
|
| 1317 |
+
# if os.path.exists(file_path):
|
| 1318 |
+
# os.remove(file_path)
|
| 1319 |
+
# logger.info(f"Deleted original image: {file_path}")
|
| 1320 |
|
| 1321 |
def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
|
| 1322 |
height = analysis['height']
|
vouchervision/model_maps.py
CHANGED
|
@@ -34,6 +34,8 @@ class ModelMaps:
|
|
| 34 |
|
| 35 |
'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
|
| 36 |
'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
|
|
|
|
|
|
|
| 37 |
|
| 38 |
'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
|
| 39 |
|
|
@@ -78,7 +80,8 @@ class ModelMaps:
|
|
| 78 |
'Open Mistral 7B',
|
| 79 |
]
|
| 80 |
|
| 81 |
-
MODELS_LOCAL = ['LOCAL
|
|
|
|
| 82 |
'LOCAL Mistral 7B Instruct v0.2',
|
| 83 |
'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
|
| 84 |
'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
|
|
@@ -124,6 +127,7 @@ class ModelMaps:
|
|
| 124 |
'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
|
| 125 |
'Open Mistral 7B': 'OPEN_MISTRAL_7B',
|
| 126 |
|
|
|
|
| 127 |
'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
|
| 128 |
'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
|
| 129 |
|
|
@@ -166,6 +170,7 @@ class ModelMaps:
|
|
| 166 |
'Open Mixtral 8x7B': has_key_mistral,
|
| 167 |
'Open Mistral 7B': has_key_mistral,
|
| 168 |
|
|
|
|
| 169 |
'LOCAL Mixtral 8x7B Instruct v0.1': True,
|
| 170 |
'LOCAL Mistral 7B Instruct v0.2': True,
|
| 171 |
|
|
@@ -208,6 +213,7 @@ class ModelMaps:
|
|
| 208 |
'Open Mixtral 8x7B': False,
|
| 209 |
'Open Mistral 7B': False,
|
| 210 |
|
|
|
|
| 211 |
'LOCAL Mixtral 8x7B Instruct v0.1': False,
|
| 212 |
'LOCAL Mistral 7B Instruct v0.2': False,
|
| 213 |
|
|
@@ -304,11 +310,15 @@ class ModelMaps:
|
|
| 304 |
|
| 305 |
|
| 306 |
### Mistral LOCAL
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
|
| 308 |
return 'Mixtral-8x7B-Instruct-v0.1'
|
| 309 |
|
| 310 |
elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
|
| 311 |
-
return 'Mistral-7B-Instruct-v0.
|
| 312 |
|
| 313 |
### Mistral LOCAL CPU
|
| 314 |
elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
|
|
|
|
| 34 |
|
| 35 |
'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
|
| 36 |
'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
|
| 37 |
+
# mistralai/Mistral-Nemo-Instruct-2407
|
| 38 |
+
'LOCAL_MISTRAL_NEMO_INSTRUCT_2407': '#000000', # Black
|
| 39 |
|
| 40 |
'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
|
| 41 |
|
|
|
|
| 80 |
'Open Mistral 7B',
|
| 81 |
]
|
| 82 |
|
| 83 |
+
MODELS_LOCAL = ['LOCAL Mistral Nemo Instruct 2407',
|
| 84 |
+
'LOCAL Mixtral 8x7B Instruct v0.1',
|
| 85 |
'LOCAL Mistral 7B Instruct v0.2',
|
| 86 |
'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
|
| 87 |
'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
|
|
|
|
| 127 |
'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
|
| 128 |
'Open Mistral 7B': 'OPEN_MISTRAL_7B',
|
| 129 |
|
| 130 |
+
'LOCAL Mistral Nemo Instruct 2407': 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407',
|
| 131 |
'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
|
| 132 |
'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
|
| 133 |
|
|
|
|
| 170 |
'Open Mixtral 8x7B': has_key_mistral,
|
| 171 |
'Open Mistral 7B': has_key_mistral,
|
| 172 |
|
| 173 |
+
'LOCAL Mistral Nemo Instruct 2407': True,
|
| 174 |
'LOCAL Mixtral 8x7B Instruct v0.1': True,
|
| 175 |
'LOCAL Mistral 7B Instruct v0.2': True,
|
| 176 |
|
|
|
|
| 213 |
'Open Mixtral 8x7B': False,
|
| 214 |
'Open Mistral 7B': False,
|
| 215 |
|
| 216 |
+
'LOCAL Mistral Nemo Instruct 2407': False,
|
| 217 |
'LOCAL Mixtral 8x7B Instruct v0.1': False,
|
| 218 |
'LOCAL Mistral 7B Instruct v0.2': False,
|
| 219 |
|
|
|
|
| 310 |
|
| 311 |
|
| 312 |
### Mistral LOCAL
|
| 313 |
+
# LOCAL_MISTRAL_NEMO_INSTRUCT_2407 -> 'LOCAL Mistral Nemo Instruct 2407' (mistralai/Mistral-Nemo-Instruct-2407)
|
| 314 |
+
elif key == 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407':
|
| 315 |
+
return 'Mistral-Nemo-Instruct-2407'
|
| 316 |
+
|
| 317 |
elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
|
| 318 |
return 'Mixtral-8x7B-Instruct-v0.1'
|
| 319 |
|
| 320 |
elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
|
| 321 |
+
return 'Mistral-7B-Instruct-v0.3'
|
| 322 |
|
| 323 |
### Mistral LOCAL CPU
|
| 324 |
elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
|
vouchervision/utils_LLM_JSON_validation.py
CHANGED
|
@@ -12,8 +12,8 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
|
|
| 12 |
data[key] = ''
|
| 13 |
elif isinstance(value, str):
|
| 14 |
if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
|
| 15 |
-
'TBD',
|
| 16 |
-
'not provided in the text', 'not found in the text',
|
| 17 |
'not in the text', 'not provided', 'not found',
|
| 18 |
'not provided in the ocr', 'not found in the ocr',
|
| 19 |
'not in the ocr',
|
|
@@ -29,7 +29,7 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
|
|
| 29 |
'not in the ocr text',
|
| 30 |
'Not provided in ocr text',
|
| 31 |
'not provided in ocr text',
|
| 32 |
-
'n/a n/a','n/a, n/a',
|
| 33 |
'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
|
| 34 |
'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
|
| 35 |
'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
|
|
|
|
| 12 |
data[key] = ''
|
| 13 |
elif isinstance(value, str):
|
| 14 |
if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
|
| 15 |
+
'TBD', 'tbd',
|
| 16 |
+
'not provided in the text', 'not found in the text', 'Not found in OCR text', 'not found in ocr text',
|
| 17 |
'not in the text', 'not provided', 'not found',
|
| 18 |
'not provided in the ocr', 'not found in the ocr',
|
| 19 |
'not in the ocr',
|
|
|
|
| 29 |
'not in the ocr text',
|
| 30 |
'Not provided in ocr text',
|
| 31 |
'not provided in ocr text',
|
| 32 |
+
'n/a n/a','n/a, n/a','Not applicable','not applicable',
|
| 33 |
'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
|
| 34 |
'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
|
| 35 |
'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
|
vouchervision/utils_VoucherVision.py
CHANGED
|
@@ -164,7 +164,7 @@ class VoucherVision():
|
|
| 164 |
|
| 165 |
|
| 166 |
def map_dir_labels(self):
|
| 167 |
-
if self.cfg['leafmachine']['use_RGB_label_images']:
|
| 168 |
self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
|
| 169 |
else:
|
| 170 |
self.dir_labels = self.Dirs.save_original
|
|
@@ -353,7 +353,7 @@ class VoucherVision():
|
|
| 353 |
elif header.value == "path_to_crop":
|
| 354 |
sheet.cell(row=next_row, column=i, value=path_to_crop)
|
| 355 |
elif header.value == "path_to_original":
|
| 356 |
-
if self.cfg['leafmachine']['use_RGB_label_images']:
|
| 357 |
fname = os.path.basename(path_to_crop)
|
| 358 |
base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
|
| 359 |
path_to_original = os.path.join(base, 'Original_Images', fname)
|
|
|
|
| 164 |
|
| 165 |
|
| 166 |
def map_dir_labels(self):
|
| 167 |
+
if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
|
| 168 |
self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
|
| 169 |
else:
|
| 170 |
self.dir_labels = self.Dirs.save_original
|
|
|
|
| 353 |
elif header.value == "path_to_crop":
|
| 354 |
sheet.cell(row=next_row, column=i, value=path_to_crop)
|
| 355 |
elif header.value == "path_to_original":
|
| 356 |
+
if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
|
| 357 |
fname = os.path.basename(path_to_crop)
|
| 358 |
base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
|
| 359 |
path_to_original = os.path.join(base, 'Original_Images', fname)
|
vouchervision/utils_VoucherVision_parallel.py
CHANGED
|
@@ -704,8 +704,8 @@ class VoucherVision():
|
|
| 704 |
json_report.set_text(text_main='Sending batch to OCR and LLM')
|
| 705 |
|
| 706 |
num_files = len(self.img_paths)
|
| 707 |
-
|
| 708 |
-
num_threads = 128
|
| 709 |
counter = AtomicCounter()
|
| 710 |
|
| 711 |
# Setup for parallel execution
|
|
|
|
| 704 |
json_report.set_text(text_main='Sending batch to OCR and LLM')
|
| 705 |
|
| 706 |
num_files = len(self.img_paths)
|
| 707 |
+
num_threads = min(num_files, 128)
|
| 708 |
+
# num_threads = 128
|
| 709 |
counter = AtomicCounter()
|
| 710 |
|
| 711 |
# Setup for parallel execution
|