Safetensors
EarthSpeciesProject
NatureLM
Cheeky Sparrow committed on
Commit
240e8e1
·
1 Parent(s): b77c285

handler, requirements, and inference config for inference endpoint

Browse files
Files changed (3) hide show
  1. handler.py +44 -0
  2. inference.yml +61 -0
  3. requirements.txt +1 -0
handler.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from NatureLM.models import NatureLM
3
+ from NatureLM.infer import Pipeline
4
+ import numpy as np
5
+
6
class EndpointHandler:
    """Hugging Face Inference Endpoint handler for NatureLM-audio.

    Loads the pretrained model onto CPU once at startup, then answers
    free-form questions about raw audio through the NatureLM ``Pipeline``.
    """

    def __init__(self, path=""):
        # NOTE(review): `path` (the local checkpoint directory the endpoint
        # runtime passes in) is ignored; weights are always pulled from the
        # Hub repo — confirm this is intentional.
        model = NatureLM.from_pretrained("EarthSpeciesProject/NatureLM-audio", device="cpu")
        # `.to("cpu")` is presumably a no-op given device="cpu" above — kept
        # to match the loading contract of the project API.
        self.model = model.eval().to("cpu")
        self.pipeline = Pipeline(model=self.model, cfg_path="inference.yml")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: Request payload with keys:
                - "audio": audio samples (numpy array of audio data)
                - "query": question to ask about the audio
                - "sample_rate": optional input sample rate, default 16000

        Returns:
            A single-element list holding either ``{"result", "query"}``
            on success or ``{"error": ...}`` on bad input / failure.
        """
        audio = data.get("audio")
        query = data.get("query", "")
        sample_rate = data.get("sample_rate", 16000)

        # Guard clauses: reject malformed payloads before touching the model.
        if audio is None:
            return [{"error": "No audio data provided"}]
        if not query:
            return [{"error": "No query provided"}]

        try:
            # The pipeline expects a batch of audios; wrap the single clip.
            outputs = self.pipeline(
                audios=[audio],
                queries=query,
                input_sample_rate=sample_rate,
            )
            return [{"result": outputs[0], "query": query}]
        except Exception as exc:  # endpoint boundary: report rather than crash
            return [{"error": f"Error processing audio: {str(exc)}"}]
inference.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Inference configuration for NatureLM-audio, consumed by NatureLM.infer.Pipeline
# (referenced as cfg_path="inference.yml" in handler.py).
# NOTE(review): nesting below is reconstructed — the diff stripped indentation;
# confirm beats_cfg belongs under `model:` and `generate:` is top-level.
model:
  # Backbone LLM checkpoint.
  llama_path: "meta-llama/Meta-Llama-3.1-8B-Instruct"

  # Keep the BEATs audio encoder frozen at inference time.
  freeze_beats: True

  # Audio Q-Former bridging audio features into the LLM.
  use_audio_Qformer: True
  max_pooling: False
  downsample_factor: 8
  freeze_audio_QFormer: False
  window_level_Qformer: True
  num_audio_query_token: 1
  # Window size / stride in seconds for window-level Q-Former.
  second_per_window: 0.333333
  second_stride: 0.333333

  # Audio-to-LLM projection checkpoint ("" = use default / loaded weights).
  audio_llama_proj_model: ""
  freeze_audio_llama_proj: False

  # LoRA adapter settings applied to the LLM.
  lora: True
  lora_rank: 32
  lora_alpha: 32
  lora_dropout: 0.1

  # Llama-3.1 chat template; "{}" is substituted with the user query.
  prompt_template: "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
  max_txt_len: 160
  end_sym: <|end_of_text|>

  # BEATs audio-encoder architecture — must match the pretrained checkpoint.
  beats_cfg:
    input_patch_size: 16
    embed_dim: 512
    conv_bias: False
    encoder_layers: 12
    encoder_embed_dim: 768
    encoder_ffn_embed_dim: 3072
    encoder_attention_heads: 12
    activation_fn: "gelu"
    layer_wise_gradient_decay_ratio: 0.6
    layer_norm_first: False
    deep_norm: True
    dropout: 0.0
    attention_dropout: 0.0
    activation_dropout: 0.0
    encoder_layerdrop: 0.05
    dropout_input: 0.0
    conv_pos: 128
    conv_pos_groups: 16
    relative_position_embedding: True
    num_buckets: 320
    max_distance: 800
    gru_rel_pos: True
    finetuned_model: True
    predictor_dropout: 0.0
    predictor_class: 527

# Text-generation parameters (beam search; temperature presumably unused
# while do_sample is False — confirm against the generation backend).
generate:
  max_new_tokens: 300
  num_beams: 2
  do_sample: False
  min_length: 1
  temperature: 0.1
  repetition_penalty: 1.0
  length_penalty: 1.0
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git+https://github.com/earthspecies/naturelm-audio.git