Hatef Otroshi committed on
Commit
9340724
·
0 Parent(s):

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +37 -0
  2. .gitignore +39 -0
  3. LICENSE +22 -0
  4. README.md +13 -0
  5. __init__.py +0 -0
  6. app.py +415 -0
  7. data/id_0.png +0 -0
  8. data/id_1.png +0 -0
  9. data/id_2.png +0 -0
  10. data/id_3.png +0 -0
  11. data/id_4.png +0 -0
  12. data/id_5.png +0 -0
  13. data/id_6_0.png +0 -0
  14. data/id_6_1.png +0 -0
  15. data/id_6_2.png +0 -0
  16. face_alignment/LICENSE +21 -0
  17. face_alignment/README.md +1 -0
  18. face_alignment/align.py +40 -0
  19. face_alignment/mtcnn.py +175 -0
  20. face_alignment/mtcnn_pytorch/.gitignore +3 -0
  21. face_alignment/mtcnn_pytorch/LICENSE +21 -0
  22. face_alignment/mtcnn_pytorch/README.md +26 -0
  23. face_alignment/mtcnn_pytorch/caffe_models/det1.caffemodel +3 -0
  24. face_alignment/mtcnn_pytorch/caffe_models/det1.prototxt +177 -0
  25. face_alignment/mtcnn_pytorch/caffe_models/det2.caffemodel +3 -0
  26. face_alignment/mtcnn_pytorch/caffe_models/det2.prototxt +228 -0
  27. face_alignment/mtcnn_pytorch/caffe_models/det3.caffemodel +3 -0
  28. face_alignment/mtcnn_pytorch/caffe_models/det3.prototxt +294 -0
  29. face_alignment/mtcnn_pytorch/caffe_models/det4.caffemodel +3 -0
  30. face_alignment/mtcnn_pytorch/caffe_models/det4.prototxt +995 -0
  31. face_alignment/mtcnn_pytorch/extract_weights_from_caffe_models.py +47 -0
  32. face_alignment/mtcnn_pytorch/get_aligned_face_from_mtcnn.ipynb +0 -0
  33. face_alignment/mtcnn_pytorch/refine_faces.ipynb +315 -0
  34. face_alignment/mtcnn_pytorch/src/__init__.py +2 -0
  35. face_alignment/mtcnn_pytorch/src/align_trans.py +304 -0
  36. face_alignment/mtcnn_pytorch/src/box_utils.py +238 -0
  37. face_alignment/mtcnn_pytorch/src/detector.py +131 -0
  38. face_alignment/mtcnn_pytorch/src/first_stage.py +99 -0
  39. face_alignment/mtcnn_pytorch/src/get_nets.py +169 -0
  40. face_alignment/mtcnn_pytorch/src/matlab_cp2tform.py +350 -0
  41. face_alignment/mtcnn_pytorch/src/visualization_utils.py +31 -0
  42. face_alignment/mtcnn_pytorch/src/weights/onet.npy +3 -0
  43. face_alignment/mtcnn_pytorch/src/weights/pnet.npy +3 -0
  44. face_alignment/mtcnn_pytorch/src/weights/rnet.npy +3 -0
  45. face_alignment/mtcnn_pytorch/test_on_images.ipynb +0 -0
  46. face_alignment/mtcnn_pytorch/try_mtcnn_step_by_step.ipynb +0 -0
  47. net.py +413 -0
  48. requirements.txt +7 -0
  49. static/idiap-black.png +0 -0
  50. static/idiap-white.png +0 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.caffemodel filter=lfs diff=lfs merge=lfs -text
6
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
7
+ *.ftz filter=lfs diff=lfs merge=lfs -text
8
+ *.gz filter=lfs diff=lfs merge=lfs -text
9
+ *.h5 filter=lfs diff=lfs merge=lfs -text
10
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
11
+ *.joblib filter=lfs diff=lfs merge=lfs -text
12
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
13
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
+ *.model filter=lfs diff=lfs merge=lfs -text
15
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
16
+ *.npy filter=lfs diff=lfs merge=lfs -text
17
+ *.npz filter=lfs diff=lfs merge=lfs -text
18
+ *.onnx filter=lfs diff=lfs merge=lfs -text
19
+ *.ot filter=lfs diff=lfs merge=lfs -text
20
+ *.parquet filter=lfs diff=lfs merge=lfs -text
21
+ *.pb filter=lfs diff=lfs merge=lfs -text
22
+ *.pickle filter=lfs diff=lfs merge=lfs -text
23
+ *.pkl filter=lfs diff=lfs merge=lfs -text
24
+ *.pt filter=lfs diff=lfs merge=lfs -text
25
+ *.pth filter=lfs diff=lfs merge=lfs -text
26
+ *.rar filter=lfs diff=lfs merge=lfs -text
27
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
28
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
30
+ *.tar filter=lfs diff=lfs merge=lfs -text
31
+ *.tflite filter=lfs diff=lfs merge=lfs -text
32
+ *.tgz filter=lfs diff=lfs merge=lfs -text
33
+ *.wasm filter=lfs diff=lfs merge=lfs -text
34
+ *.xz filter=lfs diff=lfs merge=lfs -text
35
+ *.zip filter=lfs diff=lfs merge=lfs -text
36
+ *.zst filter=lfs diff=lfs merge=lfs -text
37
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
10
+ *.joblib filter=lfs diff=lfs merge=lfs -text
11
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ __pycache__
38
+ .gradio
39
+ models
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Hatef OTROSHI SHAHREZA ([email protected])
4
+ Idiap Research Institute, Switzerland
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: HyperFace
3
+ emoji: 🚀
4
+ colorFrom: red
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.33.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: HyperFace Demo
12
+ python_version: '3.11'
13
+ ---
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-FileCopyrightText: 2025 Idiap Research Institute
2
+ # SPDX-FileContributor: Hatef Otroshi <[email protected]>
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """HyperFace demo"""
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+ import cv2
11
+ import gradio as gr
12
+ import numpy as np
13
+ import torch
14
+ import torch.nn.functional as F
15
+ from torchvision import transforms
16
+ from huggingface_hub import hf_hub_download
17
+
18
+ from title import title_css, title_with_logo
19
+
20
+ from face_alignment import align
21
+ from PIL import Image
22
+ import net
23
+
24
+
25
+ model_configs = {
26
+ "HyperFace-10k-LDM": {
27
+ "repo": "idiap/HyperFace-10k-LDM",
28
+ "filename": "HyperFace_10k_LDM.ckpt",
29
+ },
30
+ "HyperFace-10k-StyleGAN": {
31
+ "repo": "idiap/HyperFace-10k-StyleGAN",
32
+ "filename": "HyperFace_10k_StyleGAN.ckpt",
33
+ },
34
+ "HyperFace-50k-StyleGAN": {
35
+ "repo": "idiap/HyperFace-50k-StyleGAN",
36
+ "filename": "HyperFace_50k_StyleGAN.ckpt",
37
+ },
38
+ }
39
+
40
+
41
+ # ───────────────────────────────
42
+ # Data & models
43
+ # ───────────────────────────────
44
+ DATA_DIR = Path("data")
45
+ EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
46
+ PRELOADED = sorted(p for p in DATA_DIR.iterdir() if p.suffix.lower() in EXTS)
47
+
48
+ HYPERFACE_MODELS = [
49
+ "HyperFace-10k-LDM",
50
+ "HyperFace-10k-StyleGAN",
51
+ "HyperFace-50k-StyleGAN",
52
+ ]
53
+
54
+ # ───────────────────────────────
55
+ # Styling (orange palette)
56
+ # ───────────────────────────────
57
+ PRIMARY = "#F97316"
58
+ PRIMARY_DARK = "#C2410C"
59
+ ACCENT_LIGHT = "#FFEAD2"
60
+ BG_LIGHT = "#FFFBF7"
61
+ CARD_BG_DARK = "#473f38"
62
+ BG_DARK = "#332a22"
63
+ TEXT_DARK = "#0F172A"
64
+ TEXT_LIGHT = "#f8fafc"
65
+
66
+ CSS = f"""
67
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
68
+
69
+ /* ─── palette ───────────────────────────────────────────── */
70
+ body, .gradio-container {{
71
+ font-family: 'Inter', sans-serif;
72
+ background: {BG_LIGHT};
73
+ color: {TEXT_DARK};
74
+ }}
75
+
76
+ a {{
77
+ color: {PRIMARY};
78
+ text-decoration: none;
79
+ font-weight: 600;
80
+ }}
81
+ a:hover {{ color: {PRIMARY_DARK}; }}
82
+
83
+ /* ─── headline ──────────────────────────────────────────── */
84
+ #titlebar {{
85
+ text-align: center;
86
+ margin-top: 2.4rem;
87
+ margin-bottom: .9rem;
88
+ }}
89
+
90
+ /* ─── card look ─────────────────────────────────────────── */
91
+ .gr-block,
92
+ .gr-box,
93
+ .gr-row,
94
+ #cite-wrapper {{
95
+ border: 1px solid #F8C89B;
96
+ border-radius: 10px;
97
+ background: #fff;
98
+ box-shadow: 0 3px 6px rgba(0, 0, 0, .05);
99
+ }}
100
+
101
+ .gr-gallery-item {{ background: #fff; }}
102
+
103
+ /* ─── controls / inputs ─────────────────────────────────── */
104
+ .gr-button-primary,
105
+ #copy-btn {{
106
+ background: linear-gradient(90deg, {PRIMARY} 0%, {PRIMARY_DARK} 100%);
107
+ border: none;
108
+ color: #fff;
109
+ border-radius: 6px;
110
+ font-weight: 600;
111
+ transition: transform .12s ease, box-shadow .12s ease;
112
+ }}
113
+
114
+ .gr-button-primary:hover,
115
+ #copy-btn:hover {{
116
+ transform: translateY(-2px);
117
+ box-shadow: 0 4px 12px rgba(249, 115, 22, .35);
118
+ }}
119
+
120
+ .gr-dropdown input {{
121
+ border: 1px solid {PRIMARY}99;
122
+ }}
123
+
124
+ .preview img,
125
+ .preview canvas {{ object-fit: contain !important; }}
126
+
127
+ /* ─── hero section ─────────────────────────────────────── */
128
+ #hero-wrapper {{ text-align: center; }}
129
+ #hero-badge {{
130
+ display: inline-block;
131
+ padding: .85rem 1.2rem;
132
+ border-radius: 8px;
133
+ background: {ACCENT_LIGHT};
134
+ border: 1px solid {PRIMARY}55;
135
+ font-size: .95rem;
136
+ font-weight: 600;
137
+ margin-bottom: .5rem;
138
+ }}
139
+
140
+ #hero-links {{
141
+ font-size: .95rem;
142
+ font-weight: 600;
143
+ margin-bottom: 1.6rem;
144
+ }}
145
+
146
+ #hero-links img {{
147
+ height: 22px;
148
+ vertical-align: middle;
149
+ margin-left: .55rem;
150
+ }}
151
+
152
+ /* ─── score area ───────────────────────────────────────── */
153
+ #score-area {{
154
+ text-align: center;
155
+ }}
156
+ .title-container {{
157
+ display: flex;
158
+ align-items: center;
159
+ gap: 12px;
160
+ justify-content: center;
161
+ margin-bottom: 10px;
162
+ text-align: center;
163
+ }}
164
+ .match-badge {{
165
+ display: inline-block;
166
+ padding: .35rem .9rem;
167
+ border-radius: 9999px;
168
+ font-weight: 600;
169
+ font-size: 1.25rem;
170
+ }}
171
+
172
+ /* ─── citation card ────────────────────────────────────── */
173
+ #cite-wrapper {{
174
+ position: relative;
175
+ padding: .9rem 1rem;
176
+ margin-top: 2rem;
177
+ }}
178
+
179
+ #cite-wrapper code {{
180
+ font-family: SFMono-Regular, Consolas, monospace;
181
+ font-size: .84rem;
182
+ white-space: pre-wrap;
183
+ color: {TEXT_DARK};
184
+ }}
185
+
186
+ #copy-btn {{
187
+ position: absolute;
188
+ top: .55rem;
189
+ right: .6rem;
190
+ padding: .18rem .7rem;
191
+ font-size: .72rem;
192
+ line-height: 1;
193
+ }}
194
+
195
+ /* ─── dark mode ────────────────────────────────────── */
196
+ .dark body,
197
+ .dark .gradio-container {{
198
+ background-color: {BG_DARK};
199
+ color: #e5e7eb;
200
+ }}
201
+
202
+ .dark .gr-block,
203
+ .dark .gr-box,
204
+ .dark .gr-row {{
205
+ background-color: {BG_DARK};
206
+ border: 1px solid #4b5563;
207
+ }}
208
+
209
+ .dark .gr-dropdown input {{
210
+ background-color: {BG_DARK};
211
+ color: #f1f5f9;
212
+ border: 1px solid {PRIMARY}aa;
213
+ }}
214
+
215
+ .dark #hero-badge {{
216
+ background: #334155;
217
+ border: 1px solid {PRIMARY}55;
218
+ color: #fefefe;
219
+ }}
220
+
221
+ .dark #cite-wrapper {{
222
+ background-color: {CARD_BG_DARK};
223
+
224
+ }}
225
+
226
+ .dark #bibtex {{
227
+ color: {TEXT_LIGHT} !important;
228
+ }}
229
+
230
+ .dark .card {{
231
+ background-color: {CARD_BG_DARK};
232
+ }}
233
+
234
+ /* ─── switch logo for light/dark theme ─────────────── */
235
+ .logo-dark {{ display: none; }}
236
+ .dark .logo-light {{ display: none; }}
237
+ .dark .logo-dark {{ display: inline; }}
238
+ """
239
+
240
+
241
+ FULL_CSS = CSS + title_css(TEXT_DARK, PRIMARY, PRIMARY_DARK, TEXT_LIGHT)
242
+
243
+
244
+ # ───────────────────────────────
245
+ # Torch / transforms
246
+ # ───────────────────────────────
247
def to_input(pil_rgb_image):
    """Convert an RGB image into a normalised, batched BGR tensor.

    Args:
        pil_rgb_image: Anything ``numpy`` can turn into an ``(H, W, 3)``
            array with channels in RGB order (the app passes PIL images).

    Returns:
        A ``float32`` tensor of shape ``(1, 3, H, W)`` whose channels are in
        BGR order and whose values are mapped from ``[0, 255]`` to
        ``[-1, 1]``.
    """
    rgb = np.asarray(pil_rgb_image)
    # Reverse the channel axis (RGB -> BGR) and rescale [0, 255] -> [-1, 1].
    bgr = ((rgb[:, :, ::-1] / 255.0) - 0.5) / 0.5
    # HWC -> CHW. The transpose is only a strided view, so materialise it
    # before handing the buffer to torch.
    chw = np.ascontiguousarray(bgr.transpose(2, 0, 1))
    # from_numpy + unsqueeze avoids building a tensor from a Python list of
    # ndarrays, which is slow and triggers a UserWarning in PyTorch.
    return torch.from_numpy(chw).unsqueeze(0).float()
252
+
253
+
254
def get_face_rec_model(name: str) -> torch.nn.Module:
    """Return the face-recognition network for *name*, loading it on first use.

    The checkpoint is downloaded from the Hugging Face Hub into ``models/``,
    its ``state_dict`` entries prefixed with ``model.`` are loaded into an
    ``ir_50`` backbone, and the network is put in eval mode on CUDA when
    available (CPU otherwise). Loaded models are memoised on the function's
    ``cache`` attribute.

    Args:
        name: A key of ``model_configs``.

    Returns:
        The cached, ready-to-use model.

    Raises:
        KeyError: If *name* is not a key of ``model_configs``.
    """
    loaded = get_face_rec_model.cache
    if name in loaded:
        return loaded[name]

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    config = model_configs[name]
    checkpoint_path = hf_hub_download(
        repo_id=config["repo"],
        filename=config["filename"],
        local_dir="models",
    )

    network = net.build_model(model_name='ir_50')
    checkpoint = torch.load(checkpoint_path, map_location=device)
    # Keep only the backbone weights and strip their 'model.' prefix.
    prefix = 'model.'
    backbone_weights = {}
    for key, val in checkpoint['state_dict'].items():
        if key.startswith(prefix):
            backbone_weights[key[len(prefix):]] = val
    network.load_state_dict(backbone_weights)
    network.eval()
    network.to(device)

    loaded[name] = network
    return loaded[name]


# Per-process memo of already-loaded models, keyed by variant name.
get_face_rec_model.cache = {}
275
+
276
+
277
+ # ───────────────────────────────
278
+ # Helpers
279
+ # ───────────────────────────────
280
def _as_rgb(path: Path) -> np.ndarray:
    """Read the image at *path* with OpenCV and return it in RGB channel order."""
    bgr_pixels = cv2.imread(str(path))
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)
    return rgb_pixels
282
+
283
+
284
def badge(text: str, colour: str) -> str:
    """Build the HTML for a coloured status pill.

    Args:
        text: Label shown inside the pill (inserted verbatim).
        colour: CSS hex colour; used as the text colour and, with the alpha
            suffix ``22`` appended, as the background.

    Returns:
        A ``<div class="match-badge">`` snippet.
    """
    inline_style = f"background:{colour}22;color:{colour}"
    return f'<div class="match-badge" style="{inline_style}">{text}</div>'
286
+
287
+
288
+ # ───────────────────────────────
289
+ # Face comparison
290
+ # ───────────────────────────────
291
def compare(img_left, img_right, variant):
    """Align two face images and score their similarity.

    Args:
        img_left: Image A as a numpy array, or ``None`` when the slot is
            empty.
        img_right: Image B as a numpy array, or ``None`` when the slot is
            empty.
        variant: Model variant name passed to ``get_face_rec_model``.

    Returns:
        A ``(aligned_a, aligned_b, html)`` tuple. The two aligned crops are
        ``None`` whenever an input is missing or no face is found; ``html``
        is a badge with either the error or the match percentage.
    """
    # An empty image slot arrives as None; Image.fromarray would crash on it.
    if img_left is None or img_right is None:
        return None, None, badge("Please provide both images", "#DC2626")

    pil_left = Image.fromarray(img_left).convert('RGB')
    pil_right = Image.fromarray(img_right).convert('RGB')

    crop_a = align.get_aligned_face(None, pil_left)
    crop_b = align.get_aligned_face(None, pil_right)
    if crop_a is None and crop_b is None:
        return None, None, badge("No face detected", "#DC2626")
    if crop_a is None:
        return None, None, badge("No face in A", "#DC2626")
    if crop_b is None:
        return None, None, badge("No face in B", "#DC2626")

    mdl = get_face_rec_model(variant)
    dev = next(mdl.parameters()).device
    with torch.no_grad():
        # The model output is indexed with [0]; presumably the first element
        # is the embedding -- confirm against net.build_model.
        ea = mdl(to_input(crop_a).to(dev))[0]
        eb = mdl(to_input(crop_b).to(dev))[0]

    # Cosine similarity as a percentage, clamped so negative similarities
    # display as 0%.
    pct = float(F.cosine_similarity(ea, eb).item() * 100)
    pct = max(0, min(100, pct))
    colour = "#15803D" if pct >= 70 else "#CA8A04" if pct >= 40 else "#DC2626"
    return crop_a, crop_b, badge(f"{pct:.2f}% match", colour)
311
+
312
+
313
+ # ───────────────────────────────
314
+ # Static HTML
315
+ # ───────────────────────────────
316
+ TITLE_HTML = title_with_logo(
317
+ """<span class="brand">HyperFace:</span> Generating Synthetic Face Recognition Datasets by Exploring Face Embedding Hypersphere
318
+ """
319
+ )
320
+
321
+ HERO_HTML = f"""
322
+ <div id="hero-wrapper">
323
+ <div id="hero-links">
324
+ <a href="https://www.idiap.ch/paper/hyperface/">Project</a>&nbsp;•&nbsp;
325
+ <a href="https://openreview.net/pdf?id=4YzVF9isgD">Paper</a>&nbsp;•&nbsp;
326
+ <a href="https://arxiv.org/abs/2411.08470v2">arXiv</a>&nbsp;•&nbsp;
327
+ <a href="https://gitlab.idiap.ch/biometric/code.iclr2025_hyperface">Code</a>&nbsp;•&nbsp;
328
+ <a href="https://huggingface.co/collections/Idiap/hyperface-682485119ccbd3ba5c42bde1">Models</a>&nbsp;•&nbsp;
329
+ <a href="https://zenodo.org/records/15087238">Dataset</a>&nbsp;•&nbsp;
330
+ </div>
331
+ </div>
332
+ """
333
+
334
+
335
# Citation card: a BibTeX entry plus a button that copies it to the clipboard.
# The accent must be written as ``\\'`` here: in a regular (non-raw) string
# literal ``\'`` is an escape for a bare apostrophe, which would drop the
# backslash and leave invalid LaTeX (``S{'e}bastien``) in the copied entry.
CITATION_HTML = """
<div id="cite-wrapper">
  <button id="copy-btn" onclick="
    navigator.clipboard.writeText(document.getElementById('bibtex').innerText)
      .then(()=>{this.textContent='✔︎';setTimeout(()=>this.textContent='Copy',1500);});
  ">Copy</button>
  <code id="bibtex">
@inproceedings{shahreza2025hyperface,
  title={HyperFace: Generating Synthetic Face Recognition Datasets by Exploring Face Embedding Hypersphere},
  author={Hatef Otroshi Shahreza and S{\\'e}bastien Marcel},
  booktitle={The Thirteenth International Conference on Learning Representations},
  year={2025}
}</code>
</div>
"""
350
+
351
+ # ───────────────────────────────
352
+ # Gradio UI
353
+ # ───────────────────────────────
354
+ with gr.Blocks(css=FULL_CSS, title="HyperFace Demo") as demo:
355
+ gr.HTML(TITLE_HTML, elem_id="titlebar")
356
+ gr.HTML(HERO_HTML)
357
+
358
+ with gr.Row():
359
+ gal_a = gr.Gallery(
360
+ PRELOADED,
361
+ columns=[5],
362
+ height=120,
363
+ label="Image A",
364
+ object_fit="contain",
365
+ elem_classes="card",
366
+ )
367
+ gal_b = gr.Gallery(
368
+ PRELOADED,
369
+ columns=[5],
370
+ height=120,
371
+ label="Image B",
372
+ object_fit="contain",
373
+ elem_classes="card",
374
+ )
375
+
376
+ with gr.Row():
377
+
378
+ img_a = gr.Image(
379
+ type="numpy",
380
+ height=300,
381
+ label="Image A (click or drag-drop)",
382
+ interactive=True,
383
+ elem_classes="preview card",
384
+ )
385
+ img_b = gr.Image(
386
+ type="numpy",
387
+ height=300,
388
+ label="Image B (click or drag-drop)",
389
+ interactive=True,
390
+ elem_classes="preview card",
391
+ )
392
+
393
+ def _fill(evt: gr.SelectData):
394
+ return _as_rgb(PRELOADED[evt.index]) if evt.index is not None else None
395
+
396
+ gal_a.select(_fill, outputs=img_a)
397
+ gal_b.select(_fill, outputs=img_b)
398
+
399
+ variant_dd = gr.Dropdown(
400
+ HYPERFACE_MODELS, value="HyperFace-10k-LDM", label="Model variant", elem_classes="card"
401
+ )
402
+ btn = gr.Button("Compare", variant="primary")
403
+
404
+ with gr.Row():
405
+ out_a = gr.Image(label="Aligned A (112×112)", elem_classes="card")
406
+ out_b = gr.Image(label="Aligned B (112×112)", elem_classes="card")
407
+ score_html = gr.HTML(elem_id="score-area")
408
+
409
+ btn.click(compare, [img_a, img_b, variant_dd], [out_a, out_b, score_html])
410
+
411
+ gr.HTML(CITATION_HTML)
412
+
413
+ # ───────────────────────────────
414
+ if __name__ == "__main__":
415
+ demo.launch(share=True)
data/id_0.png ADDED
data/id_1.png ADDED
data/id_2.png ADDED
data/id_3.png ADDED
data/id_4.png ADDED
data/id_5.png ADDED
data/id_6_0.png ADDED
data/id_6_1.png ADDED
data/id_6_2.png ADDED
face_alignment/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Minchul Kim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
face_alignment/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ The face alignment script is taken from the [AdaFace](https://github.com/mk-minchul/AdaFace) repository: https://github.com/mk-minchul/AdaFace/tree/master/face_alignment
face_alignment/align.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+
4
+ from face_alignment import mtcnn
5
+ import argparse
6
+ from PIL import Image
7
+ from tqdm import tqdm
8
+ import random
9
+ from datetime import datetime
10
+
11
+ import torch
12
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
+ mtcnn_model = mtcnn.MTCNN(device=device, crop_size=(112, 112))
14
+
15
def add_padding(pil_img, top, right, bottom, left, color=(0,0,0)):
    """Return a copy of *pil_img* surrounded by a solid-colour border.

    Args:
        pil_img: Source PIL image.
        top, right, bottom, left: Border thickness in pixels on each side.
        color: Fill colour for the border.

    Returns:
        A new image of the same mode, enlarged by the requested padding, with
        the source pasted at ``(left, top)``.
    """
    src_w, src_h = pil_img.size
    canvas_size = (src_w + right + left, src_h + top + bottom)
    canvas = Image.new(pil_img.mode, canvas_size, color)
    canvas.paste(pil_img, (left, top))
    return canvas
22
+
23
def get_aligned_face(image_path, rgb_pil_image=None):
    """Detect and align the first face found in an image.

    Args:
        image_path: Path of the image to load; only used when
            ``rgb_pil_image`` is ``None``.
        rgb_pil_image: Optional already-loaded PIL image. When given it takes
            precedence over ``image_path``.

    Returns:
        The aligned face crop as a PIL image, or ``None`` when no face is
        found or detection fails.
    """
    if rgb_pil_image is None:
        # Convert inside the context manager so the underlying file is closed
        # as soon as the pixels have been read.
        with Image.open(image_path) as opened:
            img = opened.convert('RGB')
    else:
        assert isinstance(rgb_pil_image, Image.Image), 'Face alignment module requires PIL image or path to the image'
        img = rgb_pil_image

    # find face
    try:
        _, faces = mtcnn_model.align_multi(img, limit=1)
    except Exception as e:
        # Best effort: a detector failure is reported and treated as
        # "no face" rather than propagated to the caller.
        print('Face detection Failed due to error.')
        print(e)
        return None

    # An empty result is the normal "no face in the picture" outcome, so it
    # is handled here instead of being surfaced as an IndexError message.
    return faces[0] if len(faces) > 0 else None
39
+
40
+
face_alignment/mtcnn.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+ from torch.autograd import Variable
6
+
7
+ import sys
8
+ import os
9
+
10
+ sys.path.insert(0, os.path.dirname(__file__))
11
+
12
+ from mtcnn_pytorch.src.get_nets import PNet, RNet, ONet
13
+ from mtcnn_pytorch.src.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
14
+ from mtcnn_pytorch.src.first_stage import run_first_stage
15
+ from mtcnn_pytorch.src.align_trans import get_reference_facial_points, warp_and_crop_face
16
+
17
+
18
class MTCNN():
    """Three-stage (P-Net / R-Net / O-Net) face detector and aligner."""

    def __init__(self, device: str = 'cuda:0', crop_size: Tuple[int, int] = (112, 112)):
        """Build the three networks and the reference landmark template.

        Args:
            device: ``'cuda:0'`` or ``'cpu'``.
            crop_size: Output crop size, ``(112, 112)`` or ``(96, 112)``.
        """
        assert device in ['cuda:0', 'cpu']
        self.device = torch.device(device)
        assert crop_size in [(112, 112), (96, 112)]
        self.crop_size = crop_size

        # Temporarily switch to this file's directory while the nets are
        # built (presumably they load their weight files via relative paths
        # -- confirm in get_nets). abspath guards against __file__ having an
        # empty dirname, and try/finally guarantees the caller's working
        # directory is restored even if construction fails.
        cwd = os.getcwd()
        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        try:
            self.pnet = PNet().to(self.device)
            self.rnet = RNet().to(self.device)
            self.onet = ONet().to(self.device)
            self.pnet.eval()
            self.rnet.eval()
            self.onet.eval()
            # NOTE: attribute name keeps its historical spelling ("refrence").
            self.refrence = get_reference_facial_points(default_square=crop_size[0] == crop_size[1])
        finally:
            os.chdir(cwd)

        # Default detection hyper-parameters passed to detect_faces.
        self.min_face_size = 20
        self.thresholds = [0.6,0.7,0.9]
        self.nms_thresholds = [0.7, 0.7, 0.7]
        self.factor = 0.85

    def align(self, img):
        """Return the aligned crop of the first detected face in *img*.

        Raises:
            IndexError: If no face is detected (the landmark list is empty).
        """
        _, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
        # Each landmark row stores five x values followed by five y values.
        facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
        warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
        return Image.fromarray(warped_face)

    def align_multi(self, img, limit=None):
        """Detect faces in *img* and return their boxes and aligned crops.

        Args:
            img: A PIL image.
            limit: If truthy, keep at most this many detections.

        Returns:
            ``(boxes, faces)`` where ``faces`` is a list of aligned PIL
            crops (empty when nothing is detected).
        """
        boxes, landmarks = self.detect_faces(img, self.min_face_size, self.thresholds, self.nms_thresholds, self.factor)
        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]
        faces = []
        for landmark in landmarks:
            # Pair the j-th x with the j-th y (x values come first, then y).
            facial5points = [[landmark[j], landmark[j + 5]] for j in range(5)]
            warped_face = warp_and_crop_face(np.array(img), facial5points, self.refrence, crop_size=self.crop_size)
            faces.append(Image.fromarray(warped_face))
        return boxes, faces

    def detect_faces(self, image, min_face_size, thresholds, nms_thresholds, factor):
        """
        Arguments:
            image: an instance of PIL.Image.
            min_face_size: a float number.
            thresholds: a list of length 3.
            nms_thresholds: a list of length 3.
            factor: a float number, the ratio between consecutive
                image-pyramid scales.

        Returns:
            two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
            bounding boxes and facial landmarks.
            Two empty lists are returned when no candidate survives.
        """

        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        min_length = min(height, width)

        min_detection_size = 12
        # factor = 0.707  # sqrt(0.5)

        # scales for scaling the image
        scales = []

        # scales the image so that
        # minimum size that we can detect equals to
        # minimum face size that we want to detect
        m = min_detection_size / min_face_size
        min_length *= m

        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor**factor_count)
            min_length *= factor
            factor_count += 1

        # STAGE 1

        # it will be returned
        bounding_boxes = []

        with torch.no_grad():
            # run P-Net on different scales
            for s in scales:
                boxes = run_first_stage(image, self.pnet, scale=s, threshold=thresholds[0])
                bounding_boxes.append(boxes)

            # collect boxes (and offsets, and scores) from different scales
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            if len(bounding_boxes) == 0:
                return [], []
            bounding_boxes = np.vstack(bounding_boxes)

            keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
            bounding_boxes = bounding_boxes[keep]

            # use offsets predicted by pnet to transform bounding boxes
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
            # shape [n_boxes, 5]

            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 2

            img_boxes = get_image_boxes(bounding_boxes, image, size=24)
            img_boxes = torch.FloatTensor(img_boxes).to(self.device)

            output = self.rnet(img_boxes)
            offsets = output[0].detach().cpu().numpy()  # shape [n_boxes, 4]
            probs = output[1].detach().cpu().numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[1])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
            offsets = offsets[keep]

            keep = nms(bounding_boxes, nms_thresholds[1])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # STAGE 3

            img_boxes = get_image_boxes(bounding_boxes, image, size=48)
            if len(img_boxes) == 0:
                return [], []
            img_boxes = torch.FloatTensor(img_boxes).to(self.device)
            output = self.onet(img_boxes)
            landmarks = output[0].detach().cpu().numpy()  # shape [n_boxes, 10]
            offsets = output[1].detach().cpu().numpy()  # shape [n_boxes, 4]
            probs = output[2].detach().cpu().numpy()  # shape [n_boxes, 2]

            keep = np.where(probs[:, 1] > thresholds[2])[0]
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
            offsets = offsets[keep]
            landmarks = landmarks[keep]

            # compute landmark points
            # (scale the O-Net landmark outputs by the box size and shift
            # them by the box origin to get image coordinates)
            width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
            height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
            xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
            landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
            landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

            bounding_boxes = calibrate_box(bounding_boxes, offsets)
            keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
            bounding_boxes = bounding_boxes[keep]
            landmarks = landmarks[keep]

        return bounding_boxes, landmarks
face_alignment/mtcnn_pytorch/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .ipynb_checkpoints
2
+ __pycache__
3
+
face_alignment/mtcnn_pytorch/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Dan Antoshchenko
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
face_alignment/mtcnn_pytorch/README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MTCNN
2
+
3
+ A `pytorch` implementation of the **inference stage** of the face detection algorithm described in
4
+ [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://arxiv.org/abs/1604.02878).
5
+
6
+ ## Example
7
+ ![example of a face detection](images/example.png)
8
+
9
+ ## How to use it
10
+ Just download the repository and then run the following:
11
+ ```python
12
+ from src import detect_faces
13
+ from PIL import Image
14
+
15
+ image = Image.open('image.jpg')
16
+ bounding_boxes, landmarks = detect_faces(image)
17
+ ```
18
+ For examples see `test_on_images.ipynb`.
19
+
20
+ ## Requirements
21
+ * pytorch 0.2
22
+ * Pillow, numpy
23
+
24
+ ## Credit
25
+ This implementation is heavily inspired by:
26
+ * [pangyupo/mxnet_mtcnn_face_detection](https://github.com/pangyupo/mxnet_mtcnn_face_detection)
face_alignment/mtcnn_pytorch/caffe_models/det1.caffemodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6085e7f48ba7e6b6f1b58964595f6bce5b97bcc4866751f7b4bdc98f920c096
3
+ size 28163
face_alignment/mtcnn_pytorch/caffe_models/det1.prototxt ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "PNet"
2
+ input: "data"
3
+ input_dim: 1
4
+ input_dim: 3
5
+ input_dim: 12
6
+ input_dim: 12
7
+
8
+ layer {
9
+ name: "conv1"
10
+ type: "Convolution"
11
+ bottom: "data"
12
+ top: "conv1"
13
+ param {
14
+ lr_mult: 1
15
+ decay_mult: 1
16
+ }
17
+ param {
18
+ lr_mult: 2
19
+ decay_mult: 0
20
+ }
21
+ convolution_param {
22
+ num_output: 10
23
+ kernel_size: 3
24
+ stride: 1
25
+ weight_filler {
26
+ type: "xavier"
27
+ }
28
+ bias_filler {
29
+ type: "constant"
30
+ value: 0
31
+ }
32
+ }
33
+ }
34
+ layer {
35
+ name: "PReLU1"
36
+ type: "PReLU"
37
+ bottom: "conv1"
38
+ top: "conv1"
39
+ }
40
+ layer {
41
+ name: "pool1"
42
+ type: "Pooling"
43
+ bottom: "conv1"
44
+ top: "pool1"
45
+ pooling_param {
46
+ pool: MAX
47
+ kernel_size: 2
48
+ stride: 2
49
+ }
50
+ }
51
+
52
+ layer {
53
+ name: "conv2"
54
+ type: "Convolution"
55
+ bottom: "pool1"
56
+ top: "conv2"
57
+ param {
58
+ lr_mult: 1
59
+ decay_mult: 1
60
+ }
61
+ param {
62
+ lr_mult: 2
63
+ decay_mult: 0
64
+ }
65
+ convolution_param {
66
+ num_output: 16
67
+ kernel_size: 3
68
+ stride: 1
69
+ weight_filler {
70
+ type: "xavier"
71
+ }
72
+ bias_filler {
73
+ type: "constant"
74
+ value: 0
75
+ }
76
+ }
77
+ }
78
+ layer {
79
+ name: "PReLU2"
80
+ type: "PReLU"
81
+ bottom: "conv2"
82
+ top: "conv2"
83
+ }
84
+
85
+ layer {
86
+ name: "conv3"
87
+ type: "Convolution"
88
+ bottom: "conv2"
89
+ top: "conv3"
90
+ param {
91
+ lr_mult: 1
92
+ decay_mult: 1
93
+ }
94
+ param {
95
+ lr_mult: 2
96
+ decay_mult: 0
97
+ }
98
+ convolution_param {
99
+ num_output: 32
100
+ kernel_size: 3
101
+ stride: 1
102
+ weight_filler {
103
+ type: "xavier"
104
+ }
105
+ bias_filler {
106
+ type: "constant"
107
+ value: 0
108
+ }
109
+ }
110
+ }
111
+ layer {
112
+ name: "PReLU3"
113
+ type: "PReLU"
114
+ bottom: "conv3"
115
+ top: "conv3"
116
+ }
117
+
118
+
119
+ layer {
120
+ name: "conv4-1"
121
+ type: "Convolution"
122
+ bottom: "conv3"
123
+ top: "conv4-1"
124
+ param {
125
+ lr_mult: 1
126
+ decay_mult: 1
127
+ }
128
+ param {
129
+ lr_mult: 2
130
+ decay_mult: 0
131
+ }
132
+ convolution_param {
133
+ num_output: 2
134
+ kernel_size: 1
135
+ stride: 1
136
+ weight_filler {
137
+ type: "xavier"
138
+ }
139
+ bias_filler {
140
+ type: "constant"
141
+ value: 0
142
+ }
143
+ }
144
+ }
145
+
146
+ layer {
147
+ name: "conv4-2"
148
+ type: "Convolution"
149
+ bottom: "conv3"
150
+ top: "conv4-2"
151
+ param {
152
+ lr_mult: 1
153
+ decay_mult: 1
154
+ }
155
+ param {
156
+ lr_mult: 2
157
+ decay_mult: 0
158
+ }
159
+ convolution_param {
160
+ num_output: 4
161
+ kernel_size: 1
162
+ stride: 1
163
+ weight_filler {
164
+ type: "xavier"
165
+ }
166
+ bias_filler {
167
+ type: "constant"
168
+ value: 0
169
+ }
170
+ }
171
+ }
172
+ layer {
173
+ name: "prob1"
174
+ type: "Softmax"
175
+ bottom: "conv4-1"
176
+ top: "prob1"
177
+ }
face_alignment/mtcnn_pytorch/caffe_models/det2.caffemodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39b20f7a57bb8176cc9466cea4dfd52da6a6f876de60c7ab222a309f2d0ca08c
3
+ size 407910
face_alignment/mtcnn_pytorch/caffe_models/det2.prototxt ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "RNet"
2
+ input: "data"
3
+ input_dim: 1
4
+ input_dim: 3
5
+ input_dim: 24
6
+ input_dim: 24
7
+
8
+
9
+ ##########################
10
+ ######################
11
+ layer {
12
+ name: "conv1"
13
+ type: "Convolution"
14
+ bottom: "data"
15
+ top: "conv1"
16
+ param {
17
+ lr_mult: 0
18
+ decay_mult: 0
19
+ }
20
+ param {
21
+ lr_mult: 0
22
+ decay_mult: 0
23
+ }
24
+ convolution_param {
25
+ num_output: 28
26
+ kernel_size: 3
27
+ stride: 1
28
+ weight_filler {
29
+ type: "xavier"
30
+ }
31
+ bias_filler {
32
+ type: "constant"
33
+ value: 0
34
+ }
35
+ }
36
+ }
37
+ layer {
38
+ name: "prelu1"
39
+ type: "PReLU"
40
+ bottom: "conv1"
41
+ top: "conv1"
42
+ propagate_down: true
43
+ }
44
+ layer {
45
+ name: "pool1"
46
+ type: "Pooling"
47
+ bottom: "conv1"
48
+ top: "pool1"
49
+ pooling_param {
50
+ pool: MAX
51
+ kernel_size: 3
52
+ stride: 2
53
+ }
54
+ }
55
+
56
+ layer {
57
+ name: "conv2"
58
+ type: "Convolution"
59
+ bottom: "pool1"
60
+ top: "conv2"
61
+ param {
62
+ lr_mult: 0
63
+ decay_mult: 0
64
+ }
65
+ param {
66
+ lr_mult: 0
67
+ decay_mult: 0
68
+ }
69
+ convolution_param {
70
+ num_output: 48
71
+ kernel_size: 3
72
+ stride: 1
73
+ weight_filler {
74
+ type: "xavier"
75
+ }
76
+ bias_filler {
77
+ type: "constant"
78
+ value: 0
79
+ }
80
+ }
81
+ }
82
+ layer {
83
+ name: "prelu2"
84
+ type: "PReLU"
85
+ bottom: "conv2"
86
+ top: "conv2"
87
+ propagate_down: true
88
+ }
89
+ layer {
90
+ name: "pool2"
91
+ type: "Pooling"
92
+ bottom: "conv2"
93
+ top: "pool2"
94
+ pooling_param {
95
+ pool: MAX
96
+ kernel_size: 3
97
+ stride: 2
98
+ }
99
+ }
100
+ ####################################
101
+
102
+ ##################################
103
+ layer {
104
+ name: "conv3"
105
+ type: "Convolution"
106
+ bottom: "pool2"
107
+ top: "conv3"
108
+ param {
109
+ lr_mult: 0
110
+ decay_mult: 0
111
+ }
112
+ param {
113
+ lr_mult: 0
114
+ decay_mult: 0
115
+ }
116
+ convolution_param {
117
+ num_output: 64
118
+ kernel_size: 2
119
+ stride: 1
120
+ weight_filler {
121
+ type: "xavier"
122
+ }
123
+ bias_filler {
124
+ type: "constant"
125
+ value: 0
126
+ }
127
+ }
128
+ }
129
+ layer {
130
+ name: "prelu3"
131
+ type: "PReLU"
132
+ bottom: "conv3"
133
+ top: "conv3"
134
+ propagate_down: true
135
+ }
136
+ ###############################
137
+
138
+ ###############################
139
+
140
+ layer {
141
+ name: "conv4"
142
+ type: "InnerProduct"
143
+ bottom: "conv3"
144
+ top: "conv4"
145
+ param {
146
+ lr_mult: 0
147
+ decay_mult: 0
148
+ }
149
+ param {
150
+ lr_mult: 0
151
+ decay_mult: 0
152
+ }
153
+ inner_product_param {
154
+ num_output: 128
155
+ weight_filler {
156
+ type: "xavier"
157
+ }
158
+ bias_filler {
159
+ type: "constant"
160
+ value: 0
161
+ }
162
+ }
163
+ }
164
+ layer {
165
+ name: "prelu4"
166
+ type: "PReLU"
167
+ bottom: "conv4"
168
+ top: "conv4"
169
+ }
170
+
171
+ layer {
172
+ name: "conv5-1"
173
+ type: "InnerProduct"
174
+ bottom: "conv4"
175
+ top: "conv5-1"
176
+ param {
177
+ lr_mult: 0
178
+ decay_mult: 0
179
+ }
180
+ param {
181
+ lr_mult: 0
182
+ decay_mult: 0
183
+ }
184
+ inner_product_param {
185
+ num_output: 2
186
+ #kernel_size: 1
187
+ #stride: 1
188
+ weight_filler {
189
+ type: "xavier"
190
+ }
191
+ bias_filler {
192
+ type: "constant"
193
+ value: 0
194
+ }
195
+ }
196
+ }
197
+ layer {
198
+ name: "conv5-2"
199
+ type: "InnerProduct"
200
+ bottom: "conv4"
201
+ top: "conv5-2"
202
+ param {
203
+ lr_mult: 1
204
+ decay_mult: 1
205
+ }
206
+ param {
207
+ lr_mult: 2
208
+ decay_mult: 1
209
+ }
210
+ inner_product_param {
211
+ num_output: 4
212
+ #kernel_size: 1
213
+ #stride: 1
214
+ weight_filler {
215
+ type: "xavier"
216
+ }
217
+ bias_filler {
218
+ type: "constant"
219
+ value: 0
220
+ }
221
+ }
222
+ }
223
+ layer {
224
+ name: "prob1"
225
+ type: "Softmax"
226
+ bottom: "conv5-1"
227
+ top: "prob1"
228
+ }
face_alignment/mtcnn_pytorch/caffe_models/det3.caffemodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6098829a4d6d318f37cec42142465637fafe4c673f2e93b69495bf7ca23d2d
3
+ size 1558412
face_alignment/mtcnn_pytorch/caffe_models/det3.prototxt ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "ONet"
2
+ input: "data"
3
+ input_dim: 1
4
+ input_dim: 3
5
+ input_dim: 48
6
+ input_dim: 48
7
+ ##################################
8
+ layer {
9
+ name: "conv1"
10
+ type: "Convolution"
11
+ bottom: "data"
12
+ top: "conv1"
13
+ param {
14
+ lr_mult: 1
15
+ decay_mult: 1
16
+ }
17
+ param {
18
+ lr_mult: 2
19
+ decay_mult: 1
20
+ }
21
+ convolution_param {
22
+ num_output: 32
23
+ kernel_size: 3
24
+ stride: 1
25
+ weight_filler {
26
+ type: "xavier"
27
+ }
28
+ bias_filler {
29
+ type: "constant"
30
+ value: 0
31
+ }
32
+ }
33
+ }
34
+ layer {
35
+ name: "prelu1"
36
+ type: "PReLU"
37
+ bottom: "conv1"
38
+ top: "conv1"
39
+ }
40
+ layer {
41
+ name: "pool1"
42
+ type: "Pooling"
43
+ bottom: "conv1"
44
+ top: "pool1"
45
+ pooling_param {
46
+ pool: MAX
47
+ kernel_size: 3
48
+ stride: 2
49
+ }
50
+ }
51
+ layer {
52
+ name: "conv2"
53
+ type: "Convolution"
54
+ bottom: "pool1"
55
+ top: "conv2"
56
+ param {
57
+ lr_mult: 1
58
+ decay_mult: 1
59
+ }
60
+ param {
61
+ lr_mult: 2
62
+ decay_mult: 1
63
+ }
64
+ convolution_param {
65
+ num_output: 64
66
+ kernel_size: 3
67
+ stride: 1
68
+ weight_filler {
69
+ type: "xavier"
70
+ }
71
+ bias_filler {
72
+ type: "constant"
73
+ value: 0
74
+ }
75
+ }
76
+ }
77
+
78
+ layer {
79
+ name: "prelu2"
80
+ type: "PReLU"
81
+ bottom: "conv2"
82
+ top: "conv2"
83
+ }
84
+ layer {
85
+ name: "pool2"
86
+ type: "Pooling"
87
+ bottom: "conv2"
88
+ top: "pool2"
89
+ pooling_param {
90
+ pool: MAX
91
+ kernel_size: 3
92
+ stride: 2
93
+ }
94
+ }
95
+
96
+ layer {
97
+ name: "conv3"
98
+ type: "Convolution"
99
+ bottom: "pool2"
100
+ top: "conv3"
101
+ param {
102
+ lr_mult: 1
103
+ decay_mult: 1
104
+ }
105
+ param {
106
+ lr_mult: 2
107
+ decay_mult: 1
108
+ }
109
+ convolution_param {
110
+ num_output: 64
111
+ kernel_size: 3
112
+ weight_filler {
113
+ type: "xavier"
114
+ }
115
+ bias_filler {
116
+ type: "constant"
117
+ value: 0
118
+ }
119
+ }
120
+ }
121
+ layer {
122
+ name: "prelu3"
123
+ type: "PReLU"
124
+ bottom: "conv3"
125
+ top: "conv3"
126
+ }
127
+ layer {
128
+ name: "pool3"
129
+ type: "Pooling"
130
+ bottom: "conv3"
131
+ top: "pool3"
132
+ pooling_param {
133
+ pool: MAX
134
+ kernel_size: 2
135
+ stride: 2
136
+ }
137
+ }
138
+ layer {
139
+ name: "conv4"
140
+ type: "Convolution"
141
+ bottom: "pool3"
142
+ top: "conv4"
143
+ param {
144
+ lr_mult: 1
145
+ decay_mult: 1
146
+ }
147
+ param {
148
+ lr_mult: 2
149
+ decay_mult: 1
150
+ }
151
+ convolution_param {
152
+ num_output: 128
153
+ kernel_size: 2
154
+ weight_filler {
155
+ type: "xavier"
156
+ }
157
+ bias_filler {
158
+ type: "constant"
159
+ value: 0
160
+ }
161
+ }
162
+ }
163
+ layer {
164
+ name: "prelu4"
165
+ type: "PReLU"
166
+ bottom: "conv4"
167
+ top: "conv4"
168
+ }
169
+
170
+
171
+ layer {
172
+ name: "conv5"
173
+ type: "InnerProduct"
174
+ bottom: "conv4"
175
+ top: "conv5"
176
+ param {
177
+ lr_mult: 1
178
+ decay_mult: 1
179
+ }
180
+ param {
181
+ lr_mult: 2
182
+ decay_mult: 1
183
+ }
184
+ inner_product_param {
185
+ #kernel_size: 3
186
+ num_output: 256
187
+ weight_filler {
188
+ type: "xavier"
189
+ }
190
+ bias_filler {
191
+ type: "constant"
192
+ value: 0
193
+ }
194
+ }
195
+ }
196
+
197
+ layer {
198
+ name: "drop5"
199
+ type: "Dropout"
200
+ bottom: "conv5"
201
+ top: "conv5"
202
+ dropout_param {
203
+ dropout_ratio: 0.25
204
+ }
205
+ }
206
+ layer {
207
+ name: "prelu5"
208
+ type: "PReLU"
209
+ bottom: "conv5"
210
+ top: "conv5"
211
+ }
212
+
213
+
214
+ layer {
215
+ name: "conv6-1"
216
+ type: "InnerProduct"
217
+ bottom: "conv5"
218
+ top: "conv6-1"
219
+ param {
220
+ lr_mult: 1
221
+ decay_mult: 1
222
+ }
223
+ param {
224
+ lr_mult: 2
225
+ decay_mult: 1
226
+ }
227
+ inner_product_param {
228
+ #kernel_size: 1
229
+ num_output: 2
230
+ weight_filler {
231
+ type: "xavier"
232
+ }
233
+ bias_filler {
234
+ type: "constant"
235
+ value: 0
236
+ }
237
+ }
238
+ }
239
+ layer {
240
+ name: "conv6-2"
241
+ type: "InnerProduct"
242
+ bottom: "conv5"
243
+ top: "conv6-2"
244
+ param {
245
+ lr_mult: 1
246
+ decay_mult: 1
247
+ }
248
+ param {
249
+ lr_mult: 2
250
+ decay_mult: 1
251
+ }
252
+ inner_product_param {
253
+ #kernel_size: 1
254
+ num_output: 4
255
+ weight_filler {
256
+ type: "xavier"
257
+ }
258
+ bias_filler {
259
+ type: "constant"
260
+ value: 0
261
+ }
262
+ }
263
+ }
264
+ layer {
265
+ name: "conv6-3"
266
+ type: "InnerProduct"
267
+ bottom: "conv5"
268
+ top: "conv6-3"
269
+ param {
270
+ lr_mult: 1
271
+ decay_mult: 1
272
+ }
273
+ param {
274
+ lr_mult: 2
275
+ decay_mult: 1
276
+ }
277
+ inner_product_param {
278
+ #kernel_size: 1
279
+ num_output: 10
280
+ weight_filler {
281
+ type: "xavier"
282
+ }
283
+ bias_filler {
284
+ type: "constant"
285
+ value: 0
286
+ }
287
+ }
288
+ }
289
+ layer {
290
+ name: "prob1"
291
+ type: "Softmax"
292
+ bottom: "conv6-1"
293
+ top: "prob1"
294
+ }
face_alignment/mtcnn_pytorch/caffe_models/det4.caffemodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23dfa3f889a8dd5d1ffe7429229270892bcb19221ede13aaac8896ea060bfb76
3
+ size 3798152
face_alignment/mtcnn_pytorch/caffe_models/det4.prototxt ADDED
@@ -0,0 +1,995 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "LNet"
2
+ input: "data"
3
+ input_dim: 1
4
+ input_dim: 15
5
+ input_dim: 24
6
+ input_dim: 24
7
+
8
+ layer {
9
+ name: "slicer_data"
10
+ type: "Slice"
11
+ bottom: "data"
12
+ top: "data241"
13
+ top: "data242"
14
+ top: "data243"
15
+ top: "data244"
16
+ top: "data245"
17
+ slice_param {
18
+ axis: 1
19
+ slice_point: 3
20
+ slice_point: 6
21
+ slice_point: 9
22
+ slice_point: 12
23
+ }
24
+ }
25
+ layer {
26
+ name: "conv1_1"
27
+ type: "Convolution"
28
+ bottom: "data241"
29
+ top: "conv1_1"
30
+ param {
31
+ lr_mult: 1
32
+ decay_mult: 1
33
+ }
34
+ param {
35
+ lr_mult: 2
36
+ decay_mult: 1
37
+ }
38
+ convolution_param {
39
+ num_output: 28
40
+ kernel_size: 3
41
+ stride: 1
42
+ weight_filler {
43
+ type: "xavier"
44
+ }
45
+ bias_filler {
46
+ type: "constant"
47
+ value: 0
48
+ }
49
+ }
50
+
51
+ }
52
+ layer {
53
+ name: "prelu1_1"
54
+ type: "PReLU"
55
+ bottom: "conv1_1"
56
+ top: "conv1_1"
57
+
58
+ }
59
+ layer {
60
+ name: "pool1_1"
61
+ type: "Pooling"
62
+ bottom: "conv1_1"
63
+ top: "pool1_1"
64
+ pooling_param {
65
+ pool: MAX
66
+ kernel_size: 3
67
+ stride: 2
68
+ }
69
+ }
70
+
71
+ layer {
72
+ name: "conv2_1"
73
+ type: "Convolution"
74
+ bottom: "pool1_1"
75
+ top: "conv2_1"
76
+ param {
77
+ lr_mult: 1
78
+ decay_mult: 1
79
+ }
80
+ param {
81
+ lr_mult: 2
82
+ decay_mult: 1
83
+ }
84
+ convolution_param {
85
+ num_output: 48
86
+ kernel_size: 3
87
+ stride: 1
88
+ weight_filler {
89
+ type: "xavier"
90
+ }
91
+ bias_filler {
92
+ type: "constant"
93
+ value: 0
94
+ }
95
+ }
96
+
97
+ }
98
+ layer {
99
+ name: "prelu2_1"
100
+ type: "PReLU"
101
+ bottom: "conv2_1"
102
+ top: "conv2_1"
103
+ }
104
+ layer {
105
+ name: "pool2_1"
106
+ type: "Pooling"
107
+ bottom: "conv2_1"
108
+ top: "pool2_1"
109
+ pooling_param {
110
+ pool: MAX
111
+ kernel_size: 3
112
+ stride: 2
113
+ }
114
+
115
+ }
116
+ layer {
117
+ name: "conv3_1"
118
+ type: "Convolution"
119
+ bottom: "pool2_1"
120
+ top: "conv3_1"
121
+ param {
122
+ lr_mult: 1
123
+ decay_mult: 1
124
+ }
125
+ param {
126
+ lr_mult: 2
127
+ decay_mult: 1
128
+ }
129
+ convolution_param {
130
+ num_output: 64
131
+ kernel_size: 2
132
+ stride: 1
133
+ weight_filler {
134
+ type: "xavier"
135
+ }
136
+ bias_filler {
137
+ type: "constant"
138
+ value: 0
139
+ }
140
+ }
141
+
142
+ }
143
+ layer {
144
+ name: "prelu3_1"
145
+ type: "PReLU"
146
+ bottom: "conv3_1"
147
+ top: "conv3_1"
148
+ }
149
+ ##########################
150
+ layer {
151
+ name: "conv1_2"
152
+ type: "Convolution"
153
+ bottom: "data242"
154
+ top: "conv1_2"
155
+ param {
156
+ lr_mult: 1
157
+ decay_mult: 1
158
+ }
159
+ param {
160
+ lr_mult: 2
161
+ decay_mult: 1
162
+ }
163
+ convolution_param {
164
+ num_output: 28
165
+ kernel_size: 3
166
+ stride: 1
167
+ weight_filler {
168
+ type: "xavier"
169
+ }
170
+ bias_filler {
171
+ type: "constant"
172
+ value: 0
173
+ }
174
+ }
175
+
176
+ }
177
+ layer {
178
+ name: "prelu1_2"
179
+ type: "PReLU"
180
+ bottom: "conv1_2"
181
+ top: "conv1_2"
182
+
183
+ }
184
+ layer {
185
+ name: "pool1_2"
186
+ type: "Pooling"
187
+ bottom: "conv1_2"
188
+ top: "pool1_2"
189
+ pooling_param {
190
+ pool: MAX
191
+ kernel_size: 3
192
+ stride: 2
193
+ }
194
+ }
195
+
196
+ layer {
197
+ name: "conv2_2"
198
+ type: "Convolution"
199
+ bottom: "pool1_2"
200
+ top: "conv2_2"
201
+ param {
202
+ lr_mult: 1
203
+ decay_mult: 1
204
+ }
205
+ param {
206
+ lr_mult: 2
207
+ decay_mult: 1
208
+ }
209
+ convolution_param {
210
+ num_output: 48
211
+ kernel_size: 3
212
+ stride: 1
213
+ weight_filler {
214
+ type: "xavier"
215
+ }
216
+ bias_filler {
217
+ type: "constant"
218
+ value: 0
219
+ }
220
+ }
221
+
222
+ }
223
+ layer {
224
+ name: "prelu2_2"
225
+ type: "PReLU"
226
+ bottom: "conv2_2"
227
+ top: "conv2_2"
228
+ }
229
+ layer {
230
+ name: "pool2_2"
231
+ type: "Pooling"
232
+ bottom: "conv2_2"
233
+ top: "pool2_2"
234
+ pooling_param {
235
+ pool: MAX
236
+ kernel_size: 3
237
+ stride: 2
238
+ }
239
+
240
+ }
241
+ layer {
242
+ name: "conv3_2"
243
+ type: "Convolution"
244
+ bottom: "pool2_2"
245
+ top: "conv3_2"
246
+ param {
247
+ lr_mult: 1
248
+ decay_mult: 1
249
+ }
250
+ param {
251
+ lr_mult: 2
252
+ decay_mult: 1
253
+ }
254
+ convolution_param {
255
+ num_output: 64
256
+ kernel_size: 2
257
+ stride: 1
258
+ weight_filler {
259
+ type: "xavier"
260
+ }
261
+ bias_filler {
262
+ type: "constant"
263
+ value: 0
264
+ }
265
+ }
266
+
267
+ }
268
+ layer {
269
+ name: "prelu3_2"
270
+ type: "PReLU"
271
+ bottom: "conv3_2"
272
+ top: "conv3_2"
273
+ }
274
+ ##########################
275
+ ##########################
276
+ layer {
277
+ name: "conv1_3"
278
+ type: "Convolution"
279
+ bottom: "data243"
280
+ top: "conv1_3"
281
+ param {
282
+ lr_mult: 1
283
+ decay_mult: 1
284
+ }
285
+ param {
286
+ lr_mult: 2
287
+ decay_mult: 1
288
+ }
289
+ convolution_param {
290
+ num_output: 28
291
+ kernel_size: 3
292
+ stride: 1
293
+ weight_filler {
294
+ type: "xavier"
295
+ }
296
+ bias_filler {
297
+ type: "constant"
298
+ value: 0
299
+ }
300
+ }
301
+
302
+ }
303
+ layer {
304
+ name: "prelu1_3"
305
+ type: "PReLU"
306
+ bottom: "conv1_3"
307
+ top: "conv1_3"
308
+
309
+ }
310
+ layer {
311
+ name: "pool1_3"
312
+ type: "Pooling"
313
+ bottom: "conv1_3"
314
+ top: "pool1_3"
315
+ pooling_param {
316
+ pool: MAX
317
+ kernel_size: 3
318
+ stride: 2
319
+ }
320
+ }
321
+
322
+ layer {
323
+ name: "conv2_3"
324
+ type: "Convolution"
325
+ bottom: "pool1_3"
326
+ top: "conv2_3"
327
+ param {
328
+ lr_mult: 1
329
+ decay_mult: 1
330
+ }
331
+ param {
332
+ lr_mult: 2
333
+ decay_mult: 1
334
+ }
335
+ convolution_param {
336
+ num_output: 48
337
+ kernel_size: 3
338
+ stride: 1
339
+ weight_filler {
340
+ type: "xavier"
341
+ }
342
+ bias_filler {
343
+ type: "constant"
344
+ value: 0
345
+ }
346
+ }
347
+
348
+ }
349
+ layer {
350
+ name: "prelu2_3"
351
+ type: "PReLU"
352
+ bottom: "conv2_3"
353
+ top: "conv2_3"
354
+ }
355
+ layer {
356
+ name: "pool2_3"
357
+ type: "Pooling"
358
+ bottom: "conv2_3"
359
+ top: "pool2_3"
360
+ pooling_param {
361
+ pool: MAX
362
+ kernel_size: 3
363
+ stride: 2
364
+ }
365
+
366
+ }
367
+ layer {
368
+ name: "conv3_3"
369
+ type: "Convolution"
370
+ bottom: "pool2_3"
371
+ top: "conv3_3"
372
+ param {
373
+ lr_mult: 1
374
+ decay_mult: 1
375
+ }
376
+ param {
377
+ lr_mult: 2
378
+ decay_mult: 1
379
+ }
380
+ convolution_param {
381
+ num_output: 64
382
+ kernel_size: 2
383
+ stride: 1
384
+ weight_filler {
385
+ type: "xavier"
386
+ }
387
+ bias_filler {
388
+ type: "constant"
389
+ value: 0
390
+ }
391
+ }
392
+
393
+ }
394
+ layer {
395
+ name: "prelu3_3"
396
+ type: "PReLU"
397
+ bottom: "conv3_3"
398
+ top: "conv3_3"
399
+ }
400
+ ##########################
401
+ ##########################
402
+ layer {
403
+ name: "conv1_4"
404
+ type: "Convolution"
405
+ bottom: "data244"
406
+ top: "conv1_4"
407
+ param {
408
+ lr_mult: 1
409
+ decay_mult: 1
410
+ }
411
+ param {
412
+ lr_mult: 2
413
+ decay_mult: 1
414
+ }
415
+ convolution_param {
416
+ num_output: 28
417
+ kernel_size: 3
418
+ stride: 1
419
+ weight_filler {
420
+ type: "xavier"
421
+ }
422
+ bias_filler {
423
+ type: "constant"
424
+ value: 0
425
+ }
426
+ }
427
+
428
+ }
429
+ layer {
430
+ name: "prelu1_4"
431
+ type: "PReLU"
432
+ bottom: "conv1_4"
433
+ top: "conv1_4"
434
+
435
+ }
436
+ layer {
437
+ name: "pool1_4"
438
+ type: "Pooling"
439
+ bottom: "conv1_4"
440
+ top: "pool1_4"
441
+ pooling_param {
442
+ pool: MAX
443
+ kernel_size: 3
444
+ stride: 2
445
+ }
446
+ }
447
+
448
+ layer {
449
+ name: "conv2_4"
450
+ type: "Convolution"
451
+ bottom: "pool1_4"
452
+ top: "conv2_4"
453
+ param {
454
+ lr_mult: 1
455
+ decay_mult: 1
456
+ }
457
+ param {
458
+ lr_mult: 2
459
+ decay_mult: 1
460
+ }
461
+ convolution_param {
462
+ num_output: 48
463
+ kernel_size: 3
464
+ stride: 1
465
+ weight_filler {
466
+ type: "xavier"
467
+ }
468
+ bias_filler {
469
+ type: "constant"
470
+ value: 0
471
+ }
472
+ }
473
+
474
+ }
475
+ layer {
476
+ name: "prelu2_4"
477
+ type: "PReLU"
478
+ bottom: "conv2_4"
479
+ top: "conv2_4"
480
+ }
481
+ layer {
482
+ name: "pool2_4"
483
+ type: "Pooling"
484
+ bottom: "conv2_4"
485
+ top: "pool2_4"
486
+ pooling_param {
487
+ pool: MAX
488
+ kernel_size: 3
489
+ stride: 2
490
+ }
491
+
492
+ }
493
+ layer {
494
+ name: "conv3_4"
495
+ type: "Convolution"
496
+ bottom: "pool2_4"
497
+ top: "conv3_4"
498
+ param {
499
+ lr_mult: 1
500
+ decay_mult: 1
501
+ }
502
+ param {
503
+ lr_mult: 2
504
+ decay_mult: 1
505
+ }
506
+ convolution_param {
507
+ num_output: 64
508
+ kernel_size: 2
509
+ stride: 1
510
+ weight_filler {
511
+ type: "xavier"
512
+ }
513
+ bias_filler {
514
+ type: "constant"
515
+ value: 0
516
+ }
517
+ }
518
+
519
+ }
520
+ layer {
521
+ name: "prelu3_4"
522
+ type: "PReLU"
523
+ bottom: "conv3_4"
524
+ top: "conv3_4"
525
+ }
526
+ ##########################
527
+ ##########################
528
+ layer {
529
+ name: "conv1_5"
530
+ type: "Convolution"
531
+ bottom: "data245"
532
+ top: "conv1_5"
533
+ param {
534
+ lr_mult: 1
535
+ decay_mult: 1
536
+ }
537
+ param {
538
+ lr_mult: 2
539
+ decay_mult: 1
540
+ }
541
+ convolution_param {
542
+ num_output: 28
543
+ kernel_size: 3
544
+ stride: 1
545
+ weight_filler {
546
+ type: "xavier"
547
+ }
548
+ bias_filler {
549
+ type: "constant"
550
+ value: 0
551
+ }
552
+ }
553
+
554
+ }
555
+ layer {
556
+ name: "prelu1_5"
557
+ type: "PReLU"
558
+ bottom: "conv1_5"
559
+ top: "conv1_5"
560
+
561
+ }
562
+ layer {
563
+ name: "pool1_5"
564
+ type: "Pooling"
565
+ bottom: "conv1_5"
566
+ top: "pool1_5"
567
+ pooling_param {
568
+ pool: MAX
569
+ kernel_size: 3
570
+ stride: 2
571
+ }
572
+ }
573
+
574
+ layer {
575
+ name: "conv2_5"
576
+ type: "Convolution"
577
+ bottom: "pool1_5"
578
+ top: "conv2_5"
579
+ param {
580
+ lr_mult: 1
581
+ decay_mult: 1
582
+ }
583
+ param {
584
+ lr_mult: 2
585
+ decay_mult: 1
586
+ }
587
+ convolution_param {
588
+ num_output: 48
589
+ kernel_size: 3
590
+ stride: 1
591
+ weight_filler {
592
+ type: "xavier"
593
+ }
594
+ bias_filler {
595
+ type: "constant"
596
+ value: 0
597
+ }
598
+ }
599
+
600
+ }
601
+ layer {
602
+ name: "prelu2_5"
603
+ type: "PReLU"
604
+ bottom: "conv2_5"
605
+ top: "conv2_5"
606
+ }
607
+ layer {
608
+ name: "pool2_5"
609
+ type: "Pooling"
610
+ bottom: "conv2_5"
611
+ top: "pool2_5"
612
+ pooling_param {
613
+ pool: MAX
614
+ kernel_size: 3
615
+ stride: 2
616
+ }
617
+
618
+ }
619
+ layer {
620
+ name: "conv3_5"
621
+ type: "Convolution"
622
+ bottom: "pool2_5"
623
+ top: "conv3_5"
624
+ param {
625
+ lr_mult: 1
626
+ decay_mult: 1
627
+ }
628
+ param {
629
+ lr_mult: 2
630
+ decay_mult: 1
631
+ }
632
+ convolution_param {
633
+ num_output: 64
634
+ kernel_size: 2
635
+ stride: 1
636
+ weight_filler {
637
+ type: "xavier"
638
+ }
639
+ bias_filler {
640
+ type: "constant"
641
+ value: 0
642
+ }
643
+ }
644
+
645
+ }
646
+ layer {
647
+ name: "prelu3_5"
648
+ type: "PReLU"
649
+ bottom: "conv3_5"
650
+ top: "conv3_5"
651
+ }
652
+ ##########################
653
+ layer {
654
+ name: "concat"
655
+ bottom: "conv3_1"
656
+ bottom: "conv3_2"
657
+ bottom: "conv3_3"
658
+ bottom: "conv3_4"
659
+ bottom: "conv3_5"
660
+ top: "conv3"
661
+ type: "Concat"
662
+ concat_param {
663
+ axis: 1
664
+ }
665
+ }
666
+ ##########################
667
+ layer {
668
+ name: "fc4"
669
+ type: "InnerProduct"
670
+ bottom: "conv3"
671
+ top: "fc4"
672
+ param {
673
+ lr_mult: 1
674
+ decay_mult: 1
675
+ }
676
+ param {
677
+ lr_mult: 2
678
+ decay_mult: 1
679
+ }
680
+ inner_product_param {
681
+ num_output: 256
682
+ weight_filler {
683
+ type: "xavier"
684
+ }
685
+ bias_filler {
686
+ type: "constant"
687
+ value: 0
688
+ }
689
+ }
690
+
691
+ }
692
+ layer {
693
+ name: "prelu4"
694
+ type: "PReLU"
695
+ bottom: "fc4"
696
+ top: "fc4"
697
+ }
698
+ ############################
699
+ layer {
700
+ name: "fc4_1"
701
+ type: "InnerProduct"
702
+ bottom: "fc4"
703
+ top: "fc4_1"
704
+ param {
705
+ lr_mult: 1
706
+ decay_mult: 1
707
+ }
708
+ param {
709
+ lr_mult: 2
710
+ decay_mult: 1
711
+ }
712
+ inner_product_param {
713
+ num_output: 64
714
+ weight_filler {
715
+ type: "xavier"
716
+ }
717
+ bias_filler {
718
+ type: "constant"
719
+ value: 0
720
+ }
721
+ }
722
+
723
+ }
724
+ layer {
725
+ name: "prelu4_1"
726
+ type: "PReLU"
727
+ bottom: "fc4_1"
728
+ top: "fc4_1"
729
+ }
730
+ layer {
731
+ name: "fc5_1"
732
+ type: "InnerProduct"
733
+ bottom: "fc4_1"
734
+ top: "fc5_1"
735
+ param {
736
+ lr_mult: 1
737
+ decay_mult: 1
738
+ }
739
+ param {
740
+ lr_mult: 2
741
+ decay_mult: 1
742
+ }
743
+ inner_product_param {
744
+ num_output: 2
745
+ weight_filler {
746
+ type: "xavier"
747
+ #type: "constant"
748
+ #value: 0
749
+ }
750
+ bias_filler {
751
+ type: "constant"
752
+ value: 0
753
+ }
754
+ }
755
+ }
756
+
757
+
758
+ #########################
759
+ layer {
760
+ name: "fc4_2"
761
+ type: "InnerProduct"
762
+ bottom: "fc4"
763
+ top: "fc4_2"
764
+ param {
765
+ lr_mult: 1
766
+ decay_mult: 1
767
+ }
768
+ param {
769
+ lr_mult: 2
770
+ decay_mult: 1
771
+ }
772
+ inner_product_param {
773
+ num_output: 64
774
+ weight_filler {
775
+ type: "xavier"
776
+ }
777
+ bias_filler {
778
+ type: "constant"
779
+ value: 0
780
+ }
781
+ }
782
+
783
+ }
784
+ layer {
785
+ name: "prelu4_2"
786
+ type: "PReLU"
787
+ bottom: "fc4_2"
788
+ top: "fc4_2"
789
+ }
790
+ layer {
791
+ name: "fc5_2"
792
+ type: "InnerProduct"
793
+ bottom: "fc4_2"
794
+ top: "fc5_2"
795
+ param {
796
+ lr_mult: 1
797
+ decay_mult: 1
798
+ }
799
+ param {
800
+ lr_mult: 2
801
+ decay_mult: 1
802
+ }
803
+ inner_product_param {
804
+ num_output: 2
805
+ weight_filler {
806
+ type: "xavier"
807
+ #type: "constant"
808
+ #value: 0
809
+ }
810
+ bias_filler {
811
+ type: "constant"
812
+ value: 0
813
+ }
814
+ }
815
+ }
816
+
817
+ #########################
818
+ layer {
819
+ name: "fc4_3"
820
+ type: "InnerProduct"
821
+ bottom: "fc4"
822
+ top: "fc4_3"
823
+ param {
824
+ lr_mult: 1
825
+ decay_mult: 1
826
+ }
827
+ param {
828
+ lr_mult: 2
829
+ decay_mult: 1
830
+ }
831
+ inner_product_param {
832
+ num_output: 64
833
+ weight_filler {
834
+ type: "xavier"
835
+ }
836
+ bias_filler {
837
+ type: "constant"
838
+ value: 0
839
+ }
840
+ }
841
+
842
+ }
843
+ layer {
844
+ name: "prelu4_3"
845
+ type: "PReLU"
846
+ bottom: "fc4_3"
847
+ top: "fc4_3"
848
+ }
849
+ layer {
850
+ name: "fc5_3"
851
+ type: "InnerProduct"
852
+ bottom: "fc4_3"
853
+ top: "fc5_3"
854
+ param {
855
+ lr_mult: 1
856
+ decay_mult: 1
857
+ }
858
+ param {
859
+ lr_mult: 2
860
+ decay_mult: 1
861
+ }
862
+ inner_product_param {
863
+ num_output: 2
864
+ weight_filler {
865
+ type: "xavier"
866
+ #type: "constant"
867
+ #value: 0
868
+ }
869
+ bias_filler {
870
+ type: "constant"
871
+ value: 0
872
+ }
873
+ }
874
+ }
875
+
876
+ #########################
877
+ layer {
878
+ name: "fc4_4"
879
+ type: "InnerProduct"
880
+ bottom: "fc4"
881
+ top: "fc4_4"
882
+ param {
883
+ lr_mult: 1
884
+ decay_mult: 1
885
+ }
886
+ param {
887
+ lr_mult: 2
888
+ decay_mult: 1
889
+ }
890
+ inner_product_param {
891
+ num_output: 64
892
+ weight_filler {
893
+ type: "xavier"
894
+ }
895
+ bias_filler {
896
+ type: "constant"
897
+ value: 0
898
+ }
899
+ }
900
+
901
+ }
902
+ layer {
903
+ name: "prelu4_4"
904
+ type: "PReLU"
905
+ bottom: "fc4_4"
906
+ top: "fc4_4"
907
+ }
908
+ layer {
909
+ name: "fc5_4"
910
+ type: "InnerProduct"
911
+ bottom: "fc4_4"
912
+ top: "fc5_4"
913
+ param {
914
+ lr_mult: 1
915
+ decay_mult: 1
916
+ }
917
+ param {
918
+ lr_mult: 2
919
+ decay_mult: 1
920
+ }
921
+ inner_product_param {
922
+ num_output: 2
923
+ weight_filler {
924
+ type: "xavier"
925
+ #type: "constant"
926
+ #value: 0
927
+ }
928
+ bias_filler {
929
+ type: "constant"
930
+ value: 0
931
+ }
932
+ }
933
+ }
934
+
935
+ #########################
936
+ layer {
937
+ name: "fc4_5"
938
+ type: "InnerProduct"
939
+ bottom: "fc4"
940
+ top: "fc4_5"
941
+ param {
942
+ lr_mult: 1
943
+ decay_mult: 1
944
+ }
945
+ param {
946
+ lr_mult: 2
947
+ decay_mult: 1
948
+ }
949
+ inner_product_param {
950
+ num_output: 64
951
+ weight_filler {
952
+ type: "xavier"
953
+ }
954
+ bias_filler {
955
+ type: "constant"
956
+ value: 0
957
+ }
958
+ }
959
+
960
+ }
961
+ layer {
962
+ name: "prelu4_5"
963
+ type: "PReLU"
964
+ bottom: "fc4_5"
965
+ top: "fc4_5"
966
+ }
967
+ layer {
968
+ name: "fc5_5"
969
+ type: "InnerProduct"
970
+ bottom: "fc4_5"
971
+ top: "fc5_5"
972
+ param {
973
+ lr_mult: 1
974
+ decay_mult: 1
975
+ }
976
+ param {
977
+ lr_mult: 2
978
+ decay_mult: 1
979
+ }
980
+ inner_product_param {
981
+ num_output: 2
982
+ weight_filler {
983
+ type: "xavier"
984
+ #type: "constant"
985
+ #value: 0
986
+ }
987
+ bias_filler {
988
+ type: "constant"
989
+ value: 0
990
+ }
991
+ }
992
+ }
993
+
994
+ #########################
995
+
face_alignment/mtcnn_pytorch/extract_weights_from_caffe_models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import caffe
2
+ import numpy as np
3
+
4
+ """
5
+ The purpose of this script is to convert pretrained weights taken from
6
+ official implementation here:
7
+ https://github.com/kpzhang93/MTCNN_face_detection_alignment/tree/master/code/codes/MTCNNv2
8
+ to required format.
9
+
10
+ In a nutshell, it just renames and transposes some of the weights.
11
+ You don't have to use this script because weights are already in `src/weights`.
12
+ """
13
+
14
+
15
def get_all_weights(net):
    """Collect the conv and PReLU parameters of a Caffe net under new names.

    Arguments:
        net: an object whose `params` maps layer names to sequences of
            blobs, each blob exposing its values as `.data`.

    Returns:
        a dict that maps '<name>.weight' / '<name>.bias' to the blob data.
        Layers whose name contains neither 'conv' nor 'prelu'
        (case-insensitive for the latter) are skipped.
    """
    converted = {}
    for layer in net.params:
        blobs = net.params[layer]

        if 'conv' not in layer:
            # PReLU layers only contribute a weight, stored under a
            # lower-cased name.
            if 'prelu' in layer.lower():
                converted['features.' + layer.lower() + '.weight'] = blobs[0].data
            continue

        kernel = blobs[0].data
        if '-' in layer:
            # names such as 'conv4-1': the second to last character is
            # replaced by '_' and no 'features.' prefix is added
            chars = list(layer)
            chars[-2] = '_'
            key = ''.join(chars)
        else:
            key = 'features.' + layer
            if len(kernel.shape) == 4:
                # 4-D kernels get their last two axes swapped
                kernel = kernel.transpose((0, 1, 3, 2))

        converted[key + '.weight'] = kernel
        converted[key + '.bias'] = blobs[1].data
    return converted
35
+
36
+
37
# Convert the three cascade stages in order: P-Net, R-Net, O-Net.
# Each entry is (network definition, pretrained weights, output file).
for prototxt, caffemodel, target in (
        ('caffe_models/det1.prototxt', 'caffe_models/det1.caffemodel', 'src/weights/pnet.npy'),
        ('caffe_models/det2.prototxt', 'caffe_models/det2.caffemodel', 'src/weights/rnet.npy'),
        ('caffe_models/det3.prototxt', 'caffe_models/det3.caffemodel', 'src/weights/onet.npy'),
):
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    np.save(target, get_all_weights(net))
face_alignment/mtcnn_pytorch/get_aligned_face_from_mtcnn.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
face_alignment/mtcnn_pytorch/refine_faces.ipynb ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {
7
+ "ExecuteTime": {
8
+ "end_time": "2018-07-21T07:06:15.533290Z",
9
+ "start_time": "2018-07-21T07:06:15.509560Z"
10
+ }
11
+ },
12
+ "outputs": [
13
+ {
14
+ "name": "stdout",
15
+ "output_type": "stream",
16
+ "text": [
17
+ "The autoreload extension is already loaded. To reload it, use:\n",
18
+ " %reload_ext autoreload\n"
19
+ ]
20
+ }
21
+ ],
22
+ "source": [
23
+ "%load_ext autoreload\n",
24
+ "%autoreload 2\n",
25
+ "\n",
26
+ "from src import detect_faces, show_bboxes\n",
27
+ "from PIL import Image\n",
28
+ "import cv2\n",
29
+ "import numpy as np\n",
30
+ "from src.align_trans import get_reference_facial_points, warp_and_crop_face\n",
31
+ "import mxnet as mx\n",
32
+ "import io\n",
33
+ "from pathlib import Path"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 3,
39
+ "metadata": {
40
+ "ExecuteTime": {
41
+ "end_time": "2018-07-21T07:08:15.237357Z",
42
+ "start_time": "2018-07-21T07:08:15.214563Z"
43
+ }
44
+ },
45
+ "outputs": [],
46
+ "source": [
47
+ "face_folder = Path('/home/f/learning/Dataset/faces_vgg_112x112')\n",
48
+ "bin_path = face_folder/'train.rec'\n",
49
+ "idx_path = face_folder/'train.idx'"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 4,
55
+ "metadata": {
56
+ "ExecuteTime": {
57
+ "end_time": "2018-07-21T07:08:20.176501Z",
58
+ "start_time": "2018-07-21T07:08:17.337626Z"
59
+ }
60
+ },
61
+ "outputs": [],
62
+ "source": [
63
+ "imgrec = mx.recordio.MXIndexedRecordIO(str(idx_path), str(bin_path), 'r')"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 25,
69
+ "metadata": {
70
+ "ExecuteTime": {
71
+ "end_time": "2018-07-21T07:10:25.708722Z",
72
+ "start_time": "2018-07-21T07:10:25.687476Z"
73
+ }
74
+ },
75
+ "outputs": [
76
+ {
77
+ "name": "stdout",
78
+ "output_type": "stream",
79
+ "text": [
80
+ "HEADER(flag=0, label=2.0, id=813, id2=0)\n"
81
+ ]
82
+ },
83
+ {
84
+ "data": {
85
+ "image/png": "\n",
86
+ "text/plain": [
87
+ "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=112x112 at 0x7FCAB5C7C048>"
88
+ ]
89
+ },
90
+ "execution_count": 25,
91
+ "metadata": {},
92
+ "output_type": "execute_result"
93
+ }
94
+ ],
95
+ "source": [
96
+ "i =813\n",
97
+ "\n",
98
+ "img_info = imgrec.read_idx(i)\n",
99
+ "\n",
100
+ "header, img = mx.recordio.unpack(img_info)\n",
101
+ "\n",
102
+ "encoded_jpg_io = io.BytesIO(img)\n",
103
+ "\n",
104
+ "image = Image.open(encoded_jpg_io)\n",
105
+ "\n",
106
+ "print(header)\n",
107
+ "image"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 26,
113
+ "metadata": {
114
+ "ExecuteTime": {
115
+ "end_time": "2018-07-21T07:10:26.732578Z",
116
+ "start_time": "2018-07-21T07:10:26.711066Z"
117
+ }
118
+ },
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/plain": [
123
+ "(112, 112)"
124
+ ]
125
+ },
126
+ "execution_count": 26,
127
+ "metadata": {},
128
+ "output_type": "execute_result"
129
+ }
130
+ ],
131
+ "source": [
132
+ "image.size"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 27,
138
+ "metadata": {
139
+ "ExecuteTime": {
140
+ "end_time": "2018-07-21T07:10:29.714824Z",
141
+ "start_time": "2018-07-21T07:10:29.676756Z"
142
+ }
143
+ },
144
+ "outputs": [],
145
+ "source": [
146
+ "bounding_boxes, landmarks = detect_faces(image)"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 28,
152
+ "metadata": {
153
+ "ExecuteTime": {
154
+ "end_time": "2018-07-21T07:10:30.404858Z",
155
+ "start_time": "2018-07-21T07:10:30.386340Z"
156
+ }
157
+ },
158
+ "outputs": [
159
+ {
160
+ "data": {
161
+ "text/plain": [
162
+ "(array([[ 13.36201936, 5.58984986, 78.93511893, 104.44713098,\n",
163
+ " 0.99996698]]),\n",
164
+ " array([[45.040733, 73.22949 , 67.01588 , 46.294598, 68.35203 , 47.975132,\n",
165
+ " 46.75182 , 68.91486 , 85.37722 , 84.38674 ]], dtype=float32))"
166
+ ]
167
+ },
168
+ "execution_count": 28,
169
+ "metadata": {},
170
+ "output_type": "execute_result"
171
+ }
172
+ ],
173
+ "source": [
174
+ "bounding_boxes,landmarks"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": 36,
180
+ "metadata": {
181
+ "ExecuteTime": {
182
+ "end_time": "2018-07-21T07:14:20.172835Z",
183
+ "start_time": "2018-07-21T07:14:20.138160Z"
184
+ }
185
+ },
186
+ "outputs": [
187
+ {
188
+ "name": "stderr",
189
+ "output_type": "stream",
190
+ "text": [
191
+ " 0%| | 0/1 [00:00<?, ?it/s]\n"
192
+ ]
193
+ },
194
+ {
195
+ "ename": "FaceWarpException",
196
+ "evalue": "In File /root/Notebooks/face/mtcnn-pytorch/src/align_trans.py:FaceWarpException('No paddings to do, output_size must be None or [ 96 112]',)",
197
+ "output_type": "error",
198
+ "traceback": [
199
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
200
+ "\u001b[0;31mFaceWarpException\u001b[0m Traceback (most recent call last)",
201
+ "\u001b[0;32m<ipython-input-36-1da710ed1190>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mlandmark\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlandmarks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mfacial5points\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlandmark\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlandmark\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdst_img\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwarp_and_crop_face\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mface\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfacial5points\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcrop_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m112\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m112\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m 
\u001b[0mfaces\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mImage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfromarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdst_img\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m...\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
202
+ "\u001b[0;32m~/Notebooks/face/mtcnn-pytorch/src/align_trans.py\u001b[0m in \u001b[0;36mwarp_and_crop_face\u001b[0;34m(src_img, facial_pts, reference_pts, crop_size, align_type)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0minner_padding_factor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 259\u001b[0m \u001b[0mouter_padding\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 260\u001b[0;31m default_square)\n\u001b[0m\u001b[1;32m 261\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mref_pts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreference_pts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
203
+ "\u001b[0;32m~/Notebooks/face/mtcnn-pytorch/src/align_trans.py\u001b[0m in \u001b[0;36mget_reference_facial_points\u001b[0;34m(output_size, inner_padding_factor, outer_padding, default_square)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m raise FaceWarpException(\n\u001b[0;32m--> 104\u001b[0;31m 'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))\n\u001b[0m\u001b[1;32m 105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[0;31m# check output size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
204
+ "\u001b[0;31mFaceWarpException\u001b[0m: In File /root/Notebooks/face/mtcnn-pytorch/src/align_trans.py:FaceWarpException('No paddings to do, output_size must be None or [ 96 112]',)"
205
+ ]
206
+ }
207
+ ],
208
+ "source": [
209
+ "from tqdm import tqdm\n",
210
+ "faces = []\n",
211
+ "img_cv2 = np.array(image)[...,::-1]\n",
212
+ "for i in tqdm(range(len(bounding_boxes))):\n",
213
+ " box = bounding_boxes[i][:4].astype(np.int32).tolist()\n",
214
+ " for idx, coord in enumerate(box[:2]):\n",
215
+ " if coord > 1:\n",
216
+ " box[idx] -= 1\n",
217
+ " if box[2] + 1 < img_cv2.shape[1]:\n",
218
+ " box[2] += 1\n",
219
+ " if box[3] + 1 < img_cv2.shape[0]:\n",
220
+ " box[3] += 1\n",
221
+ " face = img_cv2[box[1]:box[3],box[0]:box[2]]\n",
222
+ " landmark = landmarks[i]\n",
223
+ " facial5points = [[landmark[j] - box[0],landmark[j+5] - box[1]] for j in range(5)]\n",
224
+ " dst_img = warp_and_crop_face(face,facial5points, crop_size=(112,112))\n",
225
+ " faces.append(Image.fromarray(dst_img[...,::-1]))"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": 38,
231
+ "metadata": {
232
+ "ExecuteTime": {
233
+ "end_time": "2018-07-21T07:21:45.873749Z",
234
+ "start_time": "2018-07-21T07:21:45.857902Z"
235
+ }
236
+ },
237
+ "outputs": [],
238
+ "source": [
239
+ "reference_pts = get_reference_facial_points(default_square= True)"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 40,
245
+ "metadata": {
246
+ "ExecuteTime": {
247
+ "end_time": "2018-07-21T07:22:21.544120Z",
248
+ "start_time": "2018-07-21T07:22:21.517479Z"
249
+ }
250
+ },
251
+ "outputs": [
252
+ {
253
+ "name": "stderr",
254
+ "output_type": "stream",
255
+ "text": [
256
+ "/root/Notebooks/face/mtcnn-pytorch/src/matlab_cp2tform.py:90: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\n",
257
+ "To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\n",
258
+ " r, _, _, _ = lstsq(X, U)\n"
259
+ ]
260
+ }
261
+ ],
262
+ "source": [
263
+ "dst_img = warp_and_crop_face(face, facial5points, reference_pts, crop_size=(112,112))"
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": 43,
269
+ "metadata": {
270
+ "ExecuteTime": {
271
+ "end_time": "2018-07-21T07:22:31.344783Z",
272
+ "start_time": "2018-07-21T07:22:31.313710Z"
273
+ }
274
+ },
275
+ "outputs": [
276
+ {
277
+ "data": {
278
+ "image/png": "\n",
279
+ "text/plain": [
280
+ "<PIL.Image.Image image mode=RGB size=112x112 at 0x7FCAB5CB4438>"
281
+ ]
282
+ },
283
+ "execution_count": 43,
284
+ "metadata": {},
285
+ "output_type": "execute_result"
286
+ }
287
+ ],
288
+ "source": [
289
+ "Image.fromarray(dst_img[...,::-1])"
290
+ ]
291
+ }
292
+ ],
293
+ "metadata": {
294
+ "hide_input": false,
295
+ "kernelspec": {
296
+ "display_name": "Python 3",
297
+ "language": "python",
298
+ "name": "python3"
299
+ },
300
+ "language_info": {
301
+ "codemirror_mode": {
302
+ "name": "ipython",
303
+ "version": 3
304
+ },
305
+ "file_extension": ".py",
306
+ "mimetype": "text/x-python",
307
+ "name": "python",
308
+ "nbconvert_exporter": "python",
309
+ "pygments_lexer": "ipython3",
310
+ "version": "3.6.4"
311
+ }
312
+ },
313
+ "nbformat": 4,
314
+ "nbformat_minor": 2
315
+ }
face_alignment/mtcnn_pytorch/src/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .visualization_utils import show_bboxes
2
+ from .detector import detect_faces
face_alignment/mtcnn_pytorch/src/align_trans.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Apr 24 15:43:29 2017
4
+ @author: zhaoy
5
+ """
6
+ import numpy as np
7
+ import cv2
8
+
9
+ # from scipy.linalg import lstsq
10
+ # from scipy.ndimage import geometric_transform # , map_coordinates
11
+
12
+ from mtcnn_pytorch.src.matlab_cp2tform import get_similarity_transform_for_cv2
13
+
14
+ # reference facial points, a list of coordinates (x,y)
15
+ REFERENCE_FACIAL_POINTS = [
16
+ [30.29459953, 51.69630051],
17
+ [65.53179932, 51.50139999],
18
+ [48.02519989, 71.73660278],
19
+ [33.54930115, 92.3655014],
20
+ [62.72990036, 92.20410156]
21
+ ]
22
+
23
+ DEFAULT_CROP_SIZE = (96, 112)
24
+
25
+
26
class FaceWarpException(Exception):
    """Error raised by the face alignment helpers of this module."""

    def __str__(self):
        # Delegate to Exception.__str__ so the text after the file name is
        # the message itself. `super.__str__(self)` (no call on `super`)
        # resolved to `object.__str__` and rendered the exception's repr.
        return 'In File {}:{}'.format(
            __file__, super(FaceWarpException, self).__str__())
30
+
31
+
32
def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    """
    Function:
    ----------
        get reference 5 key points according to crop settings:
        0. Set default crop_size:
            if default_square:
                crop_size = (112, 112)
            else:
                crop_size = (96, 112)
        1. Pad the crop_size by inner_padding_factor in each side;
        2. Resize crop_size into (output_size - outer_padding*2),
            pad into output_size with outer_padding;
        3. Output reference_5point;
    Parameters:
    ----------
        @output_size: (w, h) or None
            size of aligned face image; when None and some padding is
            requested, it is deduced as
            (padded crop_size + outer_padding*2)
        @inner_padding_factor: a number in [0, 1]
            the crop is enlarged by (crop_size * inner_padding_factor)
            on each side
        @outer_padding: (w_pad, h_pad)
            padding added around the resized crop on each side
        @default_square: True or False
            if True:
                default crop_size = (112, 112)
            else:
                default crop_size = (96, 112);
        !!! make sure, if output_size is not None:
                (output_size - outer_padding*2)
                = some_scale * (default crop_size * (1.0 + inner_padding_factor*2))
    Returns:
    ----------
        @reference_5point: 5x2 np.array
            each row is a pair of transformed coordinates (x, y)
    Raises:
    ----------
        FaceWarpException: when the arguments are inconsistent (see the
            individual checks below)
    """
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # `is not None` instead of truthiness: an ndarray output_size has no
    # unambiguous truth value.
    if (output_size is not None and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        return tmp_5pts

    # element-wise test so lists and arrays behave like the (0, 0) tuple
    no_outer_padding = outer_padding[0] == 0 and outer_padding[1] == 0

    if inner_padding_factor == 0 and no_outer_padding:
        if output_size is None:
            return tmp_5pts
        else:
            raise FaceWarpException(
                'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        # Deduce the output size the same way steps 1) and 2) below consume
        # it: rounded inner padding plus the outer padding on both sides.
        inner_pad = np.round(
            tmp_crop_size * inner_padding_factor * 2).astype(np.int32)
        output_size = tmp_crop_size + inner_pad + np.array(outer_padding) * 2

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according inner_padding_factor
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    # the resize must keep the aspect ratio of the padded crop
    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)

    return reference_5point
161
+
162
+
163
def get_affine_transform_matrix(src_pts, dst_pts):
    """
    Function:
    ----------
        get affine transform matrix 'tfm' from src_pts to dst_pts
        by a least-squares fit over all given points
    Parameters:
    ----------
        @src_pts: Kx2 np.array (or nested list)
            source points matrix, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array (or nested list)
            destination points matrix, each row is a pair of coordinates (x, y)
    Returns:
    ----------
        @tfm: 2x3 np.array (float32)
            transform matrix from src_pts to dst_pts; the identity
            transform when the fitted system has rank below 2
    """
    # accept nested lists as well as arrays
    src_pts = np.asarray(src_pts)
    dst_pts = np.asarray(dst_pts)

    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    # homogeneous coordinates: one extra column of ones
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    # rcond is passed explicitly: omitting it triggers a FutureWarning on
    # older NumPy releases and its implicit default differs across versions.
    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)

    if rank == 3:
        # full affine fit: rows of `A` become columns of the 2x3 matrix
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        # rank-deficient system: keep the linear part, drop the translation
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])

    return tfm
208
+
209
+
210
def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    """
    Function:
    ----------
        align a face: estimate the transform that maps facial_pts onto
        reference_pts and warp src_img with it into a crop_size image
    Parameters:
    ----------
        @src_img: np.array
            input image, passed as is to cv2.warpAffine()
        @facial_pts: could be
            1)a list of K coordinates (x,y)
        or
            2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
        @reference_pts: could be
            1) a list of K coordinates (x,y)
        or
            2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
        or
            3) None
            if None, use default reference facial points
        @crop_size: (w, h)
            output face image size
        @align_type: transform type, could be one of
            1) 'cv2_affine': use the first 3 points to do affine transform,
                    by calling cv2.getAffineTransform()
            2) 'affine': use all points to do affine transform
            3) any other value (including the default): use similarity
                    transform
    Returns:
    ----------
        @face_img: output face image with size (w, h) = @crop_size
    Raises:
    ----------
        FaceWarpException: when the point sets are not (K,2)/(2,K) with
            K>2, or when their shapes differ
    """

    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size,
                                                        inner_padding_factor,
                                                        outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    # normalize a 2xK layout to Kx2
    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    # Compare by value: `is` tests object identity, which is not guaranteed
    # for equal strings.
    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    else:
        tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img
face_alignment/mtcnn_pytorch/src/box_utils.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from PIL import Image
3
+
4
+
5
def nms(boxes, overlap_threshold=0.5, mode='union'):
    """Non-maximum suppression.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number.
        mode: 'union' or 'min'.

    Returns:
        list with indices of the selected boxes

    Raises:
        ValueError: if there are boxes to process and
            mode is neither 'union' nor 'min'.
    """

    # if there are no boxes, return the empty list
    if len(boxes) == 0:
        return []

    # fail with a clear message instead of an unbound local inside the loop
    if mode not in ('union', 'min'):
        raise ValueError(
            "mode must be 'union' or 'min', got {!r}".format(mode))

    # list of picked indices
    pick = []

    # grab the coordinates of the bounding boxes
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

    area = (x2 - x1 + 1.0)*(y2 - y1 + 1.0)
    ids = np.argsort(score)  # in increasing order

    while len(ids) > 0:

        # the box with the largest remaining score is always kept
        i = ids[-1]
        pick.append(i)
        rest = ids[:-1]

        # left top corner of intersection boxes
        ix1 = np.maximum(x1[i], x1[rest])
        iy1 = np.maximum(y1[i], y1[rest])

        # right bottom corner of intersection boxes
        ix2 = np.minimum(x2[i], x2[rest])
        iy2 = np.minimum(y2[i], y2[rest])

        # width and height of intersection boxes
        w = np.maximum(0.0, ix2 - ix1 + 1.0)
        h = np.maximum(0.0, iy2 - iy1 + 1.0)

        # intersections' areas
        inter = w * h
        if mode == 'min':
            overlap = inter/np.minimum(area[i], area[rest])
        else:
            # intersection over union (IoU)
            overlap = inter/(area[i] + area[rest] - inter)

        # drop the picked box and all boxes where overlap is too big
        ids = rest[~(overlap > overlap_threshold)]

    return pick
69
+
70
+
71
def convert_to_square(bboxes):
    """Convert bounding boxes to a square form.

    Each box is replaced by a square whose side is the longer side of the
    box and whose center is the center of the box.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].

    Returns:
        a float numpy array of shape [n, 5],
        squared bounding boxes. It is a new array; columns after
        the first four (the score) are copied over unchanged.
    """

    # start from a copy so that the score column is kept
    # instead of being reset to zero
    square_bboxes = np.array(bboxes)
    x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
    h = y2 - y1 + 1.0
    w = x2 - x1 + 1.0
    max_side = np.maximum(h, w)
    square_bboxes[:, 0] = x1 + w*0.5 - max_side*0.5
    square_bboxes[:, 1] = y1 + h*0.5 - max_side*0.5
    square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
    square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
    return square_bboxes
92
+
93
+
94
def calibrate_box(bboxes, offsets):
    """Shift the corners of bounding boxes by offsets relative to box size.

    'offsets' is one of the outputs of the nets.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5]; this is the input
        'bboxes' array itself, updated in place.
    """
    # box sizes as column vectors of shape [n, 1]
    widths = (bboxes[:, 2] - bboxes[:, 0] + 1.0)[:, np.newaxis]
    heights = (bboxes[:, 3] - bboxes[:, 1] + 1.0)[:, np.newaxis]

    # x offsets are scaled by the width, y offsets by the height:
    #   x1 += tx1*w, y1 += ty1*h, x2 += tx2*w, y2 += ty2*h
    scale = np.concatenate([widths, heights, widths, heights], axis=1)
    shift = scale*offsets

    bboxes[:, 0:4] = bboxes[:, 0:4] + shift
    return bboxes
125
+
126
+
127
def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Every box is copied into its own cutout, resized to
    [size, size] and preprocessed for the network. Parts of a cutout
    that receive no image pixels stay zero.

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """

    num_boxes = len(bounding_boxes)
    width, height = img.size

    [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(bounding_boxes, width, height)
    img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

    # the image does not change between boxes: convert it only once
    img_array = np.asarray(img, 'uint8')

    for i in range(num_boxes):
        img_box = np.zeros((h[i], w[i], 3), 'uint8')

        # copy the image region into its place inside the cutout
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] =\
            img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

        # resize
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, 'float32')

        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes
160
+
161
+
162
def correct_bboxes(bboxes, width, height):
    """Clip boxes to the image and locate the clipped part inside each cutout.

    Note: the first four columns of 'bboxes' are clipped in place,
    because the coordinates below are views into that array.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number.
        height: a float number.

    Returns:
        dy, dx, edy, edx: a int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: a int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: a int numpy arrays of shape [n],
            just heights and widths of boxes.

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """
    # (x, y) is the top left corner, (ex, ey) the bottom right one
    # ('e' stands for end); all four are views into `bboxes`
    x, y, ex, ey = (bboxes[:, k] for k in range(4))

    # sizes are taken before any clipping
    w = ex - x + 1.0
    h = ey - y + 1.0

    # (dx, dy, edx, edy) describe the same box inside its cutout;
    # by default the box fills the whole cutout
    count = bboxes.shape[0]
    dx = np.zeros((count,))
    dy = np.zeros((count,))
    edx = w - 1.0
    edy = h - 1.0

    # bottom right corner is too far right
    # (edx must be computed before ex is clipped)
    too_right = ex > width - 1.0
    edx[too_right] = w[too_right] + width - 2.0 - ex[too_right]
    ex[too_right] = width - 1.0

    # bottom right corner is too low
    too_low = ey > height - 1.0
    edy[too_low] = h[too_low] + height - 2.0 - ey[too_low]
    ey[too_low] = height - 1.0

    # top left corner is too far left
    too_left = x < 0.0
    dx[too_left] = 0.0 - x[too_left]
    x[too_left] = 0.0

    # top left corner is too high
    too_high = y < 0.0
    dy[too_high] = 0.0 - y[too_high]
    y[too_high] = 0.0

    ordered = (dy, edy, dx, edx, y, ey, x, ex, w, h)
    return [values.astype('int32') for values in ordered]
224
+
225
+
226
+ def _preprocess(img):
227
+ """Preprocessing step before feeding the network.
228
+
229
+ Arguments:
230
+ img: a float numpy array of shape [h, w, c].
231
+
232
+ Returns:
233
+ a float numpy array of shape [1, c, h, w].
234
+ """
235
+ img = img.transpose((2, 0, 1))
236
+ img = np.expand_dims(img, 0)
237
+ img = (img - 127.5)*0.0078125
238
+ return img
face_alignment/mtcnn_pytorch/src/detector.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from torch.autograd import Variable
4
+ from .get_nets import PNet, RNet, ONet
5
+ from .box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
6
+ from .first_stage import run_first_stage
7
+
8
+
9
def detect_faces(image, min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7]):
    """Run the three-stage cascade (P-Net, R-Net, O-Net) on one image.

    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a list of length 3, face-probability cut-offs applied
            after P-Net, R-Net and O-Net respectively.
        nms_thresholds: a list of length 3, overlap thresholds handed to
            nms after each stage.

    Returns:
        bounding boxes and facial landmarks, two float numpy arrays with one
        row per kept face; the landmark rows hold five x coordinates
        followed by five y coordinates.
        Two empty lists are returned when no candidate survives.
        NOTE(review): the earlier docstring gave the box shape as
        [n_boxes, 4], but column 4 is used for the score below; confirm
        the exact shape against calibrate_box.
    """
    # NOTE: the list defaults above are only indexed, never mutated.

    # LOAD MODELS
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    device = 'cpu'
    for net in (pnet, rnet, onet):
        net.to(device)
        # inference only: every stage is switched to eval mode
        net.eval()

    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scales for scaling the image
    scales = []

    # scales the image so that
    # minimum size that we can detect equals to
    # minimum face size that we want to detect
    m = min_detection_size/min_face_size
    min_length *= m

    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m*factor**factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1

    # it will be returned
    bounding_boxes = []

    with torch.no_grad():
        # run P-Net on different scales
        for s in scales:
            boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            bounding_boxes.append(boxes)

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = [i for i in bounding_boxes if i is not None]
        if not bounding_boxes:
            # no scale produced a candidate (or the image is too small for
            # any scale); np.vstack would raise on an empty list
            return [], []
        bounding_boxes = np.vstack(bounding_boxes)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes).to(device)

        output = rnet(img_boxes)
        offsets = output[0].cpu().data.numpy()  # shape [n_boxes, 4]
        probs = output[1].cpu().data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = onet(img_boxes)
        landmarks = output[0].cpu().data.numpy()  # shape [n_boxes, 10]
        offsets = output[1].cpu().data.numpy()  # shape [n_boxes, 4]
        probs = output[2].cpu().data.numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points: O-Net predicts them relative to the box,
        # so scale by the box size and shift by its top-left corner
        width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
        landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

    return bounding_boxes, landmarks
face_alignment/mtcnn_pytorch/src/first_stage.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.autograd import Variable
3
+ import math
4
+ from PIL import Image
5
+ import numpy as np
6
+ from .box_utils import nms, _preprocess
7
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
8
+ # device = 'cpu'
9
+
10
def run_first_stage(image, net, scale, threshold):
    """Apply P-Net to one pyramid level and return its de-duplicated boxes.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.

    Returns:
        a float numpy array of shape [n_boxes, 9],
            bounding boxes with scores and offsets (4 + 1 + 4),
        or None when no window passes the threshold.
    """

    # resize the image for this pyramid level and convert it to float32
    orig_w, orig_h = image.size
    scaled_size = (math.ceil(orig_w*scale), math.ceil(orig_h*scale))
    resized = np.asarray(image.resize(scaled_size, Image.BILINEAR), 'float32')

    # the input is placed on whichever device the net's first conv lives on
    net_device = net.features.conv1.weight.device
    batch = torch.FloatTensor(_preprocess(resized)).to(net_device)
    with torch.no_grad():
        output = net(batch)
        # probs: probability of a face at each sliding window
        # offsets: transformations to true bounding boxes
        probs = output[1].cpu().data.numpy()[0, 1, :, :]
        offsets = output[0].cpu().data.numpy()

    candidates = _generate_bboxes(probs, offsets, scale, threshold)
    if len(candidates) == 0:
        return None

    kept = nms(candidates[:, 0:5], overlap_threshold=0.5)
    return candidates[kept]
47
+
48
+
49
+ def _generate_bboxes(probs, offsets, scale, threshold):
50
+ """Generate bounding boxes at places
51
+ where there is probably a face.
52
+
53
+ Arguments:
54
+ probs: a float numpy array of shape [n, m].
55
+ offsets: a float numpy array of shape [1, 4, n, m].
56
+ scale: a float number,
57
+ width and height of the image were scaled by this number.
58
+ threshold: a float number.
59
+
60
+ Returns:
61
+ a float numpy array of shape [n_boxes, 9]
62
+ """
63
+
64
+ # applying P-Net is equivalent, in some sense, to
65
+ # moving 12x12 window with stride 2
66
+ stride = 2
67
+ cell_size = 12
68
+
69
+ # indices of boxes where there is probably a face
70
+ inds = np.where(probs > threshold)
71
+
72
+ if inds[0].size == 0:
73
+ return np.array([])
74
+
75
+ # transformations of bounding boxes
76
+ tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
77
+ # they are defined as:
78
+ # w = x2 - x1 + 1
79
+ # h = y2 - y1 + 1
80
+ # x1_true = x1 + tx1*w
81
+ # x2_true = x2 + tx2*w
82
+ # y1_true = y1 + ty1*h
83
+ # y2_true = y2 + ty2*h
84
+
85
+ offsets = np.array([tx1, ty1, tx2, ty2])
86
+ score = probs[inds[0], inds[1]]
87
+
88
+ # P-Net is applied to scaled images
89
+ # so we need to rescale bounding boxes back
90
+ bounding_boxes = np.vstack([
91
+ np.round((stride*inds[1] + 1.0)/scale),
92
+ np.round((stride*inds[0] + 1.0)/scale),
93
+ np.round((stride*inds[1] + 1.0 + cell_size)/scale),
94
+ np.round((stride*inds[0] + 1.0 + cell_size)/scale),
95
+ score, offsets
96
+ ])
97
+ # why one is added?
98
+
99
+ return bounding_boxes.T
face_alignment/mtcnn_pytorch/src/get_nets.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+
7
+
8
class Flatten(nn.Module):
    """Flatten conv activations after swapping their last two axes."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, c, h, w].
        Returns:
            a float tensor with shape [batch_size, c*h*w].
        """

        # the swap is required: without this pretrained model isn't working
        swapped = torch.transpose(x, 3, 2).contiguous()
        batch_size = swapped.size(0)

        return swapped.view(batch_size, -1)
25
+
26
+
27
class PNet(nn.Module):
    """First-stage proposal network: fully convolutional, so it accepts any input size."""

    def __init__(self):

        super(PNet, self).__init__()

        # suppose we have input with size HxW, then
        # after first layer: H - 2,
        # after pool: ceil((H - 2)/2),
        # after second conv: ceil((H - 2)/2) - 2,
        # after last conv: ceil((H - 2)/2) - 4,
        # and the same for W

        self.features = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(3, 10, 3, 1)),
            ('prelu1', nn.PReLU(10)),
            ('pool1', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(10, 16, 3, 1)),
            ('prelu2', nn.PReLU(16)),

            ('conv3', nn.Conv2d(16, 32, 3, 1)),
            ('prelu3', nn.PReLU(32))
        ]))

        # two 1x1 heads: face / non-face scores and box regression
        self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
        self.conv4_2 = nn.Conv2d(32, 4, 1, 1)

        # NOTE: the path is relative to the current working directory, and
        # allow_pickle=True unpickles the file's contents, so only load the
        # weight files that ship with this repository.
        weights = np.load('mtcnn_pytorch/src/weights/pnet.npy', allow_pickle=True)[()]
        # the loaded object is indexed by parameter name
        for n, p in self.named_parameters():
            p.data = torch.FloatTensor(weights[n])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4, h', w'].
            a: a float tensor with shape [batch_size, 2, h', w'],
                softmax-normalised over the two channels.
        """
        x = self.features(x)
        a = self.conv4_1(x)
        b = self.conv4_2(x)
        # the two class scores live on the channel axis of this 4-D map, so
        # normalise over dim=1; dim=-1 would normalise across the width
        a = F.softmax(a, dim=1)
        return b, a
72
+
73
+
74
class RNet(nn.Module):
    """Second-stage refinement network working on fixed-size crops."""

    def __init__(self):
        super().__init__()

        trunk = [
            ('conv1', nn.Conv2d(3, 28, 3, 1)),
            ('prelu1', nn.PReLU(28)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(28, 48, 3, 1)),
            ('prelu2', nn.PReLU(48)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(48, 64, 2, 1)),
            ('prelu3', nn.PReLU(64)),

            ('flatten', Flatten()),
            ('conv4', nn.Linear(576, 128)),
            ('prelu4', nn.PReLU(128)),
        ]
        self.features = nn.Sequential(OrderedDict(trunk))

        # classification and box-regression heads
        self.conv5_1 = nn.Linear(128, 2)
        self.conv5_2 = nn.Linear(128, 4)

        # overwrite every parameter with the stored value of the same name
        weights = np.load('mtcnn_pytorch/src/weights/rnet.npy', allow_pickle=True)[()]
        for name, param in self.named_parameters():
            param.data = torch.FloatTensor(weights[name])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        feats = self.features(x)
        scores = self.conv5_1(feats)
        box_offsets = self.conv5_2(feats)
        return box_offsets, F.softmax(scores, dim=-1)
117
+
118
+
119
class ONet(nn.Module):
    """Third-stage output network: scores, box offsets and landmarks."""

    def __init__(self):
        super().__init__()

        trunk = [
            ('conv1', nn.Conv2d(3, 32, 3, 1)),
            ('prelu1', nn.PReLU(32)),
            ('pool1', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv2', nn.Conv2d(32, 64, 3, 1)),
            ('prelu2', nn.PReLU(64)),
            ('pool2', nn.MaxPool2d(3, 2, ceil_mode=True)),

            ('conv3', nn.Conv2d(64, 64, 3, 1)),
            ('prelu3', nn.PReLU(64)),
            ('pool3', nn.MaxPool2d(2, 2, ceil_mode=True)),

            ('conv4', nn.Conv2d(64, 128, 2, 1)),
            ('prelu4', nn.PReLU(128)),

            ('flatten', Flatten()),
            ('conv5', nn.Linear(1152, 256)),
            ('drop5', nn.Dropout(0.25)),
            ('prelu5', nn.PReLU(256)),
        ]
        self.features = nn.Sequential(OrderedDict(trunk))

        # heads: class scores, box offsets, landmark coordinates
        self.conv6_1 = nn.Linear(256, 2)
        self.conv6_2 = nn.Linear(256, 4)
        self.conv6_3 = nn.Linear(256, 10)

        # overwrite every parameter with the stored value of the same name
        weights = np.load('mtcnn_pytorch/src/weights/onet.npy', allow_pickle=True)[()]
        for name, param in self.named_parameters():
            param.data = torch.FloatTensor(weights[name])

    def forward(self, x):
        """
        Arguments:
            x: a float tensor with shape [batch_size, 3, h, w].
        Returns:
            c: a float tensor with shape [batch_size, 10].
            b: a float tensor with shape [batch_size, 4].
            a: a float tensor with shape [batch_size, 2].
        """
        feats = self.features(x)
        scores = self.conv6_1(feats)
        box_offsets = self.conv6_2(feats)
        landmark_coords = self.conv6_3(feats)
        return landmark_coords, box_offsets, F.softmax(scores, dim=-1)
face_alignment/mtcnn_pytorch/src/matlab_cp2tform.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Tue Jul 11 06:54:28 2017
4
+
5
+ @author: zhaoyafei
6
+ """
7
+
8
+ import numpy as np
9
+ from numpy.linalg import inv, norm, lstsq
10
+ from numpy.linalg import matrix_rank as rank
11
+
12
class MatlabCp2tormException(Exception):
    """Exception whose text is prefixed with this module's file path."""

    def __str__(self):
        # Call the base-class formatter on this instance. The previous
        # `super.__str__(self)` looked the method up on the `super` type
        # itself, which yields the repr of the exception, not its message.
        return 'In File {}:{}'.format(
            __file__, Exception.__str__(self))
16
+
17
def tformfwd(trans, uv):
    """
    Function:
    ----------
        map the points 'uv' through the affine transform 'trans'

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of transformed coordinates (x, y)
    """
    # append a column of ones so the translation row of 'trans' applies
    ones_col = np.ones((uv.shape[0], 1))
    homogeneous = np.concatenate((uv, ones_col), axis=1)
    mapped = np.dot(homogeneous, trans)
    # drop the homogeneous coordinate again
    return mapped[:, :-1]
41
+
42
+
43
def tforminv(trans, uv):
    """
    Function:
    ----------
        map the points 'uv' through the inverse of affine transform 'trans'

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of inverse-transformed coordinates (x, y)
    """
    # invert the matrix once, then reuse the forward mapping
    return tformfwd(inv(trans), uv)
64
+
65
+
66
def findNonreflectiveSimilarity(uv, xy, options=None):
    """
    Function:
    ----------
        Least-squares fit of a similarity transform without reflection
        (rotation, uniform scale, translation) between two point sets.

    Parameters:
    ----------
        @uv: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @xy: Kx2 np.array
            destination points, each row is a pair of coordinates (x, y)
        @options: dict or None
            optional settings; 'K' is the minimum number of unique points
            required (the system must have rank >= 2*K). Defaults to 2.

    Returns:
    ----------
        @T: 3x3 np.array
            transform matrix from uv to xy
        @Tinv: 3x3 np.array
            the fitted matrix from xy to uv (T is its inverse)

    Raises:
    ----------
        Exception('cp2tform:twoUniquePointsReq') if the points do not
        determine the transform.
    """
    # honour caller-supplied options instead of overwriting them
    K = (options or {}).get('K', 2)
    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector

    # each point gives two equations in r = (sc, ss, tx, ty):
    #   u =  sc*x + ss*y + tx
    #   v = -ss*x + sc*y + ty
    tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
    tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    X = np.vstack((tmp1, tmp2))

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))

    # We know that X * r = U
    if rank(X) < 2 * K:
        raise Exception('cp2tform:twoUniquePointsReq')
    # rcond=None selects numpy's current default explicitly and avoids the
    # FutureWarning that older numpy releases emit when it is omitted
    r, _, _, _ = lstsq(X, U, rcond=None)
    r = np.squeeze(r)

    sc, ss, tx, ty = r

    Tinv = np.array([
        [sc, -ss, 0],
        [ss,  sc, 0],
        [tx,  ty, 1]
    ])

    T = inv(Tinv)
    # force the last column to be exactly (0, 0, 1) after the inversion
    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv
116
+
117
+
118
def findSimilarity(uv, xy, options=None):
    """
    Function:
    ----------
        Fit a similarity transform from uv to xy, trying both a
        non-reflective fit and a reflective fit, and return whichever
        reproduces xy with the smaller residual.

    Parameters:
    ----------
        @uv: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @xy: Kx2 np.array
            destination points, each row is a pair of coordinates (x, y);
            it is not modified
        @options: dict or None
            forwarded to findNonreflectiveSimilarity; defaults to {'K': 2}

    Returns:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy
        @trans_inv: 3x3 np.array
            transform matrix from xy to uv
    """
    if options is None:
        options = {'K': 2}

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis; work on a copy so the
    # caller's array stays intact and the residuals below are measured
    # against the original points
    xyR = xy.copy()
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([
        [-1, 0, 0],
        [0, 1, 0],
        [0, 0, 1]
    ])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv
    else:
        trans2_inv = inv(trans2)
        return trans2, trans2_inv
157
+
158
+
159
def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'trans':
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
       @trans: 3x3 np.array
            transform matrix from uv to xy
        trans_inv: 3x3 np.array
            inverse of trans, transform matrix from xy to uv
    """
    # pick the solver first, then run it once
    solver = findSimilarity if reflective else findNonreflectiveSimilarity
    forward, backward = solver(src_pts, dst_pts)
    return forward, backward
197
+
198
+
199
def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
        Convert Transform Matrix 'trans' into 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    # keep the first two columns and transpose them into a 2x3 matrix
    first_two_cols = trans[:, :2]
    return first_two_cols.T
225
+
226
+
227
def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    # only the forward matrix is needed; the inverse is discarded
    forward = get_similarity_transform(src_pts, dst_pts, reflective)[0]
    return cvt_tform_mat_for_cv2(forward)
262
+
263
+
264
if __name__ == '__main__':
    # Demo: fit a similarity transform to three point pairs and print the
    # matrices and mapped points for comparison with MATLAB.
    #
    # u = [0, 6, -2]
    # v = [0, 3, 5]
    # x = [-1, 0, 4]
    # y = [-1, -10, 4]
    #
    # In Matlab, run:
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim=cp2tform(uv,xy,'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m=tformfwd(tform_sim, u,v)
    #   xy_m =
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m=tforminv(tform_sim, x,y)
    #   uv_m =
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array((u, v)).T
    xy = np.array((x, y)).T

    for title, pts in (('\n--->uv:', uv), ('\n--->xy:', xy)):
        print(title)
        print(pts)

    trans, trans_inv = get_similarity_transform(uv, xy)

    for title, matrix in (('\n--->trans matrix:', trans),
                          ('\n--->trans_inv matrix:', trans_inv)):
        print(title)
        print(matrix)

    print('\n---> apply transform to uv')
    print('\nxy_m = uv_augmented * trans')
    uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
    print(np.dot(uv_aug, trans))

    print('\nxy_m = tformfwd(trans, uv)')
    print(tformfwd(trans, uv))

    print('\n---> apply inverse transform to xy')
    print('\nuv_m = xy_augmented * trans_inv')
    xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
    print(np.dot(xy_aug, trans_inv))

    print('\nuv_m = tformfwd(trans_inv, xy)')
    print(tformfwd(trans_inv, xy))

    # computed before its title is printed, as in the original order
    uv_m = tforminv(trans, xy)
    print('\nuv_m = tforminv(trans, xy)')
    print(uv_m)
face_alignment/mtcnn_pytorch/src/visualization_utils.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import ImageDraw
2
+
3
+
4
def show_bboxes(img, bounding_boxes, facial_landmarks=[]):
    """Return a copy of the image with boxes and landmark dots drawn on it.

    Arguments:
        img: an instance of PIL.Image.
        bounding_boxes: a float numpy array of shape [n, 5].
        facial_landmarks: a float numpy array of shape [n, 10].

    Returns:
        an instance of PIL.Image.
    """

    # draw on a copy so the caller's image is left untouched
    canvas = img.copy()
    pen = ImageDraw.Draw(canvas)

    for box in bounding_boxes:
        top_left = (box[0], box[1])
        bottom_right = (box[2], box[3])
        pen.rectangle([top_left, bottom_right], outline='white')

    for points in facial_landmarks:
        # x coordinates sit in entries 0..4, the matching y in 5..9
        for k in range(5):
            cx, cy = points[k], points[k + 5]
            pen.ellipse([
                (cx - 1.0, cy - 1.0),
                (cx + 1.0, cy + 1.0)
            ], outline='blue')

    return canvas
face_alignment/mtcnn_pytorch/src/weights/onet.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:313141c3646bebb73cb8350a2d5fee4c7f044fb96304b46ccc21aeea8b818f83
3
+ size 2345483
face_alignment/mtcnn_pytorch/src/weights/pnet.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e19e5c473932ab38f5a6308fe6210624006994a687e858d1dcda53c66f18cb
3
+ size 41271
face_alignment/mtcnn_pytorch/src/weights/rnet.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5660aad67688edc9e8a3dd4e47ed120932835e06a8a711a423252a6f2c747083
3
+ size 604651
face_alignment/mtcnn_pytorch/test_on_images.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
face_alignment/mtcnn_pytorch/try_mtcnn_step_by_step.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
net.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import namedtuple
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.nn import Dropout
5
+ from torch.nn import MaxPool2d
6
+ from torch.nn import Sequential
7
+ from torch.nn import Conv2d, Linear
8
+ from torch.nn import BatchNorm1d, BatchNorm2d
9
+ from torch.nn import ReLU, Sigmoid
10
+ from torch.nn import Module
11
+ from torch.nn import PReLU
12
+ import os
13
+
14
def build_model(model_name='ir_50'):
    """Instantiate the backbone called `model_name` for 112x112 inputs.

    Accepted names: 'ir_101', 'ir_50', 'ir_se_50', 'ir_34', 'ir_18'.
    Any other name raises ValueError.
    """
    size = (112, 112)
    if model_name == 'ir_101':
        return IR_101(input_size=size)
    if model_name == 'ir_50':
        return IR_50(input_size=size)
    if model_name == 'ir_se_50':
        return IR_SE_50(input_size=size)
    if model_name == 'ir_34':
        return IR_34(input_size=size)
    if model_name == 'ir_18':
        return IR_18(input_size=size)
    raise ValueError('not a correct model name', model_name)
27
+
28
def initialize_weights(modules):
    """ Initialise weights in place: Conv2d and Linear get kaiming_normal
    (fan_out, relu) with zero bias; BatchNorm2d gets weight 1 and bias 0.
    Modules of any other type are left untouched.
    """
    for layer in modules:
        if isinstance(layer, nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
            continue
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(layer.weight,
                                    mode='fan_out',
                                    nonlinearity='relu')
            # layers built with bias=False have no bias tensor to reset
            if layer.bias is not None:
                layer.bias.data.zero_()
47
+
48
+
49
class Flatten(Module):
    """ Collapse every axis after the batch axis into one
    """
    def forward(self, input):
        batch_size = input.size(0)
        return input.view(batch_size, -1)
54
+
55
+
56
class LinearBlock(Module):
    """ Bias-free convolution followed by batch norm, with no activation
    """
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = Conv2d(in_c, out_c, kernel, stride, padding,
                           groups=groups, bias=False)
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        return self.bn(self.conv(x))
68
+
69
+
70
class GNAP(Module):
    """ Global Norm-Aware Pooling block
    """
    def __init__(self, in_c):
        super().__init__()
        self.bn1 = BatchNorm2d(in_c, affine=False)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.bn2 = BatchNorm1d(in_c, affine=False)

    def forward(self, x):
        normed = self.bn1(x)
        # L2 norm over the channel axis, kept as a size-1 axis
        channel_norm = torch.norm(normed, 2, 1, True)
        # rescale each position by (mean norm / its own norm)
        scale = torch.mean(channel_norm) / channel_norm
        pooled = self.pool(normed * scale)
        flat = pooled.view(pooled.shape[0], -1)
        return self.bn2(flat)
89
+
90
+
91
class GDC(Module):
    """ Global Depthwise Convolution block
    """
    def __init__(self, in_c, embedding_size):
        super().__init__()
        # depthwise 7x7 conv: one group per channel, no padding
        self.conv_6_dw = LinearBlock(in_c, in_c,
                                     kernel=(7, 7),
                                     stride=(1, 1),
                                     padding=(0, 0),
                                     groups=in_c)
        self.conv_6_flatten = Flatten()
        self.linear = Linear(in_c, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size, affine=False)

    def forward(self, x):
        stages = (self.conv_6_dw, self.conv_6_flatten, self.linear, self.bn)
        for stage in stages:
            x = stage(x)
        return x
111
+
112
+
113
class SEModule(Module):
    """ Squeeze-and-excitation block: a per-channel gate computed from the
    globally averaged input and multiplied back onto the input
    """
    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, squeezed,
                          kernel_size=1, padding=0, bias=False)

        # only the first 1x1 conv is re-initialised (xavier uniform)
        nn.init.xavier_uniform_(self.fc1.weight.data)

        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(squeezed, channels,
                          kernel_size=1, padding=0, bias=False)

        self.sigmoid = Sigmoid()

    def forward(self, x):
        gate = self.avg_pool(x)
        gate = self.relu(self.fc1(gate))
        gate = self.sigmoid(self.fc2(gate))

        return x * gate
139
+
140
+
141
+
142
class BasicBlockIR(Module):
    """ Two-convolution residual block for IRNet
    """
    def __init__(self, in_channel, depth, stride):
        super().__init__()
        if in_channel != depth:
            # channel count changes: project the shortcut with a 1x1 conv
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth))
        else:
            # same channel count: a kernel-1 max pool only applies the stride
            self.shortcut_layer = MaxPool2d(1, stride)
        residual_ops = [
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            BatchNorm2d(depth),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
        ]
        self.res_layer = Sequential(*residual_ops)

    def forward(self, x):
        identity = self.shortcut_layer(x)
        return self.res_layer(x) + identity
166
+
167
+
168
class BottleneckIR(Module):
    """ Residual block for IRNet with a 1x1 / 3x3 / 1x1 bottleneck
    """
    def __init__(self, in_channel, depth, stride):
        super().__init__()
        # the inner convolutions run at a quarter of the output width
        mid = depth // 4
        if in_channel != depth:
            # channel count changes: project the shortcut with a 1x1 conv
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth))
        else:
            # same channel count: a kernel-1 max pool only applies the stride
            self.shortcut_layer = MaxPool2d(1, stride)
        residual_ops = [
            BatchNorm2d(in_channel),
            Conv2d(in_channel, mid, (1, 1), (1, 1), 0, bias=False),
            BatchNorm2d(mid),
            PReLU(mid),
            Conv2d(mid, mid, (3, 3), (1, 1), 1, bias=False),
            BatchNorm2d(mid),
            PReLU(mid),
            Conv2d(mid, depth, (1, 1), stride, 0, bias=False),
            BatchNorm2d(depth),
        ]
        self.res_layer = Sequential(*residual_ops)

    def forward(self, x):
        identity = self.shortcut_layer(x)
        return self.res_layer(x) + identity
196
+
197
+
198
class BasicBlockIRSE(BasicBlockIR):
    """ BasicBlockIR with an SE module appended to the residual branch
    """
    def __init__(self, in_channel, depth, stride):
        super().__init__(in_channel, depth, stride)
        # appended last, so it gates the output of the residual convolutions
        se_gate = SEModule(depth, 16)
        self.res_layer.add_module("se_block", se_gate)
202
+
203
+
204
class BottleneckIRSE(BottleneckIR):
    """ BottleneckIR with an SE module appended to the residual branch
    """
    def __init__(self, in_channel, depth, stride):
        super().__init__(in_channel, depth, stride)
        # appended last, so it gates the output of the residual convolutions
        se_gate = SEModule(depth, 16)
        self.res_layer.add_module("se_block", se_gate)
208
+
209
+
210
class Bottleneck(namedtuple('Block', 'in_channel depth stride')):
    '''Named tuple holding the configuration of one ResNet unit.'''
212
+
213
+
214
def get_block(in_channel, depth, num_units, stride=2):
    """Describe one network stage as a list of ``Bottleneck`` records.

    The first unit maps ``in_channel`` to ``depth`` using ``stride``; the
    remaining ``num_units - 1`` units keep ``depth`` channels with stride 1.
    """
    units = [Bottleneck(in_channel, depth, stride)]
    units.extend(Bottleneck(depth, depth, 1) for _ in range(num_units - 1))
    return units
218
+
219
+
220
def get_blocks(num_layers):
    """Return the per-stage unit descriptions for a backbone depth.

    Args:
        num_layers: backbone depth; one of 18, 34, 50, 100, 152 or 200.

    Returns:
        A list with one entry per stage (four stages), each entry being the
        list returned by ``get_block`` for that stage.

    Raises:
        ValueError: if ``num_layers`` is not a supported depth. Previously
            this fell through every branch and failed with an
            ``UnboundLocalError`` on the return statement.
    """
    # (in_channel, depth, num_units) for each of the four stages.
    stage_configs = {
        18: ((64, 64, 2), (64, 128, 2), (128, 256, 2), (256, 512, 2)),
        34: ((64, 64, 3), (64, 128, 4), (128, 256, 6), (256, 512, 3)),
        50: ((64, 64, 3), (64, 128, 4), (128, 256, 14), (256, 512, 3)),
        100: ((64, 64, 3), (64, 128, 13), (128, 256, 30), (256, 512, 3)),
        152: ((64, 256, 3), (256, 512, 8), (512, 1024, 36),
              (1024, 2048, 3)),
        200: ((64, 256, 3), (256, 512, 24), (512, 1024, 36),
              (1024, 2048, 3)),
    }

    try:
        stages = stage_configs[num_layers]
    except KeyError:
        supported = ", ".join(str(depth) for depth in sorted(stage_configs))
        raise ValueError(
            "num_layers should be one of {}; got {!r}".format(
                supported, num_layers)) from None

    return [
        get_block(in_channel=in_channel, depth=depth, num_units=num_units)
        for in_channel, depth, num_units in stages
    ]
265
+
266
+
267
class Backbone(Module):
    """IR / IR-SE residual backbone that maps images to 512-d embeddings.

    The network is an input stem (3x3 conv, batch norm, PReLU), a body of
    residual units described by ``get_blocks(num_layers)``, and an output
    head ending in ``Linear(..., 512)`` followed by ``BatchNorm1d(512)``.
    """

    def __init__(self, input_size, num_layers, mode='ir'):
        """ Args:
            input_size: spatial input size; ``input_size[0]`` must be 112 or 224
            num_layers: backbone depth, one of 18, 34, 50, 100, 152 or 200
            mode: 'ir' for plain units or 'ir_se' for units with an SE block
        """
        super(Backbone, self).__init__()
        assert input_size[0] in [112, 224], \
            "input_size should be [112, 112] or [224, 224]"
        # The message now lists every accepted depth (200 was missing).
        assert num_layers in [18, 34, 50, 100, 152, 200], \
            "num_layers should be 18, 34, 50, 100, 152 or 200"
        assert mode in ['ir', 'ir_se'], \
            "mode should be ir or ir_se"
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64), PReLU(64))
        blocks = get_blocks(num_layers)

        # Depths up to 100 use basic units ending at 512 channels; deeper
        # configurations use bottleneck units ending at 2048 channels.
        if num_layers <= 100:
            if mode == 'ir':
                unit_module = BasicBlockIR
            elif mode == 'ir_se':
                unit_module = BasicBlockIRSE
            output_channel = 512
        else:
            if mode == 'ir':
                unit_module = BottleneckIR
            elif mode == 'ir_se':
                unit_module = BottleneckIRSE
            output_channel = 2048

        # The head flattens a 7x7 map for 112 inputs and a 14x14 map for 224.
        feature_side = 7 if input_size[0] == 112 else 14
        self.output_layer = Sequential(
            BatchNorm2d(output_channel), Dropout(0.4), Flatten(),
            Linear(output_channel * feature_side * feature_side, 512),
            BatchNorm1d(512, affine=False))

        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(
                    unit_module(bottleneck.in_channel, bottleneck.depth,
                                bottleneck.stride))
        self.body = Sequential(*modules)

        initialize_weights(self.modules())

    def forward(self, x):
        """Embed ``x`` and return ``(output, norm)``.

        ``norm`` is the L2 norm of the head output along dim 1 (kept as a
        dimension) and ``output`` is the head output divided by that norm.
        """
        x = self.input_layer(x)

        for module in self.body:
            x = module(x)

        x = self.output_layer(x)
        # p=2, dim=1, keepdim=True so the norm broadcasts in the division.
        norm = torch.norm(x, 2, 1, True)
        output = torch.div(x, norm)

        return output, norm
333
+
334
+
335
+
336
def IR_18(input_size):
    """Build an 18-layer backbone with plain 'ir' units."""
    return Backbone(input_size, num_layers=18, mode='ir')
342
+
343
+
344
def IR_34(input_size):
    """Build a 34-layer backbone with plain 'ir' units."""
    return Backbone(input_size, num_layers=34, mode='ir')
350
+
351
+
352
def IR_50(input_size):
    """Build a 50-layer backbone with plain 'ir' units."""
    return Backbone(input_size, num_layers=50, mode='ir')
358
+
359
+
360
def IR_101(input_size):
    """Build the ir-101 backbone with plain 'ir' units.

    The backbone is configured with ``num_layers=100``.
    """
    return Backbone(input_size, num_layers=100, mode='ir')
366
+
367
+
368
def IR_152(input_size):
    """Build a 152-layer backbone with plain 'ir' units."""
    return Backbone(input_size, num_layers=152, mode='ir')
374
+
375
+
376
def IR_200(input_size):
    """Build a 200-layer backbone with plain 'ir' units."""
    return Backbone(input_size, num_layers=200, mode='ir')
382
+
383
+
384
def IR_SE_50(input_size):
    """Build a 50-layer backbone with 'ir_se' units."""
    return Backbone(input_size, num_layers=50, mode='ir_se')
390
+
391
+
392
def IR_SE_101(input_size):
    """Build the ir_se-101 backbone with 'ir_se' units.

    The backbone is configured with ``num_layers=100``.
    """
    return Backbone(input_size, num_layers=100, mode='ir_se')
398
+
399
+
400
def IR_SE_152(input_size):
    """Build a 152-layer backbone with 'ir_se' units."""
    return Backbone(input_size, num_layers=152, mode='ir_se')
406
+
407
+
408
def IR_SE_200(input_size):
    """Build a 200-layer backbone with 'ir_se' units."""
    return Backbone(input_size, num_layers=200, mode='ir_se')
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ numpy
2
+ opencv-python
3
+ mediapipe
4
+ gradio
5
+ torch
6
+ torchvision
7
+ timm
static/idiap-black.png ADDED
static/idiap-white.png ADDED