forked from AoiDragon/VIPER
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_helper.py
More file actions
65 lines (60 loc) · 2.18 KB
/
data_helper.py
File metadata and controls
65 lines (60 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from PIL import Image # add this import
import os
import hashlib
from datasets import load_dataset
from PIL import Image
import json
def _is_pil_image(x):
    """Return True when *x* is an instance of ``Image.Image``, else False."""
    pil_image_type = Image.Image
    return isinstance(x, pil_image_type)
def save_pil_to_file(pil_img, out_dir, filename_stem, ext=".jpg", quality=95):
    """Save an image under *out_dir* and return the path it was written to.

    Args:
        pil_img: Image object to write. Only its ``mode`` attribute and its
            ``convert`` / ``save`` methods are used.
        out_dir: Destination directory; created if it does not exist.
        filename_stem: File name without the extension.
        ext: File extension, matched case-insensitively. A missing leading
            dot is added, so ``"jpg"`` and ``".JPG"`` both yield ``".jpg"``.
        quality: Forwarded unchanged to ``pil_img.save`` as ``quality``.

    Returns:
        The joined path ``out_dir/filename_stem + normalized extension``.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Normalize once so the JPEG check and the file name always agree.
    suffix = ext.lower()
    if suffix and not suffix.startswith("."):
        suffix = f".{suffix}"

    if suffix in (".jpg", ".jpeg"):
        # Modes Pillow's JPEG encoder can write directly. Everything else
        # (alpha, palette, integer/float modes, ...) is converted to RGB
        # first instead of failing inside save().
        jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
        if pil_img.mode not in jpeg_writable_modes:
            pil_img = pil_img.convert("RGB")

    save_path = os.path.join(out_dir, f"{filename_stem}{suffix}")
    pil_img.save(save_path, quality=quality)
    return save_path
def materialize_images_in_item(item, out_dir, item_id, prefer_ext=".jpg"):
    """Replace in-memory PIL images in *item* with paths to saved files.

    ``item["image"]`` is written to disk and replaced by its path when it is
    a PIL image; any other value (including a missing key) is left untouched.

    ``item["reference_frames"]`` is always rewritten to a list: PIL entries
    become saved file paths, string entries are kept as they are, and entries
    of any other type are dropped. A missing, ``None`` or non-list value
    becomes an empty list.

    *item* is modified in place and also returned.
    """
    main_image = item.get("image")
    if _is_pil_image(main_image):
        item["image"] = save_pil_to_file(
            main_image, out_dir, f"{item_id}_image", ext=prefer_ext
        )

    frames = item.get("reference_frames")
    materialized = []
    if isinstance(frames, list):
        for position, frame in enumerate(frames):
            if _is_pil_image(frame):
                frame_stem = f"{item_id}_ref_{position:04d}"
                materialized.append(
                    save_pil_to_file(frame, out_dir, frame_stem, ext=prefer_ext)
                )
            elif isinstance(frame, str):
                materialized.append(frame)
    item["reference_frames"] = materialized
    return item
def get_viper(target="data"):
    """Export the ``Monosail/VIPER`` train split to ``<target>/viper.json``.

    If the JSON file already exists it is returned immediately and nothing is
    loaded. Otherwise each example is copied into a plain dict, passed through
    ``materialize_images_in_item`` (which saves PIL images under
    ``<target>/images``), and the resulting list is dumped as JSON.

    Args:
        target: Output directory; created if it does not exist.

    Returns:
        Path to ``viper.json`` inside *target*.
    """
    os.makedirs(target, exist_ok=True)
    json_path = os.path.join(target, "viper.json")
    if os.path.exists(json_path):
        return json_path

    images_dir = os.path.join(target, "images")
    ds = load_dataset("Monosail/VIPER", split="train")
    out = []
    for idx, ex in enumerate(ds):
        item_id = ex.get("id") or f"{idx:08d}"
        item = dict(ex)  # avoid mutating dataset internal object
        out.append(
            materialize_images_in_item(item, images_dir, item_id, prefer_ext=".jpg")
        )

    # The existence of json_path is the cache check above, so a dump that
    # fails midway must never leave a truncated file at that path. Write to a
    # sibling temp file and move it into place only once it is complete.
    tmp_path = f"{json_path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(out, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, json_path)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up the partial file left by a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return json_path
# Script entry point: run the export with get_viper's default target directory.
if __name__ == "__main__":
    get_viper()