By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 download_data.py

View raw Download
text/x-script.python • 3.83 kiB
Python script, ASCII text executable
        
            
1
import os
2
import shutil
3
import httpx
4
import tqdm
5
from PIL import Image, ImageOps
6
import tempfile
7
from pathlib import Path
8
import random
9
10
# Probability with which a downloaded photo is written to the validation
# directory instead of the training directory.
VALIDATION_SPLIT = 0.1

# YAML query document posted to the query-pictures endpoint.
query_yaml = """
want:
- has: ["waste"]
- nature: ["photo"]
"""

# Object-type listing from the dataset API.
# NOTE(review): not referenced anywhere else in the visible script.
object_types = httpx.get(
    "https://datasets.roundabout-host.com/api/object-types"
).json()

# One entry per base category. Each starts as its own empty set and is later
# replaced by the set of object IDs belonging to that category.
base_types = {
    category: set()
    for category in (
        "plastic household waste",
        "glass household waste",
        "metal household waste",
        "paper and cardboard",
        "organic household waste",
        "household waste",
        "waste",
    )
}

# Cache used by get_objects(): object ID -> set of IDs already resolved for it.
memoisation = dict()
31
32
def get_objects(object_info):
    """Return the ID of ``object_info`` plus the IDs of all its descendants.

    Every child is looked up with its own HTTP request against the dataset
    API, so finished results are cached in the module-level ``memoisation``
    dict, keyed by object ID.

    Args:
        object_info: Decoded API record of one object. Its ``"id"`` and
            ``"children"`` entries are read.

    Returns:
        A set holding ``object_info["id"]`` and every ID reachable through
        ``"children"``. The same set object is stored in the cache, so
        callers should treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If a child lookup is answered with an HTTP
            error status.
    """
    object_id = object_info["id"]
    if object_id in memoisation:
        return memoisation[object_id]

    objects = {object_id}
    # Named "child" because "object" would shadow the builtin of that name.
    for child in object_info["children"]:
        response = httpx.get("https://datasets.roundabout-host.com/api/object/" + child)
        # Surface a failed lookup here instead of as a KeyError on the
        # decoded error body further down the recursion.
        response.raise_for_status()
        objects |= get_objects(response.json())

    memoisation[object_id] = objects
    return objects
43
44
# Swap each placeholder set in base_types for the ID set that get_objects()
# resolves from the API record of that base type, and echo it for inspection.
for object_name in base_types:
    object_info = httpx.get(
        f"https://datasets.roundabout-host.com/api/object/{object_name}"
    ).json()
    base_types[object_name] = get_objects(object_info)
    print(object_name, base_types[object_name])
48
49
# Destination directories for the training and validation splits.
output = Path("data/")
val_output = Path("val_data/")

# Paging state for the metadata query.
photos = []
offset = 0
limit = 192

print("Downloading photo metadata...")

# Request page after page; the first page without resources ends the loop
# (that empty page is still requested and reported, like every other page).
while True:
    result = httpx.post(
        f"https://datasets.roundabout-host.com/api/query-pictures?offset={offset}&limit={limit}",
        data={"query": query_yaml},
    ).json()
    photos.extend(result["resources"])
    offset += limit
    print(f"Received photos {offset-limit} to {offset}")
    if not result["resources"]:
        break
63
64
# Start every run from empty output directories: remove whatever an earlier
# run left behind (errors during removal are ignored), then recreate them.
for _directory in (output, val_output):
    shutil.rmtree(_directory, ignore_errors=True)
    os.makedirs(_directory)
68
69
# Numeric class index written to the annotation files for each base category;
# the index is the category's position in the tuple below.
class_mapping = {
    category: index
    for index, category in enumerate(
        (
            "plastic household waste",
            "glass household waste",
            "metal household waste",
            "paper and cardboard",
            "organic household waste",
            "household waste",
            "waste",
        )
    )
}
78
79
# Download every photo, shrink it to fit 640x640, store it in the training or
# validation directory, and write one "<class> <cx> <cy> <w> <h>" line per
# recognised region into a .txt file of the same name.
# NOTE(review): coordinates are written exactly as the API delivers them;
# confirm they are already in the scale the training tool expects.
with tempfile.NamedTemporaryFile(delete_on_close=False, mode="wb") as temporary_file:
    for photo in tqdm.tqdm(photos):
        # Download the photo
        result = httpx.get(photo["download"], follow_redirects=True, timeout=15)
        # Do not hand an HTTP error page to the JPEG decoder.
        result.raise_for_status()

        # The temporary file is reused for every photo: rewind and truncate
        # first so bytes of an earlier, larger download cannot trail behind
        # the current one.
        temporary_file.seek(0)
        temporary_file.truncate()
        temporary_file.write(result.content)
        # Make sure the bytes are on disk before Pillow reopens the file by name.
        temporary_file.flush()

        image = Image.open(temporary_file.name, formats=["JPEG"])
        ImageOps.exif_transpose(image, in_place=True)
        image.thumbnail((640, 640))

        # Random split: roughly VALIDATION_SPLIT of the photos go to validation.
        directory = val_output if random.random() < VALIDATION_SPLIT else output
        image.save(directory / (str(photo["id"]) + ".jpg"))

        # Download the annotations
        with open(directory / (str(photo["id"]) + ".txt"), "w") as annotation_file:
            for region in photo["regions"]:
                # The first base type (in dict order) containing the object wins.
                # NOTE(review): base_types is presumably ordered from most
                # specific to most general - confirm.
                klass = next(
                    (
                        class_mapping[base_type]
                        for base_type, members in base_types.items()
                        if region["object"] in members
                    ),
                    None,
                )
                if klass is None:
                    # Object is outside every base type: no label for it.
                    continue

                shape = region["shape"]
                if region["type"] == "bbox":
                    w = shape["w"]
                    h = shape["h"]
                    cx = shape["x"] + w / 2
                    cy = shape["y"] + h / 2
                elif region["type"] == "polygon":
                    if not shape:
                        # A polygon without points has no bounding box.
                        continue
                    xs = [point["x"] for point in shape]
                    ys = [point["y"] for point in shape]
                    # Labels are boxes, so use the polygon's bounding box.
                    cx = (min(xs) + max(xs)) / 2
                    cy = (min(ys) + max(ys)) / 2
                    w = max(xs) - min(xs)
                    h = max(ys) - min(ys)
                else:
                    # Unknown region type: skip it instead of writing the
                    # previous region's coordinates again.
                    continue

                annotation_file.write(f"{klass} {cx} {cy} {w} {h}\n")
118
119