# download_data.py
# Python script (text/x-script.python, ASCII text executable), 3.55 kiB

import io
import os
import shutil
import tempfile
from pathlib import Path

import httpx
import tqdm
from PIL import Image, ImageOps

# YAML query posted to the query-pictures endpoint. It asks for
# `has: ["waste"]` and `nature: ["photo"]`.
# NOTE(review): any indentation of the list items was lost in the paste this
# file was recovered from; the un-indented form below is valid YAML.
query_yaml = """
want:
- has: ["waste"]
- nature: ["photo"]
"""

# NOTE(review): nothing else in the visible script reads `object_types`;
# confirm it is still needed before keeping this request.
object_types_response = httpx.get("https://datasets.roundabout-host.com/api/object-types")
# Fail loudly on an HTTP error instead of trying to parse an error page.
object_types_response.raise_for_status()
object_types = object_types_response.json()

# Object types the dataset is reduced to. Every value starts as an empty set
# and is later replaced by the ids found under that type.
# Regions are matched against these entries in insertion order and the first
# match wins, so presumably the narrower types are listed first on purpose.
base_types = {
    "plastic household waste": set(),
    "glass household waste": set(),
    "metal household waste": set(),
    "paper and cardboard": set(),
    "organic household waste": set(),
    "household waste": set(),
    "waste": set(),
}

# Cache of already-resolved results of get_objects(), keyed by object id.
memoisation = {}


def get_objects(object_info):
    """Return the ids of an object type and of everything below it.

    Args:
        object_info: Object description as returned by the ``/api/object/``
            endpoint; only its ``"id"`` and ``"children"`` entries are read.

    Returns:
        A set holding ``object_info["id"]`` plus the ids collected
        recursively from every child. The same set object is stored in
        ``memoisation`` and handed out again on later calls, so callers
        should treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If fetching a child description fails.
    """
    object_id = object_info["id"]
    if object_id in memoisation:
        return memoisation[object_id]

    objects = {object_id}
    # `child` rather than `object`, so the builtin is not shadowed.
    for child in object_info["children"]:
        response = httpx.get("https://datasets.roundabout-host.com/api/object/" + child)
        response.raise_for_status()
        objects |= get_objects(response.json())

    memoisation[object_id] = objects
    return objects


# Replace each placeholder set in `base_types` with the ids returned by
# get_objects() for that type. Only existing keys are reassigned, so mutating
# the dict while iterating over it is safe.
for object_name in base_types:
    response = httpx.get("https://datasets.roundabout-host.com/api/object/" + object_name)
    # Fail loudly on an HTTP error instead of parsing an error page.
    response.raise_for_status()
    object_info = response.json()
    base_types[object_name] = get_objects(object_info)
    print(object_name, base_types[object_name])

photos = []
offset = 0
limit = 192  # page size requested from the API
output = Path("data/")

print("Downloading photo metadata...")

# Page through the query results until the server returns an empty page.
while True:
    response = httpx.post(
        f"https://datasets.roundabout-host.com/api/query-pictures?offset={offset}&limit={limit}",
        data={"query": query_yaml},
    )
    # Fail loudly on an HTTP error instead of hitting a KeyError below.
    response.raise_for_status()
    result = response.json()
    if not result["resources"]:
        break
    photos += result["resources"]
    offset += limit
    print(f"Received photos {offset-limit} to {offset}")

# Start from an empty output directory: whatever a previous run left in
# `output` is deleted. os.makedirs() is deliberately called without
# exist_ok, so it raises if the old directory could not be removed.
shutil.rmtree(output, ignore_errors=True)
os.makedirs(output)

# Numeric class id written to the annotation files for each base type.
# The keys must stay in sync with the keys of `base_types`.
class_mapping = {
    "plastic household waste": 0,
    "glass household waste": 1,
    "metal household waste": 2,
    "paper and cardboard": 3,
    "organic household waste": 4,
    "household waste": 5,
    "waste": 6,
}

def _region_box(region):
    """Return ``(cx, cy, w, h)`` for a region, or ``None`` if it has no usable shape.

    ``bbox`` regions are converted from corner + size to centre + size;
    ``polygon`` regions are reduced to their axis-aligned bounding box.
    Coordinates are passed through in whatever units the API delivers them.
    """
    shape = region["shape"]
    if region["type"] == "bbox":
        w = shape["w"]
        h = shape["h"]
        return shape["x"] + w / 2, shape["y"] + h / 2, w, h
    if region["type"] == "polygon":
        if not shape:
            # min()/max() would raise on a polygon without points.
            return None
        xs = [point["x"] for point in shape]
        ys = [point["y"] for point in shape]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        return (left + right) / 2, (top + bottom) / 2, right - left, bottom - top
    # Unknown region type: there is nothing sensible to write for it.
    return None


for photo in tqdm.tqdm(photos):
    # Download the photo
    result = httpx.get(photo["download"], follow_redirects=True)
    result.raise_for_status()
    # Decode from memory. The shared temporary file used before was rewound
    # but never truncated, so a smaller image kept the tail of a larger one.
    image = Image.open(io.BytesIO(result.content), formats=["JPEG"])
    ImageOps.exif_transpose(image, in_place=True)
    image.thumbnail((640, 640))
    image.save(output / (str(photo["id"]) + ".jpg"))

    # Write the annotations, one "<class> <cx> <cy> <w> <h>" line per region
    # (presumably YOLO-style labels; verify against the training code).
    with open(output / (str(photo["id"]) + ".txt"), "w") as annotation_file:
        for region in photo["regions"]:
            # First base type whose id set contains the region's object wins.
            klass = next(
                (
                    class_mapping[base_type]
                    for base_type, object_ids in base_types.items()
                    if region["object"] in object_ids
                ),
                None,
            )
            if klass is None:
                continue

            box = _region_box(region)
            if box is None:
                continue

            cx, cy, w, h = box
            annotation_file.write(f"{klass} {cx} {cy} {w} {h}\n")