Web platform for sharing free data for ML and research

By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 app.py

View raw Download
text/x-script.python • 22.2 kiB
Python script, ASCII text executable
        
            
1
import json
2
from datetime import datetime
3
from email.policy import default
4
5
import flask
6
from flask_sqlalchemy import SQLAlchemy
7
from flask_bcrypt import Bcrypt
8
from flask_httpauth import HTTPBasicAuth
9
from markupsafe import escape, Markup
10
from flask_migrate import Migrate, current
11
from jinja2_fragments.flask import render_block
12
from sqlalchemy.orm import backref
13
import sqlalchemy.dialects.postgresql
14
from os import path
15
import mimetypes
16
import ruamel.yaml as yaml
17
18
from PIL import Image
19
20
import config
21
import markdown
22
23
24
# Application object and the extensions bound to it.
app = flask.Flask(__name__)
bcrypt = Bcrypt(app)

# Configuration is set before the database extension is created, keeping the
# original ordering.
# NOTE(review): the session-signing key reuses the database password; a
# dedicated secret in config would be safer -- confirm and change in config.
app.config.update(
    SQLALCHEMY_DATABASE_URI=config.DB_URI,
    SECRET_KEY=config.DB_PASSWORD,
)

db = SQLAlchemy(app)
migrate = Migrate(app, db)
34
35
36
@app.template_filter("split")
def split(value, separator=None, maxsplit=-1):
    """Jinja filter exposing ``str.split``: split *value* on *separator*.

    The defaults are passed straight through to ``value.split``.
    """
    pieces = value.split(separator, maxsplit)
    return pieces
39
40
41
@app.template_filter("median")
def median(value):
    """Jinja filter returning the middle element of *value* once sorted.

    For an even number of items the upper of the two middle items is returned
    (no averaging). An empty input raises ``IndexError``.
    """
    ordered = sorted(value)  # sorted() also materialises generators
    return ordered[len(ordered) // 2]
45
46
47
@app.template_filter("set")
def set_filter(value):
    """Jinja filter that collapses *value* into a ``set`` of its unique items."""
    unique_items = set(value)
    return unique_items
50
51
52
with app.app_context():
    class User(db.Model):
        """A registered account, keyed by its username."""

        username = db.Column(
            db.String(32), unique=True, nullable=False, primary_key=True
        )
        # Hash produced by bcrypt, stored as text.
        password_hashed = db.Column(db.String(60), nullable=False)
        admin = db.Column(
            db.Boolean, nullable=False, default=False, server_default="false"
        )
        pictures = db.relationship("PictureResource", back_populates="author")

        def __init__(self, username, password):
            """Create a user, storing only the bcrypt hash of *password*."""
            hashed = bcrypt.generate_password_hash(password)
            self.username = username
            self.password_hashed = hashed.decode("utf-8")
62
63
64
class Licence(db.Model):
    """A licence that pictures can be released under."""

    # SPDX identifier
    id = db.Column(db.String(64), primary_key=True)
    # The official name of the licence
    title = db.Column(db.UnicodeText, nullable=False)
    # Brief description of its permissions and restrictions
    description = db.Column(db.UnicodeText, nullable=False)
    # URL to a page with general information about the licence
    info_url = db.Column(db.String(1024), nullable=False)
    # URL to a page with the full text of the licence and more information
    url = db.Column(db.String(1024), nullable=True)
    pictures = db.relationship("PictureLicence", back_populates="licence")
    # Whether the licence is free or not
    free = db.Column(db.Boolean, nullable=False, default=False)
    # URL to the logo of the licence
    logo_url = db.Column(db.String(1024), nullable=True)
    # Whether the licence should be shown at the top of the list
    pinned = db.Column(db.Boolean, nullable=False, default=False)

    def __init__(self, id, title, description, info_url, url, free, logo_url=None,
                 pinned=False):
        """Populate every column from the matching argument."""
        self.id, self.title, self.description = id, title, description
        self.info_url, self.url = info_url, url
        self.free = free
        self.logo_url = logo_url
        self.pinned = pinned
84
85
86
class PictureLicence(db.Model):
    """Association row linking one picture resource to one licence."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)

    # Foreign keys to the two linked tables.
    resource_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"))
    licence_id = db.Column(db.String(64), db.ForeignKey("licence.id"))

    # ORM-side navigation in both directions.
    resource = db.relationship("PictureResource", back_populates="licences")
    licence = db.relationship("Licence", back_populates="pictures")

    def __init__(self, resource_id, licence_id):
        """Link the picture with *resource_id* to the licence *licence_id*."""
        self.resource_id, self.licence_id = resource_id, licence_id
98
99
100
class Resource(db.Model):
    """Abstract base holding the columns shared by resource models.

    ``__abstract__`` is set, so no table is created for this class itself.
    """

    __abstract__ = True

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    title = db.Column(db.UnicodeText, nullable=False)
    description = db.Column(db.UnicodeText, nullable=False)
    # Defaults to the (naive) UTC time at which the row is created.
    timestamp = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    # Should be left empty if it's original or the source is unknown but
    # public domain.
    origin_url = db.Column(db.String(2048), nullable=True)
108
109
110
class PictureNature(db.Model):
    """The kind of picture a resource is.

    Example identifiers: "photo", "paper-scan", "2d-art-photo",
    "sculpture-photo", "computer-3d", "computer-painting",
    "computer-line-art", "diagram", "infographic", "text", "map",
    "chart-graph", "screen-capture", "screen-photo", "pattern", "collage",
    "ai", and so on.
    """

    id = db.Column(db.String(64), primary_key=True)
    description = db.Column(db.UnicodeText, nullable=False)
    resources = db.relationship("PictureResource", back_populates="nature")

    def __init__(self, id, description):
        """Create a nature with the given identifier and description."""
        self.id, self.description = id, description
122
123
124
class PictureObjectInheritance(db.Model):
    """Parent/child link between two picture objects.

    The pair (parent_id, child_id) forms the composite primary key.
    """

    parent_id = db.Column(
        db.String(64), db.ForeignKey("picture_object.id"), primary_key=True
    )
    child_id = db.Column(
        db.String(64), db.ForeignKey("picture_object.id"), primary_key=True
    )

    # Both foreign keys target the same table, so each relationship names the
    # column it follows.
    parent = db.relationship(
        "PictureObject", foreign_keys=[parent_id], back_populates="child_links"
    )
    child = db.relationship(
        "PictureObject", foreign_keys=[child_id], back_populates="parent_links"
    )

    def __init__(self, parent, child):
        """Link *child* beneath *parent*."""
        self.parent, self.child = parent, child
138
139
140
class PictureObject(db.Model):
    """A type of object that a picture region can be labelled with."""

    id = db.Column(db.String(64), primary_key=True)
    description = db.Column(db.UnicodeText, nullable=False)

    # Inheritance rows in which this object is the parent.
    child_links = db.relationship(
        "PictureObjectInheritance",
        foreign_keys=[PictureObjectInheritance.parent_id],
        back_populates="parent",
    )
    # Inheritance rows in which this object is the child.
    parent_links = db.relationship(
        "PictureObjectInheritance",
        foreign_keys=[PictureObjectInheritance.child_id],
        back_populates="child",
    )

    def __init__(self, id, description):
        """Create an object type with the given identifier and description."""
        self.id, self.description = id, description
154
155
156
class PictureRegion(db.Model):
    """A region annotation on a picture, optionally tagged with an object."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Annotation payload, stored as PostgreSQL JSONB.
    json = db.Column(sqlalchemy.dialects.postgresql.JSONB, nullable=False)

    resource_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=False
    )
    object_id = db.Column(
        db.String(64), db.ForeignKey("picture_object.id"), nullable=True
    )

    # Each backref adds a ``regions`` collection on the target model.
    resource = db.relationship("PictureResource", backref="regions")
    object = db.relationship("PictureObject", backref="regions")

    def __init__(self, json, resource, object):
        """Store the payload *json* for *resource*, labelled with *object*."""
        self.json = json
        self.resource, self.object = resource, object
171
172
173
class PictureResource(Resource):
    """A bitmap picture uploaded to the site.

    This is only for bitmap pictures; vectors will be stored under a different
    model. The file name is the ID in the picture directory under data,
    without an extension.
    """

    file_format = db.Column(db.String(64), nullable=False)  # MIME type
    width = db.Column(db.Integer, nullable=False)
    height = db.Column(db.Integer, nullable=False)
    # NOTE(review): String(32) here while PictureNature.id is String(64) --
    # confirm whether the lengths are meant to differ.
    nature_id = db.Column(
        db.String(32), db.ForeignKey("picture_nature.id"), nullable=True
    )
    author_name = db.Column(
        db.String(32), db.ForeignKey("user.username"), nullable=False
    )
    author = db.relationship("User", back_populates="pictures")

    nature = db.relationship("PictureNature", back_populates="resources")

    # Self-references used to chain a picture to the one it supersedes and to
    # the one that supersedes it.
    replaces_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=True
    )
    replaced_by_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=True
    )

    replaces = db.relationship(
        "PictureResource",
        remote_side="PictureResource.id",
        foreign_keys=[replaces_id],
        back_populates="replaced_by",
    )
    replaced_by = db.relationship(
        "PictureResource",
        remote_side="PictureResource.id",
        foreign_keys=[replaced_by_id],
    )

    licences = db.relationship("PictureLicence", back_populates="resource")

    def __init__(self, title, author, description, origin_url, licence_ids, mime,
                 nature=None, replaces=None):
        """Create the picture row and its licence links.

        The row is added and committed here so that ``self.id`` exists before
        the ``PictureLicence`` rows are built. Those rows, and any
        ``replaces`` link, are only added to the session; the caller must
        commit them. Width and height start at 0 for the caller to fill in.
        """
        self.title = title
        self.author = author
        self.description = description
        self.origin_url = origin_url
        self.file_format = mime
        self.width = 0
        self.height = 0
        self.nature = nature

        # Persist first: the licence links below need the generated ID.
        db.session.add(self)
        db.session.commit()

        for licence_id in licence_ids:
            db.session.add(PictureLicence(self.id, licence_id))

        if replaces is not None:
            self.replaces = replaces
            replaces.replaced_by = self

    def put_annotations(self, json):
        """Replace all region annotations of this picture with those in *json*.

        *json* is iterated as a sequence of mappings, each read for the keys
        "object", "type" and "shape". Nothing is committed here; the caller
        commits.
        """
        # Delete all previous annotations.
        db.session.query(PictureRegion).filter_by(resource_id=self.id).delete()

        for entry in json:
            labelled_object = db.session.get(PictureObject, entry["object"])
            payload = {"type": entry["type"], "shape": entry["shape"]}
            db.session.add(PictureRegion(payload, self, labelled_object))
229
230
231
@app.route("/")
def index():
    """Serve the home page."""
    home_page = flask.render_template("home.html")
    return home_page
234
235
236
@app.route("/accounts/")
def accounts():
    """Serve the page rendered from the login template."""
    login_page = flask.render_template("login.html")
    return login_page
239
240
241
@app.route("/login", methods=["POST"])
def login():
    """Check the posted credentials and start a session on success.

    On failure a message is flashed and the client is sent back to
    ``/accounts``; on success the username is stored in the session and the
    client is redirected to the home page.
    """
    form = flask.request.form
    username = form["username"]
    password = form["password"]

    account = db.session.get(User, username)

    failure = None
    if account is None:
        failure = "This username is not registered."
    elif not bcrypt.check_password_hash(account.password_hashed, password):
        failure = "Incorrect password."

    if failure is not None:
        flask.flash(failure)
        return flask.redirect("/accounts")

    flask.flash("You have been logged in.")
    flask.session["username"] = username
    return flask.redirect("/")
260
261
262
@app.route("/logout")
def logout():
    """Drop the username from the session (if any) and return to the home page."""
    # A default is supplied so logging out twice does not raise.
    flask.session.pop("username", None)
    flask.flash("You have been logged out.")
    home = flask.redirect("/")
    return home
267
268
269
@app.route("/signup", methods=["POST"])
def signup():
    """Register a new account from the posted form and log it in.

    Validation, in order: the username must be unused, consist only of Latin
    letters, digits, hyphens and underscores, and be 3 to 32 characters long;
    the password must be at least 6 characters long. Each failure flashes a
    message and redirects to ``/accounts``. On success the user is stored,
    the session is started and the client is redirected to the home page.
    """
    username = flask.request.form["username"]
    password = flask.request.form["password"]

    if db.session.get(User, username) is not None:
        flask.flash("This username is already taken.")
        return flask.redirect("/accounts")

    allowed_characters = set(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
    )
    # Reject when ANY character falls outside the allowed set. The previous
    # check used ">" (strict superset), which only fired for names containing
    # every allowed character plus more, so it effectively never rejected.
    if not set(username) <= allowed_characters:
        flask.flash("Usernames can only contain the Latin alphabet, digits, hyphens, and underscores.")
        return flask.redirect("/accounts")

    if len(username) < 3 or len(username) > 32:
        flask.flash("Usernames must be between 3 and 32 characters long.")
        return flask.redirect("/accounts")

    if len(password) < 6:
        flask.flash("Passwords must be at least 6 characters long.")
        return flask.redirect("/accounts")

    user = User(username, password)
    db.session.add(user)
    db.session.commit()

    flask.session["username"] = username

    flask.flash("You have been registered and logged in.")

    return flask.redirect("/")
299
300
301
@app.route("/profile", defaults={"username": None})
@app.route("/profile/<username>")
def profile(username):
    """Show a user's profile page.

    Without a username in the URL, a logged-in visitor is redirected to their
    own profile and an anonymous one to ``/accounts``. An unknown username
    yields a 404.
    """
    if username is None:
        if "username" not in flask.session:
            flask.flash("Please log in to perform this action.")
            return flask.redirect("/accounts")
        return flask.redirect("/profile/" + flask.session["username"])

    account = db.session.get(User, username)
    if account is None:
        flask.abort(404)

    return flask.render_template("profile.html", user=account)
316
317
318
@app.route("/upload")
def upload():
    """Serve the upload form to logged-in users.

    Anonymous visitors are redirected to ``/accounts``. The form receives all
    licences (free first, then pinned, then by title) and all picture natures.
    """
    if "username" not in flask.session:
        flask.flash("Log in to upload pictures.")
        return flask.redirect("/accounts")

    licence_ordering = (Licence.free.desc(), Licence.pinned.desc(), Licence.title)
    available_licences = Licence.query.order_by(*licence_ordering).all()
    picture_types = PictureNature.query.all()

    return flask.render_template(
        "upload.html", licences=available_licences, types=picture_types
    )
329
330
331
@app.route("/upload", methods=["POST"])
def upload_post():
    """Handle a picture upload: validate the form, store the row and the file.

    Aborts with 401 when no logged-in user is found. Validation failures flash
    a message and redirect back to the upload URL. On success the picture row
    and licence links are committed, the file is saved under the picture
    directory named by the row ID, the image dimensions are recorded, optional
    annotations are stored, and the client is redirected to the picture page.
    """
    title = flask.request.form["title"]
    description = flask.request.form["description"]
    origin_url = flask.request.form["origin_url"]
    author = db.session.get(User, flask.session.get("username"))
    licence_ids = flask.request.form.getlist("licence")
    nature_id = flask.request.form["nature"]

    if author is None:
        flask.abort(401)

    file = flask.request.files["file"]

    if not file or not file.filename:
        flask.flash("Select a file")
        return flask.redirect(flask.request.url)

    if not file.mimetype.startswith("image/"):
        flask.flash("Only images are supported")
        return flask.redirect(flask.request.url)

    if not title:
        flask.flash("Enter a title")
        return flask.redirect(flask.request.url)

    if not description:
        description = ""

    if not nature_id:
        flask.flash("Select a picture type")
        return flask.redirect(flask.request.url)

    if not licence_ids:
        flask.flash("Select licences")
        return flask.redirect(flask.request.url)

    licences = [db.session.get(Licence, licence_id) for licence_id in licence_ids]
    # An ID that matches no licence comes back as None; reject it here rather
    # than crash on ``.free`` below.
    if any(licence is None for licence in licences):
        flask.flash("Unknown licence selected")
        return flask.redirect(flask.request.url)

    if not any(licence.free for licence in licences):
        flask.flash("Select at least one free licence")
        return flask.redirect(flask.request.url)

    resource = PictureResource(title, author, description, origin_url, licence_ids,
                               file.mimetype, db.session.get(PictureNature, nature_id))
    db.session.add(resource)
    db.session.commit()

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    file.save(picture_path)

    # Close the image handle as soon as the size is read.
    with Image.open(picture_path) as pil_image:
        resource.width, resource.height = pil_image.size
    # Persist the dimensions unconditionally; previously they were only
    # committed when annotations were supplied and parsed successfully.
    db.session.commit()

    annotations = flask.request.form.get("annotations")
    if annotations:
        try:
            resource.put_annotations(json.loads(annotations))
            db.session.commit()
        except json.JSONDecodeError:
            flask.flash("Invalid annotations")

    flask.flash("Picture uploaded successfully")

    return flask.redirect("/picture/" + str(resource.id))
391
392
393
@app.route("/picture/<int:id>/")
def picture(id):
    """Show the page for the picture with the given ID, or 404 if unknown.

    The template receives the resource, a file extension guessed from its
    MIME type, and the pixel size read from the stored file.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    # Use a context manager so the file handle is released after reading the
    # size instead of being left open for the garbage collector.
    with Image.open(picture_path) as image:
        size = image.size

    return flask.render_template("picture.html", resource=resource,
                                 file_extension=mimetypes.guess_extension(resource.file_format),
                                 size=size)
404
405
406
407
@app.route("/picture/<int:id>/annotate")
def annotate_picture(id):
    """Serve the annotation editor for a picture.

    Responds 404 for an unknown picture, 401 when nobody is logged in, and
    403 unless the visitor is the picture's author or an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    visitor = db.session.get(User, flask.session.get("username"))
    if visitor is None:
        flask.abort(401)

    may_edit = resource.author == visitor or visitor.admin
    if not may_edit:
        flask.abort(403)

    extension = mimetypes.guess_extension(resource.file_format)
    return flask.render_template(
        "picture-annotation.html", resource=resource, file_extension=extension
    )
421
422
423
@app.route("/picture/<int:id>/put-annotations-form")
def put_annotations_form(id):
    """Serve the form for submitting annotations for a picture.

    Responds 404 for an unknown picture, 401 when nobody is logged in, and
    403 unless the visitor is the picture's author or an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    visitor = db.session.get(User, flask.session.get("username"))
    if visitor is None:
        flask.abort(401)

    may_edit = resource.author == visitor or visitor.admin
    if not may_edit:
        flask.abort(403)

    return flask.render_template("put-annotations-form.html", resource=resource)
437
438
439
@app.route("/picture/<int:id>/put-annotations-form", methods=["POST"])
def put_annotations_form_post(id):
    """Replace a picture's annotations with the JSON posted in the form.

    Responds 404 for an unknown picture, 401 when nobody is logged in, and
    403 unless the visitor is the picture's author or an admin. Text that is
    not valid JSON flashes "Invalid annotations" and returns to the form;
    otherwise the annotations are stored and the client is redirected to the
    picture page.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    # Parse before touching the database so malformed input cannot produce a
    # server error; the message matches the one used by the upload handler.
    try:
        annotations = json.loads(flask.request.form["annotations"])
    except json.JSONDecodeError:
        flask.flash("Invalid annotations")
        return flask.redirect(flask.request.url)

    resource.put_annotations(annotations)

    db.session.commit()

    return flask.redirect("/picture/" + str(resource.id))
457
458
459
460
@app.route("/picture/<int:id>/save-annotations", methods=["POST"])
def save_annotations(id):
    """Replace a picture's annotations with the JSON request body.

    Responds 404 for an unknown picture, 401 when nobody is logged in, and
    403 unless the visitor is the picture's author or an admin. On success
    the annotations are committed and an empty 204 response is returned.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    # Without this guard an anonymous request reached ``current_user.admin``
    # on None and failed with a server error; the other annotation routes
    # answer 401 here.
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    resource.put_annotations(flask.request.json)

    db.session.commit()

    response = flask.make_response()
    response.status_code = 204
    return response
477
478
479
@app.route("/picture/<int:id>/get-annotations")
def get_annotations(id):
    """Return a picture's region annotations as a JSON list.

    Each item carries "object" (the object ID), "type" and "shape". An
    unknown picture yields a 404.
    """
    if db.session.get(PictureResource, id) is None:
        flask.abort(404)

    stored_regions = db.session.query(PictureRegion).filter_by(resource_id=id).all()

    payload = [
        {
            "object": stored.object_id,
            "type": stored.json["type"],
            "shape": stored.json["shape"],
        }
        for stored in stored_regions
    ]

    return flask.jsonify(payload)
497
498
499
@app.route("/query-pictures", methods=["POST"]) # sadly GET can't have a body
500
def query_pictures():
501
offset = int(flask.request.args.get("offset", 0))
502
limit = int(flask.request.args.get("limit", 16))
503
ordering = flask.request.args.get("ordering", "date-desc")
504
505
yaml_parser = yaml.YAML()
506
query_data = yaml_parser.load(flask.request.data) or {}
507
508
query = db.session.query(PictureResource)
509
510
requirement_conditions = {
511
"has_object": lambda value: PictureResource.regions.any(
512
PictureRegion.object_id.in_(value)),
513
"nature": lambda value: PictureResource.nature_id.in_(value),
514
"licence": lambda value: PictureResource.licences.any(
515
PictureLicence.licence_id.in_(value)),
516
"author": lambda value: PictureResource.author_name.in_(value),
517
"title": lambda value: PictureResource.title.ilike(value),
518
"description": lambda value: PictureResource.description.ilike(value),
519
"origin_url": lambda value: db.func.lower(db.func.substr(
520
PictureResource.origin_url,
521
db.func.length(db.func.split_part(PictureResource.origin_url, "://", 1)) + 4
522
)).in_(value),
523
"above_width": lambda value: PictureResource.width >= value,
524
"below_width": lambda value: PictureResource.width <= value,
525
"above_height": lambda value: PictureResource.height >= value,
526
"below_height": lambda value: PictureResource.height <= value,
527
"before_date": lambda value: PictureResource.timestamp <= datetime.utcfromtimestamp(
528
value),
529
"after_date": lambda value: PictureResource.timestamp >= datetime.utcfromtimestamp(
530
value)
531
}
532
if "want" in query_data:
533
for i in query_data["want"]:
534
requirement, value = list(i.items())[0]
535
condition = requirement_conditions.get(requirement)
536
if condition:
537
query = query.filter(condition(value))
538
if "exclude" in query_data:
539
for i in query_data["exclude"]:
540
requirement, value = list(i.items())[0]
541
condition = requirement_conditions.get(requirement)
542
if condition:
543
query = query.filter(~condition(value))
544
if not query_data.get("include_obsolete", False):
545
query = query.filter(PictureResource.replaced_by_id.is_(None))
546
547
match ordering:
548
case "date-desc":
549
query = query.order_by(PictureResource.timestamp.desc())
550
case "date-asc":
551
query = query.order_by(PictureResource.timestamp.asc())
552
case "title-asc":
553
query = query.order_by(PictureResource.title.asc())
554
case "title-desc":
555
query = query.order_by(PictureResource.title.desc())
556
case "random":
557
query = query.order_by(db.func.random())
558
case "number-regions-desc":
559
query = query.order_by(db.func.count(PictureResource.regions).desc())
560
case "number-regions-asc":
561
query = query.order_by(db.func.count(PictureResource.regions).asc())
562
563
query = query.offset(offset).limit(limit)
564
resources = query.all()
565
566
json_response = {
567
"date_generated": datetime.utcnow().timestamp(),
568
"resources": [],
569
"offset": offset,
570
"limit": limit,
571
}
572
573
json_resources = json_response["resources"]
574
575
for resource in resources:
576
json_resource = {
577
"id": resource.id,
578
"title": resource.title,
579
"description": resource.description,
580
"timestamp": resource.timestamp.timestamp(),
581
"origin_url": resource.origin_url,
582
"author": resource.author_name,
583
"file_format": resource.file_format,
584
"width": resource.width,
585
"height": resource.height,
586
"nature": resource.nature_id,
587
"licences": [licence.licence_id for licence in resource.licences],
588
"replaces": resource.replaces_id,
589
"replaced_by": resource.replaced_by_id,
590
"regions": [],
591
}
592
for region in resource.regions:
593
json_resource["regions"].append({
594
"object": region.object_id,
595
"type": region.json["type"],
596
"shape": region.json["shape"],
597
})
598
599
json_resources.append(json_resource)
600
601
response = flask.jsonify(json_response)
602
response.headers["Content-Type"] = "application/json"
603
return response
604
605
606
@app.route("/raw/picture/<int:id>")
def raw_picture(id):
    """Send the stored file of the picture with the given ID, or 404.

    The response's MIME type is set from the resource's recorded file format.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    picture_directory = path.join(config.DATA_PATH, "pictures")
    stored_name = str(resource.id)

    response = flask.send_from_directory(picture_directory, stored_name)
    response.mimetype = resource.file_format
    return response
616
617
618
@app.route("/api/object-types")
def object_types():
    """Return a JSON object mapping each picture object ID to its description."""
    descriptions = {}
    for picture_object in db.session.query(PictureObject).all():
        descriptions[picture_object.id] = picture_object.description
    return flask.jsonify(descriptions)
622