Web platform for sharing free data for ML and research

By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 app.py

View raw Download
text/x-script.python • 23.31 kiB
Python script, ASCII text executable
        
            
1
import json
2
from datetime import datetime
3
from email.policy import default
4
from time import perf_counter
5
6
import flask
7
from flask_sqlalchemy import SQLAlchemy
8
from flask_bcrypt import Bcrypt
9
from flask_httpauth import HTTPBasicAuth
10
from markupsafe import escape, Markup
11
from flask_migrate import Migrate, current
12
from jinja2_fragments.flask import render_block
13
from sqlalchemy.orm import backref
14
import sqlalchemy.dialects.postgresql
15
from os import path
16
from urllib.parse import urlencode
17
import mimetypes
18
import ruamel.yaml as yaml
19
20
from PIL import Image
21
22
import config
23
import markdown
24
25
26
# Application object and the extensions bound to it.
app = flask.Flask(__name__)
bcrypt = Bcrypt(app)

app.config["SQLALCHEMY_DATABASE_URI"] = config.DB_URI
# The session-signing key should not be the database password: leaking one
# would then compromise the other. Prefer a dedicated ``config.SECRET_KEY``
# and fall back to the historical value so existing deployments keep working.
app.config["SECRET_KEY"] = getattr(config, "SECRET_KEY", config.DB_PASSWORD)

db = SQLAlchemy(app)
migrate = Migrate(app, db)
36
37
38
@app.template_filter("split")
def split(value, separator=None, maxsplit=-1):
    """Template filter that exposes ``value.split`` to templates.

    ``separator`` and ``maxsplit`` are forwarded positionally and unchanged,
    so the defaults are whatever ``value.split(None, -1)`` means for the
    given object.
    """
    pieces = value.split(separator, maxsplit)
    return pieces
41
42
43
@app.template_filter("median")
def median(value):
    """Template filter returning the middle element of ``value`` once sorted.

    ``value`` may be any iterable, generators included. With an even number
    of items the upper of the two middle elements is returned (nothing is
    averaged). An empty iterable raises ``IndexError``.
    """
    ordered = sorted(value)  # sorted() also materialises generators
    middle = len(ordered) // 2
    return ordered[middle]
47
48
49
@app.template_filter("set")
def set_filter(value):
    """Template filter (registered as ``set``) that de-duplicates an iterable."""
    unique_items = {*value}
    return unique_items
52
53
54
@app.template_global()
def modify_query(**new_values):
    """Return the current request path with some query parameters overridden.

    A copy of the current request's query arguments is taken, every keyword
    argument is assigned into it by key, and the result is URL-encoded and
    appended to ``flask.request.path`` after a ``?``.
    """
    params = flask.request.args.copy()
    for name in new_values:
        params[name] = new_values[name]

    query_string = urlencode(params)
    return f"{flask.request.path}?{query_string}"
61
62
63
@app.context_processor
def default_variables():
    """Make ``current_user`` available in every rendered template.

    The user is looked up by the ``username`` stored in the session; the
    lookup key is ``None`` when nobody is logged in.
    """
    session_username = flask.session.get("username")
    logged_in_user = db.session.get(User, session_username)
    return dict(current_user=logged_in_user)
68
69
70
with app.app_context():
71
class User(db.Model):
    """A registered account, keyed by its username."""

    # Login name; it is also the primary key.
    username = db.Column(
        db.String(32), unique=True, nullable=False, primary_key=True
    )
    # Hashed password, stored as text (never the plain password).
    password_hashed = db.Column(db.String(60), nullable=False)
    # Administrator flag; off by default both in Python and in the database.
    admin = db.Column(
        db.Boolean, nullable=False, default=False, server_default="false"
    )
    # Pictures authored by this user (other side: PictureResource.author).
    pictures = db.relationship("PictureResource", back_populates="author")

    def __init__(self, username, password):
        """Create a user, hashing ``password`` before it is stored."""
        self.username = username
        hashed = bcrypt.generate_password_hash(password)
        self.password_hashed = hashed.decode("utf-8")
80
81
82
class Licence(db.Model):
    """A licence that pictures can be published under."""

    # SPDX identifier
    id = db.Column(db.String(64), primary_key=True)
    # The official name of the licence
    title = db.Column(db.UnicodeText, nullable=False)
    # Brief description of its permissions and restrictions
    description = db.Column(db.UnicodeText, nullable=False)
    # URL to a page with general information about the licence
    info_url = db.Column(db.String(1024), nullable=False)
    # URL to a page with the full text of the licence and more information
    url = db.Column(db.String(1024), nullable=True)
    # Join rows linking this licence to pictures
    pictures = db.relationship("PictureLicence", back_populates="licence")
    # Whether the licence is free or not
    free = db.Column(db.Boolean, nullable=False, default=False)
    # URL to the logo of the licence
    logo_url = db.Column(db.String(1024), nullable=True)
    # Whether the licence should be shown at the top of the list
    pinned = db.Column(db.Boolean, nullable=False, default=False)

    def __init__(self, id, title, description, info_url, url, free,
                 logo_url=None, pinned=False):
        """Store every field as given; no validation is performed here."""
        self.id = id
        self.title = title
        self.description = description

        self.info_url = info_url
        self.url = url
        self.logo_url = logo_url

        self.free = free
        self.pinned = pinned
102
103
104
class PictureLicence(db.Model):
    """Join row associating one picture resource with one licence."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)

    # Foreign keys to the two sides of the association.
    resource_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id")
    )
    licence_id = db.Column(
        db.String(64), db.ForeignKey("licence.id")
    )

    resource = db.relationship("PictureResource", back_populates="licences")
    licence = db.relationship("Licence", back_populates="pictures")

    def __init__(self, resource_id, licence_id):
        """Link the picture ``resource_id`` to the licence ``licence_id``."""
        self.resource_id, self.licence_id = resource_id, licence_id
116
117
118
class Resource(db.Model):
    """Abstract base declaring the columns shared by resource models.

    ``__abstract__`` is set, so this class only contributes columns to its
    subclasses.
    """

    __abstract__ = True

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    title = db.Column(db.UnicodeText, nullable=False)
    description = db.Column(db.UnicodeText, nullable=False)
    # Defaults to the naive UTC time at insertion.
    timestamp = db.Column(
        db.DateTime, nullable=False, default=datetime.utcnow
    )
    # Should be left empty if it's original or the source is unknown but
    # public domain.
    origin_url = db.Column(db.String(2048), nullable=True)
126
127
128
class PictureNature(db.Model):
    """The kind of picture a resource is, identified by a short string.

    Examples:
    "photo", "paper-scan", "2d-art-photo", "sculpture-photo", "computer-3d",
    "computer-painting", "computer-line-art", "diagram", "infographic",
    "text", "map", "chart-graph", "screen-capture", "screen-photo",
    "pattern", "collage", "ai", and so on
    """

    id = db.Column(db.String(64), primary_key=True)
    description = db.Column(db.UnicodeText, nullable=False)
    # Pictures of this nature (other side: PictureResource.nature).
    resources = db.relationship("PictureResource", back_populates="nature")

    def __init__(self, id, description):
        """Create a nature with the given identifier and description."""
        self.id, self.description = id, description
140
141
142
class PictureObjectInheritance(db.Model):
    """Parent/child link between two picture objects.

    The pair ``(parent_id, child_id)`` forms the composite primary key.
    """

    parent_id = db.Column(
        db.String(64),
        db.ForeignKey("picture_object.id"),
        primary_key=True,
    )
    child_id = db.Column(
        db.String(64),
        db.ForeignKey("picture_object.id"),
        primary_key=True,
    )

    # Both relationships target PictureObject, so each names the foreign key
    # it follows.
    parent = db.relationship(
        "PictureObject",
        foreign_keys=[parent_id],
        back_populates="child_links",
    )
    child = db.relationship(
        "PictureObject",
        foreign_keys=[child_id],
        back_populates="parent_links",
    )

    def __init__(self, parent, child):
        """Link ``child`` underneath ``parent`` (both are related objects)."""
        self.parent = parent
        self.child = child
156
157
158
class PictureObject(db.Model):
    """A type of object that can be annotated in a picture."""

    id = db.Column(db.String(64), primary_key=True)
    description = db.Column(db.UnicodeText, nullable=False)

    # Inheritance rows in which this object is the parent.
    child_links = db.relationship(
        "PictureObjectInheritance",
        foreign_keys=[PictureObjectInheritance.parent_id],
        back_populates="parent",
    )
    # Inheritance rows in which this object is the child.
    parent_links = db.relationship(
        "PictureObjectInheritance",
        foreign_keys=[PictureObjectInheritance.child_id],
        back_populates="child",
    )

    def __init__(self, id, description):
        """Create an object type with the given identifier and description."""
        self.id, self.description = id, description
172
173
174
class PictureRegion(db.Model):
    """A region annotation drawn on a picture."""

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    # Annotation payload, stored as PostgreSQL JSONB.
    json = db.Column(sqlalchemy.dialects.postgresql.JSONB, nullable=False)

    # A region always belongs to a picture; its object is optional.
    resource_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=False
    )
    object_id = db.Column(
        db.String(64), db.ForeignKey("picture_object.id"), nullable=True
    )

    # Each relationship adds a ``regions`` backref on the related model.
    resource = db.relationship("PictureResource", backref="regions")
    object = db.relationship("PictureObject", backref="regions")

    def __init__(self, json, resource, object):
        """Create a region with payload ``json`` on ``resource``, tagged ``object``."""
        self.json = json
        self.resource, self.object = resource, object
189
190
191
class PictureResource(Resource):
    """A bitmap picture uploaded to the site.

    This is only for bitmap pictures. Vectors will be stored under a
    different model. File name is the ID in the picture directory under
    data, without an extension.
    """

    # MIME type
    file_format = db.Column(db.String(64), nullable=False)
    width = db.Column(db.Integer, nullable=False)
    height = db.Column(db.Integer, nullable=False)
    # NOTE(review): this column is String(32) while PictureNature.id is
    # String(64) -- confirm whether the shorter length is intended.
    nature_id = db.Column(
        db.String(32), db.ForeignKey("picture_nature.id"), nullable=True
    )
    author_name = db.Column(
        db.String(32), db.ForeignKey("user.username"), nullable=False
    )
    author = db.relationship("User", back_populates="pictures")

    nature = db.relationship("PictureNature", back_populates="resources")

    # Self-referential links between a picture and its replacement.
    replaces_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=True
    )
    replaced_by_id = db.Column(
        db.Integer, db.ForeignKey("picture_resource.id"), nullable=True
    )

    replaces = db.relationship(
        "PictureResource",
        remote_side="PictureResource.id",
        foreign_keys=[replaces_id],
        back_populates="replaced_by",
    )
    replaced_by = db.relationship(
        "PictureResource",
        remote_side="PictureResource.id",
        foreign_keys=[replaced_by_id],
    )

    licences = db.relationship("PictureLicence", back_populates="resource")

    def __init__(self, title, author, description, origin_url, licence_ids,
                 mime, nature=None, replaces=None):
        """Create the picture row and stage its licence links.

        The constructor adds itself to the session and commits right away,
        then stages one ``PictureLicence`` per entry of ``licence_ids``
        using the new ``self.id``. Those join rows, and the optional
        ``replaces`` links, are left for the caller to commit. Width and
        height start at 0.
        """
        self.title = title
        self.author = author
        self.description = description
        self.origin_url = origin_url
        self.file_format = mime
        self.width = self.height = 0
        self.nature = nature

        # Committed before the join rows are built, since they use self.id.
        db.session.add(self)
        db.session.commit()

        licence_links = [
            PictureLicence(self.id, licence_id) for licence_id in licence_ids
        ]
        db.session.add_all(licence_links)

        if replaces is not None:
            # Record the link in both directions.
            self.replaces = replaces
            replaces.replaced_by = self

    def put_annotations(self, json):
        """Replace all region annotations of this picture.

        ``json`` is iterated; each entry is indexed with the keys
        ``"object"``, ``"type"`` and ``"shape"``. Existing regions are
        deleted first. Nothing is committed here.
        """
        # Delete all previous annotations
        db.session.query(PictureRegion).filter_by(resource_id=self.id).delete()

        for entry in json:
            target_object = db.session.get(PictureObject, entry["object"])
            payload = {"type": entry["type"], "shape": entry["shape"]}
            db.session.add(PictureRegion(payload, self, target_object))
247
248
249
@app.route("/")
def index():
    """Render the home page."""
    home_page = flask.render_template("home.html")
    return home_page
252
253
254
@app.route("/accounts/")
def accounts():
    """Render the ``login.html`` template."""
    login_page = flask.render_template("login.html")
    return login_page
257
258
259
@app.route("/login", methods=["POST"])
def login():
    """Check the submitted credentials and start a session.

    Reads ``username`` and ``password`` from the form. On failure a message
    is flashed and the client is redirected to ``/accounts``; on success the
    username is stored in the session and the client is redirected to ``/``.
    """
    form = flask.request.form
    username = form["username"]
    password = form["password"]

    account = db.session.get(User, username)

    # Work out which failure message applies, if any.
    if account is None:
        failure = "This username is not registered."
    elif not bcrypt.check_password_hash(account.password_hashed, password):
        failure = "Incorrect password."
    else:
        failure = None

    if failure is not None:
        flask.flash(failure)
        return flask.redirect("/accounts")

    flask.flash("You have been logged in.")
    flask.session["username"] = username
    return flask.redirect("/")
278
279
280
@app.route("/logout")
def logout():
    """Remove the username from the session and return to the home page."""
    # pop() with a default, so logging out twice is harmless.
    flask.session.pop("username", None)
    flask.flash("You have been logged out.")
    home = flask.redirect("/")
    return home
285
286
287
@app.route("/signup", methods=["POST"])
def signup():
    """Register a new account from the submitted form and log it in.

    Reads ``username`` and ``password`` from the form. Every validation
    failure flashes a message and redirects to ``/accounts``. On success the
    user is committed, the username is stored in the session, and the client
    is redirected to ``/``.
    """
    username = flask.request.form["username"]
    password = flask.request.form["password"]

    if db.session.get(User, username) is not None:
        flask.flash("This username is already taken.")
        return flask.redirect("/accounts")

    allowed_characters = set(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
    )
    # Reject as soon as ANY character is outside the allowed set. A strict
    # superset test (``>``) would only fire for names containing every
    # allowed character plus something else, so it must be "not a subset".
    if not set(username) <= allowed_characters:
        flask.flash("Usernames can only contain the Latin alphabet, digits, hyphens, and underscores.")
        return flask.redirect("/accounts")

    if len(username) < 3 or len(username) > 32:
        flask.flash("Usernames must be between 3 and 32 characters long.")
        return flask.redirect("/accounts")

    if len(password) < 6:
        flask.flash("Passwords must be at least 6 characters long.")
        return flask.redirect("/accounts")

    user = User(username, password)
    db.session.add(user)
    db.session.commit()

    flask.session["username"] = username

    flask.flash("You have been registered and logged in.")

    return flask.redirect("/")
317
318
319
@app.route("/profile", defaults={"username": None})
@app.route("/profile/<username>")
def profile(username):
    """Show a user's profile page.

    Without a username in the URL, a logged-in visitor is redirected to
    their own profile and an anonymous one to ``/accounts``. An unknown
    username aborts with 404.
    """
    if username is None:
        if "username" not in flask.session:
            flask.flash("Please log in to perform this action.")
            return flask.redirect("/accounts")
        own_profile_url = "/profile/" + flask.session["username"]
        return flask.redirect(own_profile_url)

    profile_owner = db.session.get(User, username)
    if profile_owner is None:
        flask.abort(404)

    return flask.render_template("profile.html", user=profile_owner)
334
335
336
@app.route("/object/<id>")
def has_object(id):
    """List, page by page, the pictures that have a region of object ``id``.

    Query parameters ``page`` (default 1) and ``per_page`` (default 16)
    control pagination. An unknown object id aborts with 404.
    """
    object_ = db.session.get(PictureObject, id)
    if object_ is None:
        flask.abort(404)

    query = db.session.query(PictureResource).join(PictureRegion).filter(PictureRegion.object_id == id)

    # ``type=int`` falls back to the default when the value is not a valid
    # integer, instead of letting ``int()`` raise and produce a 500.
    page = flask.request.args.get("page", 1, type=int)
    per_page = flask.request.args.get("per_page", 16, type=int)

    resources = query.paginate(page=page, per_page=per_page)

    return flask.render_template("object.html", object=object_, resources=resources, page_number=page,
                                 page_length=per_page, num_pages=resources.pages, prev_page=resources.prev_num,
                                 next_page=resources.next_num)
352
353
354
@app.route("/upload")
def upload():
    """Render the upload form; anonymous visitors are sent to ``/accounts``."""
    logged_in = "username" in flask.session
    if not logged_in:
        flask.flash("Log in to upload pictures.")
        return flask.redirect("/accounts")

    # Free licences first, then pinned ones, then alphabetically by title.
    licence_order = (Licence.free.desc(), Licence.pinned.desc(), Licence.title)
    available_licences = Licence.query.order_by(*licence_order).all()

    picture_types = PictureNature.query.all()

    return flask.render_template(
        "upload.html", licences=available_licences, types=picture_types
    )
365
366
367
@app.route("/upload", methods=["POST"])
def upload_post():
    """Handle a picture upload submitted from the upload form.

    Form fields read: ``title``, ``description``, ``origin_url``,
    ``licence`` (repeatable), ``nature`` and optionally ``annotations``
    (a JSON string). The picture itself comes from the ``file`` upload.

    Aborts with 401 when nobody is logged in. Validation failures flash a
    message and redirect back to the request URL. On success the file is
    stored under ``<DATA_PATH>/pictures/<id>`` and the client is redirected
    to the new picture page.
    """
    title = flask.request.form["title"]
    description = flask.request.form["description"]
    origin_url = flask.request.form["origin_url"]
    author = db.session.get(User, flask.session.get("username"))
    licence_ids = flask.request.form.getlist("licence")
    nature_id = flask.request.form["nature"]

    if author is None:
        flask.abort(401)

    file = flask.request.files["file"]

    if not file or not file.filename:
        flask.flash("Select a file")
        return flask.redirect(flask.request.url)

    if not file.mimetype.startswith("image/"):
        flask.flash("Only images are supported")
        return flask.redirect(flask.request.url)

    if not title:
        flask.flash("Enter a title")
        return flask.redirect(flask.request.url)

    if not description:
        description = ""

    if not nature_id:
        flask.flash("Select a picture type")
        return flask.redirect(flask.request.url)

    if not licence_ids:
        flask.flash("Select licences")
        return flask.redirect(flask.request.url)

    licences = [db.session.get(Licence, licence_id) for licence_id in licence_ids]
    # An unknown id yields None; reject it here instead of failing on
    # ``None.free`` below.
    if any(licence is None for licence in licences):
        flask.flash("Unknown licence selected")
        return flask.redirect(flask.request.url)

    if not any(licence.free for licence in licences):
        flask.flash("Select at least one free licence")
        return flask.redirect(flask.request.url)

    resource = PictureResource(title, author, description, origin_url, licence_ids, file.mimetype,
                               db.session.get(PictureNature, nature_id))
    db.session.add(resource)
    db.session.commit()

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    file.save(picture_path)

    # Close the image file as soon as the size has been read.
    with Image.open(picture_path) as pil_image:
        resource.width, resource.height = pil_image.size
    # Persist the dimensions unconditionally; otherwise they would only be
    # saved when annotations happen to be supplied as well.
    db.session.commit()

    annotations = flask.request.form.get("annotations")
    if annotations:
        try:
            parsed_annotations = json.loads(annotations)
        except json.JSONDecodeError:
            flask.flash("Invalid annotations")
        else:
            resource.put_annotations(parsed_annotations)
            db.session.commit()

    flask.flash("Picture uploaded successfully")

    return flask.redirect("/picture/" + str(resource.id))
427
428
429
@app.route("/picture/<int:id>/")
def picture(id):
    """Render the page of picture ``id``; aborts with 404 if it does not exist.

    The stored file is opened to read the image size, which is passed to the
    template as ``size``.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    # Use the image as a context manager so its file handle is released once
    # the size is known, rather than being left open.
    with Image.open(picture_path) as image:
        size = image.size

    return flask.render_template("picture.html", resource=resource,
                                 file_extension=mimetypes.guess_extension(resource.file_format),
                                 size=size)
440
441
442
443
@app.route("/picture/<int:id>/annotate")
def annotate_picture(id):
    """Render the annotation editor for picture ``id``.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in,
    and 403 unless the visitor is the picture's author or an admin.
    """
    picture_resource = db.session.get(PictureResource, id)
    if picture_resource is None:
        flask.abort(404)

    visitor = db.session.get(User, flask.session.get("username"))
    if visitor is None:
        flask.abort(401)

    may_edit = picture_resource.author == visitor or visitor.admin
    if not may_edit:
        flask.abort(403)

    extension = mimetypes.guess_extension(picture_resource.file_format)
    return flask.render_template(
        "picture-annotation.html",
        resource=picture_resource,
        file_extension=extension,
    )
457
458
459
@app.route("/picture/<int:id>/put-annotations-form")
def put_annotations_form(id):
    """Render the form for submitting annotations of picture ``id``.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in,
    and 403 unless the visitor is the picture's author or an admin.
    """
    picture_resource = db.session.get(PictureResource, id)
    if picture_resource is None:
        flask.abort(404)

    visitor = db.session.get(User, flask.session.get("username"))
    if visitor is None:
        flask.abort(401)

    may_edit = picture_resource.author == visitor or visitor.admin
    if not may_edit:
        flask.abort(403)

    return flask.render_template(
        "put-annotations-form.html", resource=picture_resource
    )
473
474
475
@app.route("/picture/<int:id>/put-annotations-form", methods=["POST"])
def put_annotations_form_post(id):
    """Replace the annotations of picture ``id`` from a submitted form.

    The ``annotations`` form field must hold a JSON document. Aborts with
    404 for an unknown picture, 401 when nobody is logged in, 403 unless the
    visitor is the author or an admin, and 400 when the field is not valid
    JSON. Redirects to the picture page on success.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    # Malformed client input is a client error, not a server crash.
    try:
        annotations = json.loads(flask.request.form["annotations"])
    except json.JSONDecodeError:
        flask.abort(400)

    resource.put_annotations(annotations)

    db.session.commit()

    return flask.redirect("/picture/" + str(resource.id))
493
494
495
496
@app.route("/picture/<int:id>/save-annotations", methods=["POST"])
def save_annotations(id):
    """Replace the annotations of picture ``id`` from a JSON request body.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in,
    and 403 unless the visitor is the author or an admin. Responds with an
    empty 204 on success.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    # Same guard as the other annotation routes: without it an anonymous
    # request reaches ``current_user.admin`` with ``current_user`` being None.
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    resource.put_annotations(flask.request.json)

    db.session.commit()

    response = flask.make_response()
    response.status_code = 204
    return response
513
514
515
@app.route("/picture/<int:id>/get-annotations")
def get_annotations(id):
    """Return the region annotations of picture ``id`` as a JSON list.

    Each item carries ``object``, ``type`` and ``shape``. An unknown picture
    aborts with 404.
    """
    if db.session.get(PictureResource, id) is None:
        flask.abort(404)

    stored_regions = db.session.query(PictureRegion).filter_by(resource_id=id).all()

    payload = [
        {
            "object": stored.object_id,
            "type": stored.json["type"],
            "shape": stored.json["shape"],
        }
        for stored in stored_regions
    ]

    return flask.jsonify(payload)
533
534
535
@app.route("/query-pictures", methods=["POST"]) # sadly GET can't have a body
536
def query_pictures():
537
offset = int(flask.request.args.get("offset", 0))
538
limit = int(flask.request.args.get("limit", 16))
539
ordering = flask.request.args.get("ordering", "date-desc")
540
541
yaml_parser = yaml.YAML()
542
query_data = yaml_parser.load(flask.request.data) or {}
543
544
query = db.session.query(PictureResource)
545
546
requirement_conditions = {
547
"has_object": lambda value: PictureResource.regions.any(
548
PictureRegion.object_id.in_(value)),
549
"nature": lambda value: PictureResource.nature_id.in_(value),
550
"licence": lambda value: PictureResource.licences.any(
551
PictureLicence.licence_id.in_(value)),
552
"author": lambda value: PictureResource.author_name.in_(value),
553
"title": lambda value: PictureResource.title.ilike(value),
554
"description": lambda value: PictureResource.description.ilike(value),
555
"origin_url": lambda value: db.func.lower(db.func.substr(
556
PictureResource.origin_url,
557
db.func.length(db.func.split_part(PictureResource.origin_url, "://", 1)) + 4
558
)).in_(value),
559
"above_width": lambda value: PictureResource.width >= value,
560
"below_width": lambda value: PictureResource.width <= value,
561
"above_height": lambda value: PictureResource.height >= value,
562
"below_height": lambda value: PictureResource.height <= value,
563
"before_date": lambda value: PictureResource.timestamp <= datetime.utcfromtimestamp(
564
value),
565
"after_date": lambda value: PictureResource.timestamp >= datetime.utcfromtimestamp(
566
value)
567
}
568
if "want" in query_data:
569
for i in query_data["want"]:
570
requirement, value = list(i.items())[0]
571
condition = requirement_conditions.get(requirement)
572
if condition:
573
query = query.filter(condition(value))
574
if "exclude" in query_data:
575
for i in query_data["exclude"]:
576
requirement, value = list(i.items())[0]
577
condition = requirement_conditions.get(requirement)
578
if condition:
579
query = query.filter(~condition(value))
580
if not query_data.get("include_obsolete", False):
581
query = query.filter(PictureResource.replaced_by_id.is_(None))
582
583
match ordering:
584
case "date-desc":
585
query = query.order_by(PictureResource.timestamp.desc())
586
case "date-asc":
587
query = query.order_by(PictureResource.timestamp.asc())
588
case "title-asc":
589
query = query.order_by(PictureResource.title.asc())
590
case "title-desc":
591
query = query.order_by(PictureResource.title.desc())
592
case "random":
593
query = query.order_by(db.func.random())
594
case "number-regions-desc":
595
query = query.order_by(db.func.count(PictureResource.regions).desc())
596
case "number-regions-asc":
597
query = query.order_by(db.func.count(PictureResource.regions).asc())
598
599
query = query.offset(offset).limit(limit)
600
resources = query.all()
601
602
json_response = {
603
"date_generated": datetime.utcnow().timestamp(),
604
"resources": [],
605
"offset": offset,
606
"limit": limit,
607
}
608
609
json_resources = json_response["resources"]
610
611
for resource in resources:
612
json_resource = {
613
"id": resource.id,
614
"title": resource.title,
615
"description": resource.description,
616
"timestamp": resource.timestamp.timestamp(),
617
"origin_url": resource.origin_url,
618
"author": resource.author_name,
619
"file_format": resource.file_format,
620
"width": resource.width,
621
"height": resource.height,
622
"nature": resource.nature_id,
623
"licences": [licence.licence_id for licence in resource.licences],
624
"replaces": resource.replaces_id,
625
"replaced_by": resource.replaced_by_id,
626
"regions": [],
627
}
628
for region in resource.regions:
629
json_resource["regions"].append({
630
"object": region.object_id,
631
"type": region.json["type"],
632
"shape": region.json["shape"],
633
})
634
635
json_resources.append(json_resource)
636
637
response = flask.jsonify(json_response)
638
response.headers["Content-Type"] = "application/json"
639
return response
640
641
642
@app.route("/raw/picture/<int:id>")
def raw_picture(id):
    """Serve the stored file of picture ``id`` with its recorded MIME type.

    An unknown picture aborts with 404.
    """
    picture_resource = db.session.get(PictureResource, id)
    if picture_resource is None:
        flask.abort(404)

    pictures_directory = path.join(config.DATA_PATH, "pictures")
    stored_name = str(picture_resource.id)

    response = flask.send_from_directory(pictures_directory, stored_name)
    # Files are stored without an extension, so the type comes from the row.
    response.mimetype = picture_resource.file_format

    return response
652
653
654
@app.route("/api/object-types")
def object_types():
    """Return every picture object as a JSON mapping of id to description."""
    descriptions_by_id = {}
    for picture_object in db.session.query(PictureObject).all():
        descriptions_by_id[picture_object.id] = picture_object.description
    return flask.jsonify(descriptions_by_id)
658