Web platform for sharing free data for ML and research

By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 app.py

View raw Download
text/x-script.python • 24.1 kiB
Python script, ASCII text executable
        
            
1
import json
2
from datetime import datetime
3
from email.policy import default
4
from time import perf_counter
5
6
import flask
7
from flask_sqlalchemy import SQLAlchemy
8
from flask_bcrypt import Bcrypt
9
from flask_httpauth import HTTPBasicAuth
10
from markupsafe import escape, Markup
11
from flask_migrate import Migrate, current
12
from jinja2_fragments.flask import render_block
13
from sqlalchemy.orm import backref
14
import sqlalchemy.dialects.postgresql
15
from os import path
16
from urllib.parse import urlencode
17
import mimetypes
18
import ruamel.yaml as yaml
19
20
from PIL import Image
21
22
import config
23
import markdown
24
25
26
# Flask application object and the extensions bound to it.
app = flask.Flask(__name__)
bcrypt = Bcrypt(app)

# NOTE(review): the session-signing key is taken from config.DB_PASSWORD, so
# the database password doubles as the cookie secret — confirm this is intended.
app.config.update(
    SQLALCHEMY_DATABASE_URI=config.DB_URI,
    SECRET_KEY=config.DB_PASSWORD,
)

# The database URI above must be configured before SQLAlchemy is bound to the app.
db = SQLAlchemy(app)
migrate = Migrate(app, db)
36
37
38
@app.template_filter("split")
def split(value, separator=None, maxsplit=-1):
    """Jinja filter ``split``: expose ``str.split`` to templates.

    Args:
        value: The string to split.
        separator: Delimiter passed to ``str.split``; ``None`` splits on
            runs of whitespace.
        maxsplit: Maximum number of splits; ``-1`` means no limit.

    Returns:
        The list of pieces produced by ``value.split(separator, maxsplit)``.
    """
    pieces = value.split(separator, maxsplit)
    return pieces
41
42
43
@app.template_filter("median")
def median(value):
    """Jinja filter ``median``: return the middle element of the sorted values.

    The element at index ``len // 2`` of the sorted sequence is returned, so
    for an even number of items this is the upper of the two middle values
    (no averaging takes place). Generators are accepted because the input is
    materialised by ``sorted``.

    Raises:
        IndexError: If ``value`` is empty.
    """
    ordered = sorted(value)
    middle = len(ordered) // 2
    return ordered[middle]
47
48
49
@app.template_filter("set")
def set_filter(value):
    """Jinja filter ``set``: collapse an iterable into a set of its unique items."""
    return {*value}
52
53
54
@app.template_global()
def modify_query(**new_values):
    """Build a URL for the current path with some query parameters overridden.

    Args:
        **new_values: Query parameter names mapped to the values they should
            take in the generated URL.

    Returns:
        ``"<current path>?<query string>"`` where the query string is the
        current request's arguments with ``new_values`` applied on top.
    """
    params = flask.request.args.copy()
    # Item assignment replaces any existing value for the key.
    for name in new_values:
        params[name] = new_values[name]

    query_string = urlencode(params)
    return f"{flask.request.path}?{query_string}"
61
62
63
@app.context_processor
def default_variables():
    """Inject ``current_user`` into every template context.

    Returns:
        A dict whose ``current_user`` entry is the ``User`` row for the
        username stored in the session, or ``None`` when the lookup finds
        nothing.
    """
    session_username = flask.session.get("username")
    current_user = db.session.get(User, session_username)
    return dict(current_user=current_user)
68
69
70
with app.app_context():
    class User(db.Model):
        """An account, identified by its username."""

        username = db.Column(db.String(32), unique=True, nullable=False, primary_key=True)
        password_hashed = db.Column(db.String(60), nullable=False)
        admin = db.Column(db.Boolean, nullable=False, default=False, server_default="false")
        pictures = db.relationship("PictureResource", back_populates="author")

        def __init__(self, username, password):
            """Create a user, storing only the bcrypt hash of ``password``."""
            hashed = bcrypt.generate_password_hash(password)
            self.username = username
            self.password_hashed = hashed.decode("utf-8")


    class Licence(db.Model):
        """A licence that pictures can be published under."""

        # SPDX identifier
        id = db.Column(db.String(64), primary_key=True)
        # The official name of the licence
        title = db.Column(db.UnicodeText, nullable=False)
        # Brief description of its permissions and restrictions
        description = db.Column(db.UnicodeText, nullable=False)
        # URL to a page with general information about the licence
        info_url = db.Column(db.String(1024), nullable=False)
        # URL to a page with the full text of the licence and more information
        url = db.Column(db.String(1024), nullable=True)
        pictures = db.relationship("PictureLicence", back_populates="licence")
        # Whether the licence is free or not
        free = db.Column(db.Boolean, nullable=False, default=False)
        # URL to the logo of the licence
        logo_url = db.Column(db.String(1024), nullable=True)
        # Whether the licence should be shown at the top of the list
        pinned = db.Column(db.Boolean, nullable=False, default=False)

        def __init__(self, id, title, description, info_url, url, free, logo_url=None, pinned=False):
            """Populate every column from the matching argument."""
            self.id = id
            self.title = title
            self.description = description
            self.info_url = info_url
            self.url = url
            self.free = free
            self.logo_url = logo_url
            self.pinned = pinned


    class PictureLicence(db.Model):
        """Association row linking one picture to one licence."""

        id = db.Column(db.Integer, primary_key=True, autoincrement=True)

        resource_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"))
        licence_id = db.Column(db.String(64), db.ForeignKey("licence.id"))

        resource = db.relationship("PictureResource", back_populates="licences")
        licence = db.relationship("Licence", back_populates="pictures")

        def __init__(self, resource_id, licence_id):
            """Link the picture ``resource_id`` to the licence ``licence_id``."""
            self.resource_id = resource_id
            self.licence_id = licence_id


    class Resource(db.Model):
        """Abstract base holding the columns shared by resource models."""

        __abstract__ = True

        id = db.Column(db.Integer, primary_key=True, autoincrement=True)
        title = db.Column(db.UnicodeText, nullable=False)
        description = db.Column(db.UnicodeText, nullable=False)
        # Defaults to the (naive) UTC time at insertion.
        timestamp = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
        # Should be left empty if it's original or the source is unknown but public domain
        origin_url = db.Column(db.String(2048), nullable=True)


    class PictureNature(db.Model):
        """The kind of picture a resource is.

        Examples:
        "photo", "paper-scan", "2d-art-photo", "sculpture-photo", "computer-3d",
        "computer-painting", "computer-line-art", "diagram", "infographic", "text",
        "map", "chart-graph", "screen-capture", "screen-photo", "pattern",
        "collage", "ai", and so on
        """

        id = db.Column(db.String(64), primary_key=True)
        description = db.Column(db.UnicodeText, nullable=False)
        resources = db.relationship("PictureResource", back_populates="nature")

        def __init__(self, id, description):
            """Create a nature with the given identifier and description."""
            self.id = id
            self.description = description


    class PictureObjectInheritance(db.Model):
        """Parent/child link between two ``PictureObject`` rows."""

        parent_id = db.Column(
            db.String(64), db.ForeignKey("picture_object.id"), primary_key=True
        )
        child_id = db.Column(
            db.String(64), db.ForeignKey("picture_object.id"), primary_key=True
        )

        parent = db.relationship(
            "PictureObject", foreign_keys=[parent_id], back_populates="child_links"
        )
        child = db.relationship(
            "PictureObject", foreign_keys=[child_id], back_populates="parent_links"
        )

        def __init__(self, parent, child):
            """Record that ``child`` inherits from ``parent``."""
            self.parent = parent
            self.child = child


    class PictureObject(db.Model):
        """An object type that picture regions can be labelled with."""

        id = db.Column(db.String(64), primary_key=True)
        description = db.Column(db.UnicodeText, nullable=False)

        # Links in which this object is the parent.
        child_links = db.relationship(
            "PictureObjectInheritance",
            foreign_keys=[PictureObjectInheritance.parent_id],
            back_populates="parent",
        )
        # Links in which this object is the child.
        parent_links = db.relationship(
            "PictureObjectInheritance",
            foreign_keys=[PictureObjectInheritance.child_id],
            back_populates="child",
        )

        def __init__(self, id, description):
            """Create an object type with the given identifier and description."""
            self.id = id
            self.description = description


    class PictureRegion(db.Model):
        """A region annotation on a picture."""

        id = db.Column(db.Integer, primary_key=True, autoincrement=True)
        json = db.Column(sqlalchemy.dialects.postgresql.JSONB, nullable=False)

        resource_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"), nullable=False)
        object_id = db.Column(db.String(64), db.ForeignKey("picture_object.id"), nullable=True)

        resource = db.relationship("PictureResource", backref="regions")
        object = db.relationship("PictureObject", backref="regions")

        def __init__(self, json, resource, object):
            """Attach the region data ``json`` to ``resource``, labelled with ``object``."""
            self.json = json
            self.resource = resource
            self.object = object


    class PictureResource(Resource):
        """A bitmap picture.

        This is only for bitmap pictures. Vectors will be stored under a
        different model. File name is the ID in the picture directory under
        data, without an extension.
        """

        # MIME type
        file_format = db.Column(db.String(64), nullable=False)
        width = db.Column(db.Integer, nullable=False)
        height = db.Column(db.Integer, nullable=False)
        nature_id = db.Column(db.String(32), db.ForeignKey("picture_nature.id"), nullable=True)
        author_name = db.Column(db.String(32), db.ForeignKey("user.username"), nullable=False)
        author = db.relationship("User", back_populates="pictures")

        nature = db.relationship("PictureNature", back_populates="resources")

        replaces_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"), nullable=True)
        replaced_by_id = db.Column(
            db.Integer, db.ForeignKey("picture_resource.id"), nullable=True
        )

        replaces = db.relationship(
            "PictureResource",
            remote_side="PictureResource.id",
            foreign_keys=[replaces_id],
            back_populates="replaced_by",
        )
        replaced_by = db.relationship(
            "PictureResource",
            remote_side="PictureResource.id",
            foreign_keys=[replaced_by_id],
        )

        licences = db.relationship("PictureLicence", back_populates="resource")

        def __init__(self, title, author, description, origin_url, licence_ids, mime, nature=None,
                     replaces=None):
            """Create the picture row and queue its licence links.

            The row is added to the session and committed straight away so
            that ``self.id`` is available for the ``PictureLicence`` rows.
            Those rows (and the replacement links) are only added to the
            session; the caller is expected to commit afterwards. Width and
            height start at 0.
            """
            self.title = title
            self.author = author
            self.description = description
            self.origin_url = origin_url
            self.file_format = mime
            self.width = 0
            self.height = 0
            self.nature = nature

            # Commit now: the licence links below need the generated id.
            db.session.add(self)
            db.session.commit()

            db.session.add_all(
                PictureLicence(self.id, licence_id) for licence_id in licence_ids
            )

            if replaces is None:
                return
            self.replaces = replaces
            replaces.replaced_by = self

        def put_annotations(self, json):
            """Replace every region annotation of this picture.

            Args:
                json: Iterable of mappings, each with the keys ``"object"``
                    (a ``PictureObject`` id), ``"type"`` and ``"shape"``.

            The new rows are added to the session but not committed.
            """
            # Delete all previous annotations
            db.session.query(PictureRegion).filter_by(resource_id=self.id).delete()

            for entry in json:
                labelled_object = db.session.get(PictureObject, entry["object"])
                payload = {"type": entry["type"], "shape": entry["shape"]}
                db.session.add(PictureRegion(payload, self, labelled_object))
247
248
249
@app.route("/")
def index():
    """Render the home page with up to ten randomly ordered pictures."""
    random_pictures = (
        PictureResource.query
        .order_by(db.func.random())
        .limit(10)
        .all()
    )
    return flask.render_template("home.html", resources=random_pictures)
252
253
254
@app.route("/accounts/")
def accounts():
    """Render the ``login.html`` template."""
    page = flask.render_template("login.html")
    return page
257
258
259
@app.route("/login", methods=["POST"])
def login():
    """Log a user in from the posted ``username`` and ``password`` fields.

    On failure a message is flashed and the client is sent back to
    ``/accounts``; on success the username is stored in the session and the
    client is redirected to ``/``.
    """
    form = flask.request.form
    username = form["username"]
    password = form["password"]

    user = db.session.get(User, username)

    if user is None:
        message, destination = "This username is not registered.", "/accounts"
    elif not bcrypt.check_password_hash(user.password_hashed, password):
        message, destination = "Incorrect password.", "/accounts"
    else:
        message, destination = "You have been logged in.", "/"
        flask.session["username"] = username

    flask.flash(message)
    return flask.redirect(destination)
278
279
280
@app.route("/logout")
def logout():
    """Forget the logged-in user (if any) and return to the home page."""
    # A default is supplied so logging out while anonymous does not raise.
    flask.session.pop("username", None)
    flask.flash("You have been logged out.")
    home = flask.redirect("/")
    return home
285
286
287
@app.route("/signup", methods=["POST"])
def signup():
    """Register a new account from the posted form and log it in.

    The form must contain ``username`` and ``password``. The username must
    be unused, consist only of Latin letters, digits, hyphens and
    underscores, and be 3 to 32 characters long; the password must be at
    least 6 characters long. Any violation flashes a message and redirects
    to ``/accounts``. On success the user is stored, the session is bound to
    the new username and the client is redirected to ``/``.
    """
    username = flask.request.form["username"]
    password = flask.request.form["password"]

    if db.session.get(User, username) is not None:
        flask.flash("This username is already taken.")
        return flask.redirect("/accounts")

    # Subset test: every character of the username must be an allowed one.
    # (A proper-superset test, `>`, would only reject names that contain
    # *all* allowed characters plus something else, i.e. practically never.)
    allowed_characters = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_")
    if not set(username) <= allowed_characters:
        flask.flash("Usernames can only contain the Latin alphabet, digits, hyphens, and underscores.")
        return flask.redirect("/accounts")

    if len(username) < 3 or len(username) > 32:
        flask.flash("Usernames must be between 3 and 32 characters long.")
        return flask.redirect("/accounts")

    if len(password) < 6:
        flask.flash("Passwords must be at least 6 characters long.")
        return flask.redirect("/accounts")

    user = User(username, password)
    db.session.add(user)
    db.session.commit()

    flask.session["username"] = username

    flask.flash("You have been registered and logged in.")

    return flask.redirect("/")
317
318
319
@app.route("/profile", defaults={"username": None})
@app.route("/profile/<username>")
def profile(username):
    """Show a user's profile page.

    Without a username the logged-in user is redirected to their own
    profile, and anonymous visitors are sent to ``/accounts``. An unknown
    username aborts with 404.
    """
    if username is not None:
        user = db.session.get(User, username)
        if user is None:
            flask.abort(404)
        return flask.render_template("profile.html", user=user)

    if "username" not in flask.session:
        flask.flash("Please log in to perform this action.")
        return flask.redirect("/accounts")

    return flask.redirect("/profile/" + flask.session["username"])
334
335
336
@app.route("/object/<id>")
def has_object(id):
    """List, page by page, the pictures that have a region labelled ``id``.

    Query parameters:
        page: 1-based page number (default 1).
        per_page: Page size (default 16).

    Non-integer values for either parameter fall back to the default
    instead of raising. Aborts with 404 when the object does not exist.
    """
    object_ = db.session.get(PictureObject, id)
    if object_ is None:
        flask.abort(404)

    query = db.session.query(PictureResource).join(PictureRegion).filter(PictureRegion.object_id == id)

    # `type=int` makes the lookup return the default when the value cannot
    # be converted, so "?page=abc" no longer ends in an unhandled ValueError.
    page = flask.request.args.get("page", 1, type=int)
    per_page = flask.request.args.get("per_page", 16, type=int)

    resources = query.paginate(page=page, per_page=per_page)

    return flask.render_template("object.html", object=object_, resources=resources, page_number=page,
                                 page_length=per_page, num_pages=resources.pages, prev_page=resources.prev_num,
                                 next_page=resources.next_num, PictureRegion=PictureRegion)
352
353
354
@app.route("/upload")
def upload():
    """Render the upload form for logged-in users.

    Anonymous visitors are redirected to ``/accounts``. Licences are listed
    free first, then pinned, then by title.
    """
    logged_in = "username" in flask.session
    if not logged_in:
        flask.flash("Log in to upload pictures.")
        return flask.redirect("/accounts")

    licence_order = (Licence.free.desc(), Licence.pinned.desc(), Licence.title)
    available_licences = Licence.query.order_by(*licence_order).all()
    picture_types = PictureNature.query.all()

    return flask.render_template("upload.html", licences=available_licences, types=picture_types)
365
366
367
@app.route("/upload", methods=["POST"])
def upload_post():
    """Handle the upload form: validate, store the picture and its metadata.

    Form fields read: ``title``, ``description``, ``origin_url``,
    ``licence`` (repeatable), ``nature``, optional ``annotations`` (JSON),
    and the uploaded ``file``.

    Aborts with 401 when nobody is logged in and with 400 when a submitted
    licence id does not exist. Other validation failures flash a message
    and redirect back to the form. On success the client is redirected to
    the new picture's page.
    """
    title = flask.request.form["title"]
    description = flask.request.form["description"]
    origin_url = flask.request.form["origin_url"]
    author = db.session.get(User, flask.session.get("username"))
    licence_ids = flask.request.form.getlist("licence")
    nature_id = flask.request.form["nature"]

    if author is None:
        flask.abort(401)

    file = flask.request.files["file"]

    if not file or not file.filename:
        flask.flash("Select a file")
        return flask.redirect(flask.request.url)

    if not file.mimetype.startswith("image/"):
        flask.flash("Only images are supported")
        return flask.redirect(flask.request.url)

    if not title:
        flask.flash("Enter a title")
        return flask.redirect(flask.request.url)

    if not description:
        description = ""

    if not nature_id:
        flask.flash("Select a picture type")
        return flask.redirect(flask.request.url)

    if not licence_ids:
        flask.flash("Select licences")
        return flask.redirect(flask.request.url)

    licences = [db.session.get(Licence, licence_id) for licence_id in licence_ids]
    # An id that matches no licence yields None; reject it here rather than
    # crash on `.free` below or create a link row to a missing licence.
    if any(licence is None for licence in licences):
        flask.abort(400)
    if not any(licence.free for licence in licences):
        flask.flash("Select at least one free licence")
        return flask.redirect(flask.request.url)

    resource = PictureResource(title, author, description, origin_url, licence_ids, file.mimetype,
                               db.session.get(PictureNature, nature_id))
    db.session.add(resource)
    db.session.commit()

    # The file is stored under the resource id, without an extension.
    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    file.save(picture_path)
    # Context manager so the file handle PIL opens is released promptly.
    with Image.open(picture_path) as pil_image:
        resource.width, resource.height = pil_image.size
    db.session.commit()

    annotations = flask.request.form.get("annotations")
    if annotations:
        try:
            resource.put_annotations(json.loads(annotations))
            db.session.commit()
        except (json.JSONDecodeError, KeyError, TypeError):
            # Malformed JSON, or valid JSON with the wrong structure: discard
            # any half-applied region changes but keep the uploaded picture.
            db.session.rollback()
            flask.flash("Invalid annotations")

    flask.flash("Picture uploaded successfully")

    return flask.redirect("/picture/" + str(resource.id))
428
429
430
@app.route("/picture/<int:id>/")
def picture(id):
    """Render the page of picture ``id``; aborts with 404 if it does not exist.

    The template receives the resource, the file extension guessed from its
    MIME type, and the image size read from the stored file.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    # Only the dimensions are needed; close the file as soon as they are read
    # instead of leaving the handle open for the rest of the request.
    with Image.open(picture_path) as image:
        size = image.size

    return flask.render_template("picture.html", resource=resource,
                                 file_extension=mimetypes.guess_extension(resource.file_format),
                                 size=size)
441
442
443
444
@app.route("/picture/<int:id>/annotate")
def annotate_picture(id):
    """Render the annotation editor for picture ``id``.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in and
    403 when the user is neither the picture's author nor an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_edit = resource.author == viewer or viewer.admin
    if not may_edit:
        flask.abort(403)

    extension = mimetypes.guess_extension(resource.file_format)
    return flask.render_template("picture-annotation.html", resource=resource,
                                 file_extension=extension)
458
459
460
@app.route("/picture/<int:id>/put-annotations-form")
def put_annotations_form(id):
    """Render the form for submitting annotations of picture ``id``.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in and
    403 when the user is neither the picture's author nor an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_edit = resource.author == viewer or viewer.admin
    if not may_edit:
        flask.abort(403)

    return flask.render_template("put-annotations-form.html", resource=resource)
474
475
476
@app.route("/picture/<int:id>/put-annotations-form", methods=["POST"])
def put_annotations_form_post(id):
    """Replace the annotations of picture ``id`` from the ``annotations`` form field.

    The field must hold JSON accepted by ``PictureResource.put_annotations``.
    Aborts with 404 for an unknown picture, 401 when nobody is logged in and
    403 when the user is neither the author nor an admin. Text that is not
    valid JSON flashes "Invalid annotations" and returns to the form; on
    success the client is redirected to the picture page.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    # Parse before touching the database so bad input changes nothing.
    try:
        annotations = json.loads(flask.request.form["annotations"])
    except json.JSONDecodeError:
        flask.flash("Invalid annotations")
        return flask.redirect(flask.request.url)

    resource.put_annotations(annotations)

    db.session.commit()

    return flask.redirect("/picture/" + str(resource.id))
494
495
496
497
@app.route("/picture/<int:id>/save-annotations", methods=["POST"])
def save_annotations(id):
    """Replace the annotations of picture ``id`` with the request's JSON body.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in and
    403 when the user is neither the author nor an admin.

    Returns:
        An empty 204 response once the annotations are committed.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    # Without this guard an anonymous request reaches `current_user.admin`
    # with `current_user` being None and fails with an AttributeError.
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    resource.put_annotations(flask.request.json)

    db.session.commit()

    response = flask.make_response()
    response.status_code = 204
    return response
514
515
516
@app.route("/picture/<int:id>/get-annotations")
def get_annotations(id):
    """Return the region annotations of picture ``id`` as a JSON list.

    Each item has the keys ``object``, ``type`` and ``shape``. Aborts with
    404 when the picture does not exist.
    """
    if db.session.get(PictureResource, id) is None:
        flask.abort(404)

    stored_regions = db.session.query(PictureRegion).filter_by(resource_id=id).all()

    payload = [
        {
            "object": stored.object_id,
            "type": stored.json["type"],
            "shape": stored.json["shape"],
        }
        for stored in stored_regions
    ]

    return flask.jsonify(payload)
534
535
536
@app.route("/picture/<int:id>/delete")
def delete_picture(id):
    """Delete picture ``id`` together with its licence links and regions.

    Aborts with 404 for an unknown picture, 401 when nobody is logged in and
    403 when the user is neither the author nor an admin. Redirects to ``/``
    afterwards.
    """
    # NOTE(review): this destructive action is reachable with a plain GET;
    # consider requiring POST.
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_delete = resource.author == viewer or viewer.admin
    if not may_delete:
        flask.abort(403)

    # Dependent rows are removed first, then the picture itself.
    PictureLicence.query.filter_by(resource=resource).delete()
    PictureRegion.query.filter_by(resource=resource).delete()
    db.session.delete(resource)
    db.session.commit()

    return flask.redirect("/")
555
556
557
@app.route("/query-pictures", methods=["POST"]) # sadly GET can't have a body
558
def query_pictures():
559
offset = int(flask.request.args.get("offset", 0))
560
limit = int(flask.request.args.get("limit", 16))
561
ordering = flask.request.args.get("ordering", "date-desc")
562
563
yaml_parser = yaml.YAML()
564
query_data = yaml_parser.load(flask.request.data) or {}
565
566
query = db.session.query(PictureResource)
567
568
requirement_conditions = {
569
"has_object": lambda value: PictureResource.regions.any(
570
PictureRegion.object_id.in_(value)),
571
"nature": lambda value: PictureResource.nature_id.in_(value),
572
"licence": lambda value: PictureResource.licences.any(
573
PictureLicence.licence_id.in_(value)),
574
"author": lambda value: PictureResource.author_name.in_(value),
575
"title": lambda value: PictureResource.title.ilike(value),
576
"description": lambda value: PictureResource.description.ilike(value),
577
"origin_url": lambda value: db.func.lower(db.func.substr(
578
PictureResource.origin_url,
579
db.func.length(db.func.split_part(PictureResource.origin_url, "://", 1)) + 4
580
)).in_(value),
581
"above_width": lambda value: PictureResource.width >= value,
582
"below_width": lambda value: PictureResource.width <= value,
583
"above_height": lambda value: PictureResource.height >= value,
584
"below_height": lambda value: PictureResource.height <= value,
585
"before_date": lambda value: PictureResource.timestamp <= datetime.utcfromtimestamp(
586
value),
587
"after_date": lambda value: PictureResource.timestamp >= datetime.utcfromtimestamp(
588
value)
589
}
590
if "want" in query_data:
591
for i in query_data["want"]:
592
requirement, value = list(i.items())[0]
593
condition = requirement_conditions.get(requirement)
594
if condition:
595
query = query.filter(condition(value))
596
if "exclude" in query_data:
597
for i in query_data["exclude"]:
598
requirement, value = list(i.items())[0]
599
condition = requirement_conditions.get(requirement)
600
if condition:
601
query = query.filter(~condition(value))
602
if not query_data.get("include_obsolete", False):
603
query = query.filter(PictureResource.replaced_by_id.is_(None))
604
605
match ordering:
606
case "date-desc":
607
query = query.order_by(PictureResource.timestamp.desc())
608
case "date-asc":
609
query = query.order_by(PictureResource.timestamp.asc())
610
case "title-asc":
611
query = query.order_by(PictureResource.title.asc())
612
case "title-desc":
613
query = query.order_by(PictureResource.title.desc())
614
case "random":
615
query = query.order_by(db.func.random())
616
case "number-regions-desc":
617
query = query.order_by(db.func.count(PictureResource.regions).desc())
618
case "number-regions-asc":
619
query = query.order_by(db.func.count(PictureResource.regions).asc())
620
621
query = query.offset(offset).limit(limit)
622
resources = query.all()
623
624
json_response = {
625
"date_generated": datetime.utcnow().timestamp(),
626
"resources": [],
627
"offset": offset,
628
"limit": limit,
629
}
630
631
json_resources = json_response["resources"]
632
633
for resource in resources:
634
json_resource = {
635
"id": resource.id,
636
"title": resource.title,
637
"description": resource.description,
638
"timestamp": resource.timestamp.timestamp(),
639
"origin_url": resource.origin_url,
640
"author": resource.author_name,
641
"file_format": resource.file_format,
642
"width": resource.width,
643
"height": resource.height,
644
"nature": resource.nature_id,
645
"licences": [licence.licence_id for licence in resource.licences],
646
"replaces": resource.replaces_id,
647
"replaced_by": resource.replaced_by_id,
648
"regions": [],
649
"download": config.ROOT_URL + flask.url_for("raw_picture", id=resource.id),
650
}
651
for region in resource.regions:
652
json_resource["regions"].append({
653
"object": region.object_id,
654
"type": region.json["type"],
655
"shape": region.json["shape"],
656
})
657
658
json_resources.append(json_resource)
659
660
response = flask.jsonify(json_response)
661
response.headers["Content-Type"] = "application/json"
662
return response
663
664
665
@app.route("/raw/picture/<int:id>")
def raw_picture(id):
    """Serve the stored file of picture ``id`` with its recorded MIME type.

    Aborts with 404 when the picture does not exist.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    pictures_directory = path.join(config.DATA_PATH, "pictures")
    # Files are stored without an extension, so the MIME type is taken from
    # the database row.
    response = flask.send_from_directory(pictures_directory, str(resource.id))
    response.mimetype = resource.file_format

    return response
675
676
677
@app.route("/api/object-types")
def object_types():
    """Return a JSON object mapping every picture object id to its description."""
    descriptions = {}
    for picture_object in db.session.query(PictureObject).all():
        descriptions[picture_object.id] = picture_object.description
    return flask.jsonify(descriptions)
681