Web platform for sharing free data for ML and research

By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 app.py

View raw Download
text/x-script.python • 23.24 kiB
Python script, ASCII text executable
        
            
1
import json
2
from datetime import datetime
3
from email.policy import default
4
from time import perf_counter
5
6
import flask
7
from flask_sqlalchemy import SQLAlchemy
8
from flask_bcrypt import Bcrypt
9
from flask_httpauth import HTTPBasicAuth
10
from markupsafe import escape, Markup
11
from flask_migrate import Migrate, current
12
from jinja2_fragments.flask import render_block
13
from sqlalchemy.orm import backref
14
import sqlalchemy.dialects.postgresql
15
from os import path
16
from urllib.parse import urlencode
17
import mimetypes
18
import ruamel.yaml as yaml
19
20
from PIL import Image
21
22
import config
23
import markdown
24
25
26
# Application object and the extensions bound to it.
app = flask.Flask(__name__)
bcrypt = Bcrypt(app)

app.config["SQLALCHEMY_DATABASE_URI"] = config.DB_URI
# The secret key signs session cookies. Prefer a dedicated ``config.SECRET_KEY``
# when the config module defines one; otherwise fall back to the database
# password so existing deployments keep their current key.
app.config["SECRET_KEY"] = getattr(config, "SECRET_KEY", config.DB_PASSWORD)

db = SQLAlchemy(app)
migrate = Migrate(app, db)
36
37
38
@app.template_filter("split")
def split(value, separator=None, maxsplit=-1):
    """Jinja filter that forwards to ``value.split(separator, maxsplit)``."""
    pieces = value.split(separator, maxsplit)
    return pieces
41
42
43
@app.template_filter("median")
def median(value):
    """Jinja filter returning the middle element of the sorted values.

    The element at index ``len // 2`` is returned, so for an even number of
    items this is the upper of the two middle elements; nothing is averaged.
    """
    ordered = sorted(value)  # sorted() also consumes generators into a list
    middle = len(ordered) // 2
    return ordered[middle]
47
48
49
@app.template_filter("set")
def set_filter(value):
    """Jinja filter that collects the items of ``value`` into a set."""
    unique_items = set(value)
    return unique_items
52
53
54
@app.template_global()
def modify_query(**new_values):
    """Return the current request path with its query string updated.

    Each keyword argument replaces the query parameter of the same name (or
    adds it when absent); all other parameters of the current request are kept.
    """
    args = flask.request.args.copy()
    # Assign key by key: item assignment on a MultiDict replaces the stored
    # values for that key. The in-place ``|=`` operator is not a reliable
    # overwrite here, because MultiDict update semantics extend the per-key
    # value list while ``items()`` still yields the first (old) value.
    for key, value in new_values.items():
        args[key] = value
    return f"{flask.request.path}?{urlencode(args)}"
61
62
63
@app.context_processor
def default_variables():
    """Inject ``current_user`` into every template context.

    The user is looked up by the username stored in the session.
    """
    session_username = flask.session.get("username")
    return dict(current_user=db.session.get(User, session_username))
68
69
70
with app.app_context():
    class User(db.Model):
        """A registered account, keyed by its username."""

        username = db.Column(db.String(32), unique=True, nullable=False, primary_key=True)
        password_hashed = db.Column(db.String(60), nullable=False)
        admin = db.Column(db.Boolean, nullable=False, default=False, server_default="false")
        pictures = db.relationship("PictureResource", back_populates="author")

        def __init__(self, username, password):
            """Create a user; only the bcrypt hash of ``password`` is stored."""
            self.username = username
            self.password_hashed = bcrypt.generate_password_hash(password).decode("utf-8")


    class Licence(db.Model):
        """A licence that pictures can be published under."""

        id = db.Column(db.String(64), primary_key=True)                 # SPDX identifier
        title = db.Column(db.UnicodeText, nullable=False)               # the official name of the licence
        description = db.Column(db.UnicodeText, nullable=False)         # brief description of its permissions and restrictions
        info_url = db.Column(db.String(1024), nullable=False)           # the URL to a page with general information about the licence
        url = db.Column(db.String(1024), nullable=True)                 # the URL to a page with the full text of the licence and more information
        pictures = db.relationship("PictureLicence", back_populates="licence")
        free = db.Column(db.Boolean, nullable=False, default=False)     # whether the licence is free or not
        logo_url = db.Column(db.String(1024), nullable=True)            # URL to the logo of the licence
        pinned = db.Column(db.Boolean, nullable=False, default=False)   # whether the licence should be shown at the top of the list

        def __init__(self, id, title, description, info_url, url, free, logo_url=None, pinned=False):
            """Store every field as given; ``logo_url`` and ``pinned`` are optional."""
            self.id = id
            self.title = title
            self.description = description
            self.info_url = info_url
            self.url = url
            self.free = free
            self.logo_url = logo_url
            self.pinned = pinned


    class PictureLicence(db.Model):
        """Association row linking one picture resource to one licence."""

        id = db.Column(db.Integer, primary_key=True, autoincrement=True)

        resource_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"))
        licence_id = db.Column(db.String(64), db.ForeignKey("licence.id"))

        resource = db.relationship("PictureResource", back_populates="licences")
        licence = db.relationship("Licence", back_populates="pictures")

        def __init__(self, resource_id, licence_id):
            """Link the resource and licence identified by the given keys."""
            self.resource_id = resource_id
            self.licence_id = licence_id


    class Resource(db.Model):
        """Abstract base holding the columns shared by resource models."""

        __abstract__ = True

        id = db.Column(db.Integer, primary_key=True, autoincrement=True)
        title = db.Column(db.UnicodeText, nullable=False)
        description = db.Column(db.UnicodeText, nullable=False)
        timestamp = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
        origin_url = db.Column(db.String(2048), nullable=True)          # should be left empty if it's original or the source is unknown but public domain


    class PictureNature(db.Model):
        """The kind of picture a resource is."""

        # Examples:
        # "photo", "paper-scan", "2d-art-photo", "sculpture-photo", "computer-3d", "computer-painting",
        # "computer-line-art", "diagram", "infographic", "text", "map", "chart-graph", "screen-capture",
        # "screen-photo", "pattern", "collage", "ai", and so on
        id = db.Column(db.String(64), primary_key=True)
        description = db.Column(db.UnicodeText, nullable=False)
        resources = db.relationship("PictureResource", back_populates="nature")

        def __init__(self, id, description):
            """Create a nature with the given identifier and description."""
            self.id = id
            self.description = description


    class PictureObjectInheritance(db.Model):
        """Parent/child link between two picture objects."""

        parent_id = db.Column(db.String(64), db.ForeignKey("picture_object.id"),
                              primary_key=True)
        child_id = db.Column(db.String(64), db.ForeignKey("picture_object.id"),
                             primary_key=True)

        parent = db.relationship("PictureObject", foreign_keys=[parent_id],
                                 back_populates="child_links")
        child = db.relationship("PictureObject", foreign_keys=[child_id],
                                back_populates="parent_links")

        def __init__(self, parent, child):
            """Link ``child`` under ``parent`` (both are PictureObject instances)."""
            self.parent = parent
            self.child = child


    class PictureObject(db.Model):
        """A type of object that a picture region can be labelled with."""

        id = db.Column(db.String(64), primary_key=True)
        description = db.Column(db.UnicodeText, nullable=False)

        # Links in which this object is the parent / the child respectively.
        child_links = db.relationship("PictureObjectInheritance",
                                      foreign_keys=[PictureObjectInheritance.parent_id],
                                      back_populates="parent")
        parent_links = db.relationship("PictureObjectInheritance",
                                       foreign_keys=[PictureObjectInheritance.child_id],
                                       back_populates="child")

        def __init__(self, id, description):
            """Create an object type with the given identifier and description."""
            self.id = id
            self.description = description


    class PictureRegion(db.Model):
        """One annotated region of a picture."""

        # This is for picture region annotations
        id = db.Column(db.Integer, primary_key=True, autoincrement=True)
        json = db.Column(sqlalchemy.dialects.postgresql.JSONB, nullable=False)

        resource_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"), nullable=False)
        object_id = db.Column(db.String(64), db.ForeignKey("picture_object.id"), nullable=True)

        resource = db.relationship("PictureResource", backref="regions")
        object = db.relationship("PictureObject", backref="regions")

        def __init__(self, json, resource, object):
            """Create a region on ``resource``; ``object`` may be None."""
            self.json = json
            self.resource = resource
            self.object = object


    class PictureResource(Resource):
        """A bitmap picture uploaded by a user."""

        # This is only for bitmap pictures. Vectors will be stored under a different model
        # File name is the ID in the picture directory under data, without an extension
        file_format = db.Column(db.String(64), nullable=False)          # MIME type
        width = db.Column(db.Integer, nullable=False)
        height = db.Column(db.Integer, nullable=False)
        # NOTE(review): String(32) here while picture_nature.id is String(64) — confirm intended.
        nature_id = db.Column(db.String(32), db.ForeignKey("picture_nature.id"), nullable=True)
        author_name = db.Column(db.String(32), db.ForeignKey("user.username"), nullable=False)
        author = db.relationship("User", back_populates="pictures")

        nature = db.relationship("PictureNature", back_populates="resources")

        replaces_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"), nullable=True)
        replaced_by_id = db.Column(db.Integer, db.ForeignKey("picture_resource.id"),
                                   nullable=True)

        replaces = db.relationship("PictureResource", remote_side="PictureResource.id",
                                   foreign_keys=[replaces_id], back_populates="replaced_by")
        replaced_by = db.relationship("PictureResource", remote_side="PictureResource.id",
                                      foreign_keys=[replaced_by_id])

        licences = db.relationship("PictureLicence", back_populates="resource")

        def __init__(self, title, author, description, origin_url, licence_ids, mime, nature=None,
                     replaces=None):
            """Create the picture row and its licence links.

            The row is added and committed here so that ``self.id`` is
            available for the ``PictureLicence`` rows. Those rows, and the
            optional ``replaces`` link, are only added to the session; the
            caller is expected to commit them. Width and height start at 0.
            """
            self.title = title
            self.author = author
            self.description = description
            self.origin_url = origin_url
            self.file_format = mime
            self.width = self.height = 0
            self.nature = nature
            db.session.add(self)
            db.session.commit()
            for licence_id in licence_ids:
                joiner = PictureLicence(self.id, licence_id)
                db.session.add(joiner)
            if replaces is not None:
                self.replaces = replaces
                replaces.replaced_by = self

        def put_annotations(self, json):
            """Replace all region annotations of this picture.

            ``json`` is an iterable of mappings with ``"type"`` and ``"shape"``
            keys and an optional ``"object"`` key naming a ``PictureObject``.
            Nothing is committed here; the caller commits the session.
            """
            # Delete all previous annotations
            db.session.query(PictureRegion).filter_by(resource_id=self.id).delete()

            for region in json:
                # ``object_id`` is nullable, so a region may omit "object" or
                # set it to null; skip the primary-key lookup in that case.
                object_id = region.get("object")
                if object_id is None:
                    picture_object = None
                else:
                    picture_object = db.session.get(PictureObject, object_id)

                region_data = {
                    "type": region["type"],
                    "shape": region["shape"],
                }

                region_row = PictureRegion(region_data, self, picture_object)
                db.session.add(region_row)
247
248
249
@app.route("/")
def index():
    """Serve the home page."""
    home_page = flask.render_template("home.html")
    return home_page
252
253
254
@app.route("/accounts/")
def accounts():
    """Serve the accounts page, rendered from the ``login.html`` template."""
    login_page = flask.render_template("login.html")
    return login_page
257
258
259
@app.route("/login", methods=["POST"])
def login():
    """Check the submitted credentials and start a session.

    On failure a message is flashed and the client is sent back to the
    accounts page; on success the username is stored in the session.
    """
    form = flask.request.form
    username = form["username"]
    password = form["password"]

    user = db.session.get(User, username)
    if user is None:
        flask.flash("This username is not registered.")
        return flask.redirect("/accounts")

    password_matches = bcrypt.check_password_hash(user.password_hashed, password)
    if not password_matches:
        flask.flash("Incorrect password.")
        return flask.redirect("/accounts")

    flask.flash("You have been logged in.")
    flask.session["username"] = username
    return flask.redirect("/")
278
279
280
@app.route("/logout")
def logout():
    """Drop the username from the session and return to the home page."""
    # pop() with a default: logging out while already logged out is a no-op.
    flask.session.pop("username", None)
    flask.flash("You have been logged out.")
    home = flask.redirect("/")
    return home
285
286
287
@app.route("/signup", methods=["POST"])
def signup():
    """Register a new account and log it in.

    The username must be unused, consist only of Latin letters, digits,
    hyphens and underscores, and be 3 to 32 characters long; the password
    must have at least 6 characters. Any violation flashes a message and
    redirects back to the accounts page.
    """
    username = flask.request.form["username"]
    password = flask.request.form["password"]

    if db.session.get(User, username) is not None:
        flask.flash("This username is already taken.")
        return flask.redirect("/accounts")

    allowed_characters = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_")
    # Reject any character outside the allowed set. The check must be
    # "not a subset"; a strict-superset test (``>``) only fires when the name
    # contains every allowed character plus something else.
    if not set(username) <= allowed_characters:
        flask.flash("Usernames can only contain the Latin alphabet, digits, hyphens, and underscores.")
        return flask.redirect("/accounts")

    if not 3 <= len(username) <= 32:
        flask.flash("Usernames must be between 3 and 32 characters long.")
        return flask.redirect("/accounts")

    if len(password) < 6:
        flask.flash("Passwords must be at least 6 characters long.")
        return flask.redirect("/accounts")

    user = User(username, password)
    db.session.add(user)
    db.session.commit()

    flask.session["username"] = username

    flask.flash("You have been registered and logged in.")

    return flask.redirect("/")
317
318
319
@app.route("/profile", defaults={"username": None})
@app.route("/profile/<username>")
def profile(username):
    """Show a user's profile page.

    Without a username the client is redirected to its own profile, or to
    the accounts page when nobody is logged in. Unknown usernames give 404.
    """
    if username is None:
        if "username" not in flask.session:
            flask.flash("Please log in to perform this action.")
            return flask.redirect("/accounts")
        return flask.redirect("/profile/" + flask.session["username"])

    user = db.session.get(User, username)
    if user is None:
        flask.abort(404)

    return flask.render_template("profile.html", user=user)
334
335
336
@app.route("/object/<id>")
def has_object(id):
    """List, page by page, the pictures that have a region of the given object.

    Query parameters ``page`` (default 1) and ``per_page`` (default 16)
    control pagination. An unknown object id gives 404.
    """
    object_ = db.session.get(PictureObject, id)
    if object_ is None:
        flask.abort(404)

    query = db.session.query(PictureResource).join(PictureRegion).filter(PictureRegion.object_id == id)

    # ``type=int`` falls back to the default when the value is not a valid
    # integer, instead of letting int() raise and turn into a server error.
    page = flask.request.args.get("page", 1, type=int)
    per_page = flask.request.args.get("per_page", 16, type=int)

    resources = query.paginate(page=page, per_page=per_page)

    return flask.render_template("object.html", object=object_, resources=resources, page_number=page,
                                 per_page=per_page, num_pages=resources.pages)
351
352
353
@app.route("/upload")
def upload():
    """Show the upload form to logged-in users.

    The form receives all licences (free first, then pinned, then by title)
    and all picture natures.
    """
    if "username" not in flask.session:
        flask.flash("Log in to upload pictures.")
        return flask.redirect("/accounts")

    licence_order = (Licence.free.desc(), Licence.pinned.desc(), Licence.title)
    licences = Licence.query.order_by(*licence_order).all()
    types = PictureNature.query.all()

    return flask.render_template("upload.html", licences=licences, types=types)
364
365
366
@app.route("/upload", methods=["POST"])
def upload_post():
    """Handle a picture upload.

    Validates the form, creates the ``PictureResource`` row, saves the file
    under ``DATA_PATH/pictures/<id>``, records the pixel size, and stores any
    annotations sent as JSON in the ``annotations`` field. Validation failures
    flash a message and redirect back to the form; anonymous requests get 401.
    """
    title = flask.request.form["title"]
    description = flask.request.form["description"]
    origin_url = flask.request.form["origin_url"]
    author = db.session.get(User, flask.session.get("username"))
    licence_ids = flask.request.form.getlist("licence")
    nature_id = flask.request.form["nature"]

    if author is None:
        flask.abort(401)

    file = flask.request.files["file"]

    if not file or not file.filename:
        flask.flash("Select a file")
        return flask.redirect(flask.request.url)

    if not file.mimetype.startswith("image/"):
        flask.flash("Only images are supported")
        return flask.redirect(flask.request.url)

    if not title:
        flask.flash("Enter a title")
        return flask.redirect(flask.request.url)

    if not description:
        description = ""

    if not nature_id:
        flask.flash("Select a picture type")
        return flask.redirect(flask.request.url)

    if not licence_ids:
        flask.flash("Select licences")
        return flask.redirect(flask.request.url)

    licences = [db.session.get(Licence, licence_id) for licence_id in licence_ids]
    # An id that matches no licence yields None; reject it here instead of
    # failing on ``None.free`` below.
    if any(licence is None for licence in licences):
        flask.flash("Select valid licences")
        return flask.redirect(flask.request.url)

    if not any(licence.free for licence in licences):
        flask.flash("Select at least one free licence")
        return flask.redirect(flask.request.url)

    resource = PictureResource(title, author, description, origin_url, licence_ids, file.mimetype,
                               db.session.get(PictureNature, nature_id))
    db.session.add(resource)
    db.session.commit()

    picture_path = path.join(config.DATA_PATH, "pictures", str(resource.id))
    file.save(picture_path)
    # Close the image file once the size has been read.
    with Image.open(picture_path) as pil_image:
        resource.width, resource.height = pil_image.size
    # Persist the size right away; otherwise it is only saved when the
    # annotations branch below happens to commit.
    db.session.commit()

    annotations = flask.request.form.get("annotations")
    if annotations:
        try:
            resource.put_annotations(json.loads(annotations))
            db.session.commit()
        except json.JSONDecodeError:
            flask.flash("Invalid annotations")

    flask.flash("Picture uploaded successfully")

    return flask.redirect("/picture/" + str(resource.id))
426
427
428
@app.route("/picture/<int:id>/")
def picture(id):
    """Show the page of one picture; unknown ids give 404.

    The template receives the resource, a file extension guessed from its
    MIME type, and the pixel size read from the stored file.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    # Only the size is needed, so open the file in a ``with`` block to make
    # sure the handle is closed again.
    with Image.open(path.join(config.DATA_PATH, "pictures", str(resource.id))) as image:
        size = image.size

    return flask.render_template("picture.html", resource=resource,
                                 file_extension=mimetypes.guess_extension(resource.file_format),
                                 size=size)
439
440
441
442
@app.route("/picture/<int:id>/annotate")
def annotate_picture(id):
    """Show the annotation editor for a picture.

    404 for an unknown picture, 401 when nobody is logged in, 403 unless the
    viewer is the picture's author or an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_edit = resource.author == viewer or viewer.admin
    if not may_edit:
        flask.abort(403)

    extension = mimetypes.guess_extension(resource.file_format)
    return flask.render_template("picture-annotation.html", resource=resource,
                                 file_extension=extension)
456
457
458
@app.route("/picture/<int:id>/put-annotations-form")
def put_annotations_form(id):
    """Show the form for submitting a picture's annotations.

    404 for an unknown picture, 401 when nobody is logged in, 403 unless the
    viewer is the picture's author or an admin.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_edit = resource.author == viewer or viewer.admin
    if not may_edit:
        flask.abort(403)

    form_page = flask.render_template("put-annotations-form.html", resource=resource)
    return form_page
472
473
474
@app.route("/picture/<int:id>/put-annotations-form", methods=["POST"])
def put_annotations_form_post(id):
    """Replace a picture's annotations with the JSON from the form.

    404 for an unknown picture, 401 when nobody is logged in, 403 unless the
    viewer is the picture's author or an admin. Redirects to the picture page.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    viewer = db.session.get(User, flask.session.get("username"))
    if viewer is None:
        flask.abort(401)

    may_edit = resource.author == viewer or viewer.admin
    if not may_edit:
        flask.abort(403)

    annotations = json.loads(flask.request.form["annotations"])
    resource.put_annotations(annotations)
    db.session.commit()

    return flask.redirect("/picture/" + str(resource.id))
492
493
494
495
@app.route("/picture/<int:id>/save-annotations", methods=["POST"])
def save_annotations(id):
    """Replace a picture's annotations with the JSON request body.

    404 for an unknown picture, 401 when nobody is logged in, 403 unless the
    requester is the picture's author or an admin. Responds with an empty
    204 on success.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    current_user = db.session.get(User, flask.session.get("username"))
    # Without this guard an anonymous request reaches ``current_user.admin``
    # with ``current_user`` being None and fails with an AttributeError.
    if current_user is None:
        flask.abort(401)

    if resource.author != current_user and not current_user.admin:
        flask.abort(403)

    resource.put_annotations(flask.request.json)

    db.session.commit()

    response = flask.make_response()
    response.status_code = 204
    return response
512
513
514
@app.route("/picture/<int:id>/get-annotations")
def get_annotations(id):
    """Return a picture's region annotations as a JSON list.

    Each entry carries the region's object id, type and shape. Unknown
    pictures give 404.
    """
    if db.session.get(PictureResource, id) is None:
        flask.abort(404)

    stored_regions = db.session.query(PictureRegion).filter_by(resource_id=id).all()

    regions_json = [
        {
            "object": stored.object_id,
            "type": stored.json["type"],
            "shape": stored.json["shape"],
        }
        for stored in stored_regions
    ]

    return flask.jsonify(regions_json)
532
533
534
@app.route("/query-pictures", methods=["POST"]) # sadly GET can't have a body
535
def query_pictures():
536
offset = int(flask.request.args.get("offset", 0))
537
limit = int(flask.request.args.get("limit", 16))
538
ordering = flask.request.args.get("ordering", "date-desc")
539
540
yaml_parser = yaml.YAML()
541
query_data = yaml_parser.load(flask.request.data) or {}
542
543
query = db.session.query(PictureResource)
544
545
requirement_conditions = {
546
"has_object": lambda value: PictureResource.regions.any(
547
PictureRegion.object_id.in_(value)),
548
"nature": lambda value: PictureResource.nature_id.in_(value),
549
"licence": lambda value: PictureResource.licences.any(
550
PictureLicence.licence_id.in_(value)),
551
"author": lambda value: PictureResource.author_name.in_(value),
552
"title": lambda value: PictureResource.title.ilike(value),
553
"description": lambda value: PictureResource.description.ilike(value),
554
"origin_url": lambda value: db.func.lower(db.func.substr(
555
PictureResource.origin_url,
556
db.func.length(db.func.split_part(PictureResource.origin_url, "://", 1)) + 4
557
)).in_(value),
558
"above_width": lambda value: PictureResource.width >= value,
559
"below_width": lambda value: PictureResource.width <= value,
560
"above_height": lambda value: PictureResource.height >= value,
561
"below_height": lambda value: PictureResource.height <= value,
562
"before_date": lambda value: PictureResource.timestamp <= datetime.utcfromtimestamp(
563
value),
564
"after_date": lambda value: PictureResource.timestamp >= datetime.utcfromtimestamp(
565
value)
566
}
567
if "want" in query_data:
568
for i in query_data["want"]:
569
requirement, value = list(i.items())[0]
570
condition = requirement_conditions.get(requirement)
571
if condition:
572
query = query.filter(condition(value))
573
if "exclude" in query_data:
574
for i in query_data["exclude"]:
575
requirement, value = list(i.items())[0]
576
condition = requirement_conditions.get(requirement)
577
if condition:
578
query = query.filter(~condition(value))
579
if not query_data.get("include_obsolete", False):
580
query = query.filter(PictureResource.replaced_by_id.is_(None))
581
582
match ordering:
583
case "date-desc":
584
query = query.order_by(PictureResource.timestamp.desc())
585
case "date-asc":
586
query = query.order_by(PictureResource.timestamp.asc())
587
case "title-asc":
588
query = query.order_by(PictureResource.title.asc())
589
case "title-desc":
590
query = query.order_by(PictureResource.title.desc())
591
case "random":
592
query = query.order_by(db.func.random())
593
case "number-regions-desc":
594
query = query.order_by(db.func.count(PictureResource.regions).desc())
595
case "number-regions-asc":
596
query = query.order_by(db.func.count(PictureResource.regions).asc())
597
598
query = query.offset(offset).limit(limit)
599
resources = query.all()
600
601
json_response = {
602
"date_generated": datetime.utcnow().timestamp(),
603
"resources": [],
604
"offset": offset,
605
"limit": limit,
606
}
607
608
json_resources = json_response["resources"]
609
610
for resource in resources:
611
json_resource = {
612
"id": resource.id,
613
"title": resource.title,
614
"description": resource.description,
615
"timestamp": resource.timestamp.timestamp(),
616
"origin_url": resource.origin_url,
617
"author": resource.author_name,
618
"file_format": resource.file_format,
619
"width": resource.width,
620
"height": resource.height,
621
"nature": resource.nature_id,
622
"licences": [licence.licence_id for licence in resource.licences],
623
"replaces": resource.replaces_id,
624
"replaced_by": resource.replaced_by_id,
625
"regions": [],
626
}
627
for region in resource.regions:
628
json_resource["regions"].append({
629
"object": region.object_id,
630
"type": region.json["type"],
631
"shape": region.json["shape"],
632
})
633
634
json_resources.append(json_resource)
635
636
response = flask.jsonify(json_response)
637
response.headers["Content-Type"] = "application/json"
638
return response
639
640
641
@app.route("/raw/picture/<int:id>")
def raw_picture(id):
    """Serve the stored file of a picture with its recorded MIME type.

    Unknown ids give 404.
    """
    resource = db.session.get(PictureResource, id)
    if resource is None:
        flask.abort(404)

    pictures_directory = path.join(config.DATA_PATH, "pictures")
    file_name = str(resource.id)  # files are stored under the id, without extension

    response = flask.send_from_directory(pictures_directory, file_name)
    response.mimetype = resource.file_format
    return response
651
652
653
@app.route("/api/object-types")
def object_types():
    """Return a JSON object mapping every picture object id to its description."""
    descriptions = {}
    for picture_object in db.session.query(PictureObject).all():
        descriptions[picture_object.id] = picture_object.description
    return flask.jsonify(descriptions)
657