Web platform for sharing free image data for ML and research

Homepage: https://datasets.roundabout-host.com

By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.


Query API

created on Friday, 6 September 2024, 09:42:10 (1725615730), received on Friday, 6 September 2024, 19:35:56 (1725651356)
Author identity: vlad <vlad.muntoiu@gmail.com>



@@ -2,5 +2,6 @@

                                            <project version="4">
                                              <component name="VcsDirectoryMappings">
                                                <mapping directory="$PROJECT_DIR$" vcs="Git" />
                                            <mapping directory="$PROJECT_DIR$/static/efficient-ui" vcs="Git" />


@@ -13,6 +13,7 @@ from sqlalchemy.orm import backref

                                            import sqlalchemy.dialects.postgresql
                                            from os import path
                                            import mimetypes
                                        import ruamel.yaml as yaml
                                            from PIL import Image

@@ -374,6 +375,8 @@ def upload_post():

                                                file.save(path.join(config.DATA_PATH, "pictures", str(resource.id)))
                                            pil_image = Image.open(path.join(config.DATA_PATH, "pictures", str(resource.id)))
                                            resource.width, resource.height = pil_image.size
                                                if flask.request.form.get("annotations"):

@@ -493,6 +496,113 @@ def get_annotations(id):

                                                return flask.jsonify(regions_json)
                                        # Picture query endpoint: filters PictureResource rows using the YAML rules
                                        # sent in the request body and returns the matching pictures as JSON.
                                        # Query-string parameters: "offset" (default 0), "limit" (default 16) and
                                        # "ordering" (default "date-desc"). Body keys read here: "want", "exclude"
                                        # and "include_obsolete".
                                        @app.route("/query-pictures", methods=["POST"])      # sadly GET can't have a body
                                        def query_pictures():
                                            # Pagination and ordering come from the query string, not from the body.
                                            # int() raises ValueError for non-numeric values.
                                            # NOTE(review): no upper bound on "limit" is visible here — confirm that is intended.
                                            offset = int(flask.request.args.get("offset", 0))
                                            limit = int(flask.request.args.get("limit", 16))
                                            ordering = flask.request.args.get("ordering", "date-desc")
                                            # The raw request body is parsed as YAML; a falsy parse result (e.g. an
                                            # empty body) is replaced by an empty dict, i.e. no rules at all.
                                            # NOTE(review): the body is client-supplied and YAML() is built with its
                                            # default settings — confirm the default loader is acceptable here.
                                            yaml_parser = yaml.YAML()
                                            query_data = yaml_parser.load(flask.request.data) or {}
                                            query = db.session.query(PictureResource)
                                            # Maps each rule name to a function that turns the rule's value into a
                                            # SQLAlchemy filter expression on PictureResource.
                                            # NOTE(review): several entries below look truncated in this view
                                            # (unbalanced parentheses, no closing brace) — check against the full file.
                                            requirement_conditions = {
                                                "has_object": lambda value: PictureResource.regions.any(
                                                "nature": lambda value: PictureResource.nature_id.in_(value),
                                                "licence": lambda value: PictureResource.licences.any(
                                                "author": lambda value: PictureResource.author_name.in_(value),
                                                # The value is passed to ILIKE as-is, so it is used as the pattern itself.
                                                "title": lambda value: PictureResource.title.ilike(value),
                                                "description": lambda value: PictureResource.description.ilike(value),
                                                "origin_url": lambda value: db.func.lower(db.func.substr(
                                                        db.func.length(db.func.split_part(PictureResource.origin_url, "://", 1)) + 4
                                                # "above_*" / "below_*" comparisons are inclusive (>= and <=).
                                                "above_width": lambda value: PictureResource.width >= value,
                                                "below_width": lambda value: PictureResource.width <= value,
                                                "above_height": lambda value: PictureResource.height >= value,
                                                "below_height": lambda value: PictureResource.height <= value,
                                                "before_date": lambda value: PictureResource.timestamp <= datetime.utcfromtimestamp(
                                                "after_date": lambda value: PictureResource.timestamp >= datetime.utcfromtimestamp(
                                            # Each "want" item is treated as a one-key mapping: only its first
                                            # key/value pair is used, and rule names without an entry in
                                            # requirement_conditions are silently ignored. Successive filter()
                                            # calls mean every recognised rule must hold.
                                            if "want" in query_data:
                                                for i in query_data["want"]:
                                                    requirement, value = list(i.items())[0]
                                                    condition = requirement_conditions.get(requirement)
                                                    if condition:
                                                        query = query.filter(condition(value))
                                            # "exclude" items are handled the same way, with the condition negated.
                                            if "exclude" in query_data:
                                                for i in query_data["exclude"]:
                                                    requirement, value = list(i.items())[0]
                                                    condition = requirement_conditions.get(requirement)
                                                    if condition:
                                                        query = query.filter(~condition(value))
                                            # Unless "include_obsolete" is truthy, keep only pictures whose
                                            # replaced_by_id is NULL.
                                            if not query_data.get("include_obsolete", False):
                                                query = query.filter(PictureResource.replaced_by_id.is_(None))
                                            # There is no default case: an unrecognised "ordering" value leaves the
                                            # query without an ORDER BY clause.
                                            match ordering:
                                                case "date-desc":
                                                    query = query.order_by(PictureResource.timestamp.desc())
                                                case "date-asc":
                                                    query = query.order_by(PictureResource.timestamp.asc())
                                                case "title-asc":
                                                    query = query.order_by(PictureResource.title.asc())
                                                case "title-desc":
                                                    query = query.order_by(PictureResource.title.desc())
                                                case "random":
                                                    query = query.order_by(db.func.random())
                                                # NOTE(review): the two cases below order by an aggregate over a
                                                # relationship with no join or GROUP BY visible in this function —
                                                # verify they produce valid SQL on the target database.
                                                case "number-regions-desc":
                                                    query = query.order_by(db.func.count(PictureResource.regions).desc())
                                                case "number-regions-asc":
                                                    query = query.order_by(db.func.count(PictureResource.regions).asc())
                                            query = query.offset(offset).limit(limit)
                                            resources = query.all()
                                            # Response envelope: generation time, the applied offset/limit and the
                                            # list of serialised pictures.
                                            # NOTE(review): .timestamp() on the naive datetime from utcnow() interprets
                                            # it as local time — confirm the server runs in UTC or that this is intended.
                                            json_response = {
                                                "date_generated": datetime.utcnow().timestamp(),
                                                "resources": [],
                                                "offset": offset,
                                                "limit": limit,
                                            json_resources = json_response["resources"]
                                            # One dict per picture; "regions" starts empty and is presumably filled
                                            # from resource.regions below (the append calls are not visible in this
                                            # view — TODO confirm).
                                            for resource in resources:
                                                json_resource = {
                                                    "id": resource.id,
                                                    "title": resource.title,
                                                    "description": resource.description,
                                                    "timestamp": resource.timestamp.timestamp(),
                                                    "origin_url": resource.origin_url,
                                                    "author": resource.author_name,
                                                    "file_format": resource.file_format,
                                                    "width": resource.width,
                                                    "height": resource.height,
                                                    "nature": resource.nature_id,
                                                    "licences": [licence.licence_id for licence in resource.licences],
                                                    "replaces": resource.replaces_id,
                                                    "replaced_by": resource.replaced_by_id,
                                                    "regions": [],
                                                # Each region contributes its object id plus the "type" and "shape"
                                                # entries of its stored JSON.
                                                for region in resource.regions:
                                                        "object": region.object_id,
                                                        "type": region.json["type"],
                                                        "shape": region.json["shape"],
                                            # The Content-Type header is set explicitly on the response built by
                                            # flask.jsonify before it is returned.
                                            response = flask.jsonify(json_response)
                                            response.headers["Content-Type"] = "application/json"
                                            return response
                                            def raw_picture(id):
                                                resource = db.session.get(PictureResource, id)


@@ -71,7 +71,7 @@ The query format is based on YAML and used to query for pictures in the system.

                                            ### Example
                                            # Restrictions for queried images
                                        - want:
                                                # This means that the image must satisfy both rules, so it must contain both a cat and a dog
                                                - has_object: ["Cat (Felis catus)"]
                                                - has_object: ["Dog (Canis lupus familiaris)"]

@@ -90,28 +90,16 @@ The query format is based on YAML and used to query for pictures in the system.

                                                            "Apache-2.0", "Informal-attribution", "Informal-do-anything",
                                                            "Public-domain-old", "Public-domain"]
                                            # Prohibitions for queried images
                                        - exclude:
                                                # This means that the image must not contain any of the objects in the list
                                                - has_object: ["Human"]
                                            # This excludes images taken before the given date
                                            - before_date: 2019-01-01
                                            # This excludes images uploaded before the given date
                                            - before_date: 1546300800
                                                # Excluding images below these dimensions enforces a minimum resolution
                                                - below_width: 800
                                                - below_height: 600
                                        # The object types to get - if omitted, all object types mentioned in the
                                        # `want` section are returned
                                        - object_types: ["Cat (Felis catus)", "Dog (Canis lupus familiaris)"]
                                        # Pagination
                                        - limit: 32
                                        - offset: 0
                                        # Sorting
                                        - sort_by: "date-uploaded-recent"
                                        # Format
                                        - format: "jpg"
                                        - max_resolution: [800, 800]  # resizes
                                        # In summary, we want the 32 most recent images that contain both a cat and
                                        # a dog, either a grass or a flower, but not a human, taken after 2019-01-01,
                                        # must be a photo or a drawing, must carry one of certain permissive licences
                                        # and have a resolution of at least 800x600 pixels. We don't need the object
                                        # data for the plants, only for the pets.
                                        # In summary, we want images that contain both a cat and a dog, either a grass
                                        # or a flower, but not a human, taken after 2019-01-01, must be a photo or a
                                        # drawing, must carry one of certain permissive licences and have a resolution
                                        # of at least 800x600 pixels.


@@ -18,7 +18,8 @@

                                                        <a href="/raw/picture/{{ resource.id }}">View</a> |
                                                        <a href="/raw/picture/{{ resource.id }}" download="GigadataPicture_{{ resource.id }}{{ file_extension }}">Download</a> |
                                                        <a href="/picture/{{ resource.id }}/annotate">Annotate</a> |
                                                    <a href="/picture/{{ resource.id }}/put-annotations-form">Submit JSON annotations</a>
                                                    <a href="/picture/{{ resource.id }}/put-annotations-form">Submit JSON annotations</a> |
                                                    <a href="/picture/{{ resource.id }}/get-annotations">Download annotations</a>
                                                    <div id="annotation-zone">
                                                        <img id="annotation-image" src="/raw/picture/{{ resource.id }}" alt="{{ resource.title }}">