[V3 Config] Update Mongo document organization to bypass doc size restriction (#2536)

* modify config to use identifier data class and update json driver

* move identifier data attributes into read only properties

* Update mongo get and set methods

* Update get/set to use UUID separately, make clear work

* Remove not implemented and fix get_raw

* Update remaining untouched get/set/clear

* Fix get_raw

* Finally fix get_raw and set_raw

* style

* This is better

* Sorry guys

* Update get behavior to handle "all" calls as expected

* style again

* Why do you do this to me

* style once more

* Update mongo schema
This commit is contained in:
Will
2019-04-03 09:04:47 -04:00
committed by GitHub
parent d6d6d14977
commit 1cd7e41f33
6 changed files with 194 additions and 77 deletions

View File

@@ -1,4 +1,6 @@
__all__ = ["get_driver"]
from .red_base import IdentifierData
__all__ = ["get_driver", "IdentifierData"]
def get_driver(type, *args, **kwargs):

View File

@@ -1,4 +1,51 @@
__all__ = ["BaseDriver"]
from typing import Tuple
__all__ = ["BaseDriver", "IdentifierData"]
class IdentifierData:
    """Immutable description of a location in Config's nested data.

    Bundles the cog's unique ID, the data category, the primary-key tuple
    and any remaining nested identifiers into one object, so drivers receive
    structured data instead of a flat ``*identifiers`` list.
    """

    def __init__(
        self,
        uuid: str,
        category: str,
        # NOTE: variable-length tuples are ``Tuple[str, ...]``; the original
        # ``Tuple[str]`` would mean "a tuple of exactly one string".
        primary_key: Tuple[str, ...],
        identifiers: Tuple[str, ...],
    ):
        self._uuid = uuid
        self._category = category
        self._primary_key = primary_key
        self._identifiers = identifiers

    @property
    def uuid(self) -> str:
        """Unique identifier of the owning Config instance."""
        return self._uuid

    @property
    def category(self) -> str:
        """Data category (one of Config's scope attributes)."""
        return self._category

    @property
    def primary_key(self) -> Tuple[str, ...]:
        """Primary-key components (possibly partial, e.g. for all_members)."""
        return self._primary_key

    @property
    def identifiers(self) -> Tuple[str, ...]:
        """Nested dict keys below the primary key."""
        return self._identifiers

    def __repr__(self) -> str:
        return (
            f"<IdentifierData uuid={self.uuid} category={self.category} primary_key={self.primary_key}"
            f" identifiers={self.identifiers}>"
        )

    def add_identifier(self, *identifier: str) -> "IdentifierData":
        """Return a new IdentifierData with *identifier* appended.

        Raises
        ------
        ValueError
            If any given identifier is not a string.
        """
        if not all(isinstance(i, str) for i in identifier):
            raise ValueError("Identifiers must be strings.")
        return IdentifierData(
            self.uuid, self.category, self.primary_key, self.identifiers + identifier
        )

    def to_tuple(self):
        """Flatten to a tuple of path components, dropping empty strings."""
        return tuple(
            item
            for item in (self.uuid, self.category, *self.primary_key, *self.identifiers)
            if len(item) > 0
        )
class BaseDriver:
@@ -6,14 +53,13 @@ class BaseDriver:
self.cog_name = cog_name
self.unique_cog_identifier = identifier
async def get(self, *identifiers: str):
async def get(self, identifier_data: IdentifierData):
"""
Finds the value indicated by the given identifiers.
Parameters
----------
identifiers
A list of identifiers that correspond to nested dict accesses.
identifier_data
Returns
-------
@@ -33,20 +79,19 @@ class BaseDriver:
"""
raise NotImplementedError
async def set(self, identifier_data: IdentifierData, value=None):
    """
    Sets the value of the key indicated by the given identifiers.

    Parameters
    ----------
    identifier_data
        Structured location of the value to set.
    value
        Any JSON serializable python object.
    """
    # Abstract: concrete drivers (JSON, Mongo) provide the implementation.
    raise NotImplementedError
async def clear(self, identifier_data: IdentifierData):
    """
    Clears out the value specified by the given identifiers.

    Parameters
    ----------
    identifier_data
        Structured location of the value to clear.
    """
    # Abstract: concrete drivers (JSON, Mongo) provide the implementation.
    raise NotImplementedError

View File

@@ -6,7 +6,7 @@ import logging
from ..json_io import JsonIO
from .red_base import BaseDriver
from .red_base import BaseDriver, IdentifierData
__all__ = ["JSON"]
@@ -93,16 +93,16 @@ class JSON(BaseDriver):
self.data = {}
self.jsonIO._save_json(self.data)
async def get(self, identifier_data: IdentifierData):
    """Return a deep copy of the value stored at *identifier_data*'s path.

    Raises
    ------
    KeyError
        If any component of the flattened path is missing.
    """
    partial = self.data
    full_identifiers = identifier_data.to_tuple()
    for i in full_identifiers:
        partial = partial[i]
    # Deep copy so callers can't mutate the driver's in-memory store.
    return copy.deepcopy(partial)
async def set(self, identifier_data: IdentifierData, value=None):
    """Store *value* at the nested path described by *identifier_data*.

    Intermediate dicts are created as needed; the whole dataset is then
    persisted via the JsonIO thread-safe writer.
    """
    partial = self.data
    full_identifiers = identifier_data.to_tuple()
    for i in full_identifiers[:-1]:
        if i not in partial:
            partial[i] = {}
        partial = partial[i]
    # Deep-copy so later mutation of value by the caller can't alter stored data.
    partial[full_identifiers[-1]] = copy.deepcopy(value)
    await self.jsonIO._threadsafe_save_json(self.data)
async def clear(self, *identifiers: str):
async def clear(self, identifier_data: IdentifierData):
partial = self.data
full_identifiers = (self.unique_cog_identifier, *identifiers)
full_identifiers = identifier_data.to_tuple()
try:
for i in full_identifiers[:-1]:
partial = partial[i]

View File

@@ -1,11 +1,12 @@
import re
from typing import Match, Pattern
from typing import Match, Pattern, Tuple
from urllib.parse import quote_plus
import motor.core
import motor.motor_asyncio
from motor.motor_asyncio import AsyncIOMotorCursor
from .red_base import BaseDriver
from .red_base import BaseDriver, IdentifierData
__all__ = ["Mongo"]
@@ -64,66 +65,119 @@ class Mongo(BaseDriver):
"""
return _conn.get_database()
def get_collection(self, category: str) -> motor.core.Collection:
    """
    Gets a specified collection within the PyMongo database for this cog.

    Unless you are doing custom stuff ``category`` should be one of the class
    attributes of :py:class:`core.config.Config`.

    :param str category:
        Name of the sub-collection under this cog's collection.
    :return:
        PyMongo collection object.
    """
    # One sub-collection per category keeps documents small enough to
    # stay under Mongo's per-document size limit.
    return self.db[self.cog_name][category]
@staticmethod
def _parse_identifiers(identifiers):
uuid, identifiers = identifiers[0], identifiers[1:]
return uuid, identifiers
def get_primary_key(self, identifier_data: IdentifierData) -> Tuple[str, ...]:
    """Return the primary-key tuple carried by *identifier_data*."""
    # noinspection PyTypeChecker
    return identifier_data.primary_key
async def rebuild_dataset(self, identifier_data: IdentifierData, cursor: AsyncIOMotorCursor):
    """Merge the documents yielded by *cursor* back into one nested dict.

    Each document's ``_id`` carries the full primary-key path it was stored
    under (``RED_primary_key``); that path is used to re-nest the document's
    fields, skipping levels the caller already fixed in its (partial)
    primary key.

    Raises
    ------
    RuntimeError
        If a document has an empty primary key (should not happen).
    """
    ret = {}
    async for doc in cursor:
        pkeys = doc["_id"]["RED_primary_key"]
        # Everything left in doc after this is user data.
        del doc["_id"]
        if len(pkeys) == 1:
            # Global data
            ret.update(**doc)
        elif len(pkeys) > 1:
            # All other data
            partial = ret
            for key in pkeys[1:-1]:
                # Levels already named by the caller's partial primary key
                # are implicit in the result and must not be nested again.
                if key in identifier_data.primary_key:
                    continue
                if key not in partial:
                    partial[key] = {}
                partial = partial[key]
            if pkeys[-1] in identifier_data.primary_key:
                partial.update(**doc)
            else:
                partial[pkeys[-1]] = doc
        else:
            raise RuntimeError("This should not happen.")
    return ret
async def get(self, identifier_data: IdentifierData):
    """Retrieve the value stored at *identifier_data*'s location.

    Raises
    ------
    KeyError
        If no matching document was found (Config expects a KeyError).
    """
    mongo_collection = self.get_collection(identifier_data.category)
    pkey_filter = self.generate_primary_key_filter(identifier_data)
    if len(identifier_data.identifiers) > 0:
        # Full path known: project only the requested sub-document.
        dot_identifiers = ".".join(map(self._escape_key, identifier_data.identifiers))
        proj = {"_id": False, dot_identifiers: True}
        partial = await mongo_collection.find_one(filter=pkey_filter, projection=proj)
    else:
        # The case here is for partial primary keys like all_members()
        cursor = mongo_collection.find(filter=pkey_filter)
        partial = await self.rebuild_dataset(identifier_data, cursor)
    if partial is None:
        raise KeyError("No matching document was found and Config expects a KeyError.")
    # NOTE(review): the projection path is escaped but this walk indexes with
    # the raw identifiers — confirm stored keys match unescaped lookups.
    for i in identifier_data.identifiers:
        partial = partial[i]
    if isinstance(partial, dict):
        return self._unescape_dict_keys(partial)
    return partial
async def set(self, identifier_data: IdentifierData, value=None):
    """Upsert *value* at the location described by *identifier_data*.

    An empty dict value is treated as a delete and delegated to
    :meth:`clear`, since storing ``{}`` is equivalent to no data.
    """
    uuid = self._escape_key(identifier_data.uuid)
    primary_key = list(map(self._escape_key, self.get_primary_key(identifier_data)))
    dot_identifiers = ".".join(map(self._escape_key, identifier_data.identifiers))
    if isinstance(value, dict):
        if len(value) == 0:
            await self.clear(identifier_data)
            return
        value = self._escape_dict_keys(value)
    mongo_collection = self.get_collection(identifier_data.category)
    if len(dot_identifiers) > 0:
        # Set a nested field inside the document.
        update_stmt = {"$set": {dot_identifiers: value}}
    else:
        # No identifiers: value's keys become the document's top-level fields.
        update_stmt = {"$set": value}
    await mongo_collection.update_one(
        {"_id": {"RED_uuid": uuid, "RED_primary_key": primary_key}},
        update=update_stmt,
        upsert=True,
    )
def generate_primary_key_filter(self, identifier_data: IdentifierData):
    """Build a MongoDB filter matching documents for this primary key.

    With identifiers present the full primary key is matched exactly;
    otherwise only the known prefix of the primary key is matched
    positionally, so partial-key queries (e.g. all_members) can cover
    every document underneath the prefix.
    """
    uuid = self._escape_key(identifier_data.uuid)
    primary_key = list(map(self._escape_key, self.get_primary_key(identifier_data)))
    ret = {"_id": {"RED_uuid": uuid}}
    if len(identifier_data.identifiers) > 0:
        ret["_id"]["RED_primary_key"] = primary_key
    else:
        # NOTE(review): dotted keys inside an exact "_id" subdocument match
        # may not behave as positional path matching — confirm against
        # MongoDB embedded-document query semantics.
        for i, key in enumerate(primary_key):
            keyname = f"RED_primary_key.{i}"
            ret["_id"][keyname] = key
    return ret
async def clear(self, identifier_data: IdentifierData):
    """Remove the data selected by *identifier_data*.

    Four cases are handled here:
    1) identifiers present -> unset just that nested field;
    2) full primary key, no identifiers -> delete that document;
    3) partial primary key, no identifiers -> delete every matching document;
    4) empty primary key -> wipe all documents in the category's collection.
    Cases 2-4 all reduce to a single delete_many with the generated filter.
    """
    collection = self.get_collection(identifier_data.category)
    key_filter = self.generate_primary_key_filter(identifier_data)
    identifiers = identifier_data.identifiers
    if identifiers:
        dotted = ".".join(map(self._escape_key, identifiers))
        await collection.update_one(key_filter, update={"$unset": {dotted: 1}})
    else:
        await collection.delete_many(key_filter)
@staticmethod
def _escape_key(key: str) -> str: