Improve performance of pagify() (#5698)

Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
2026-03-07 04:38:35 -05:00 · 2023-04-13 20:52:54 +02:00
parent 79d11e947c
commit 533f036ed2
3 changed files with 206 additions and 35 deletions
--- a/tests/core/test_utils.py
+++ b/tests/core/test_utils.py
@@ -1,5 +1,6 @@
 import asyncio
 import pytest
+import operator
 import random
 from redbot.core.utils import (
    bounded_gather,
@@ -7,6 +8,8 @@ from redbot.core.utils import (
    deduplicate_iterables,
    common_filters,
 )
+from redbot.core.utils.chat_formatting import pagify
+from typing import List


 def test_deduplicate_iterables():
@@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel():
 def test_normalize_smartquotes():
    assert common_filters.normalize_smartquotes("Should\u2018 normalize") == "Should' normalize"
    assert common_filters.normalize_smartquotes("Same String") == "Same String"
+
+
+@pytest.mark.parametrize(
+    "text,pages,page_length",
+    (
+        # base case
+        (
+            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
+            [
+                "Line 1\nA",
+                " longer line 2",
+                "\n'tis a",
+                " veeeeery long",
+                " line numero",
+                " tres\nand the",
+                " last line",
+            ],
+            15,
+        ),
+        # mid-word split
+        (
+            "Interdisciplinary collaboration improves the quality\nof care.",
+            ["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."],
+            16,
+        ),
+        # off-by-one errors
+        ("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6),
+        (
+            "Lorem ipsum dolor sit amet.",
+            # TODO: "r" and " sit" can fit together but current logic doesn't support it properly
+            ["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."],
+            5,
+        ),
+        (
+            "Lorem ipsum dolor sit amet.",
+            ["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."],
+            4,
+        ),
+        # mass mentions
+        (
+            "@everyone listen to me!",
+            # TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!"
+            ["@\u200beveryone", " listen", " to me!"],
+            10,
+        ),
+        (
+            "@everyone listen to me!",
+            ["@everyon", "e listen", " to me!"],
+            9,
+        ),
+        (
+            "@everyone listen to me!",
+            ["@everyon", "e", " listen", " to me!"],
+            8,
+        ),
+        ("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10),
+        # whitespace-only page skipping (`\n` skipped)
+        ("Split:\n Long-word", ["Split:", " Long-", "word"], 6),
+    ),
+)
+def test_pagify(text: str, pages: List[str], page_length: int):
+    result = []
+    for page in pagify(text, ("\n", " "), shorten_by=0, page_length=page_length):
+        # sanity check
+        assert len(page) <= page_length
+        result.append(page)
+
+    assert pages == result
+
+
+@pytest.mark.parametrize(
+    "text,pages,page_length",
+    (
+        # base case
+        (
+            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
+            [
+                "Line 1",
+                "\nA longer line",
+                " 2",
+                "\n'tis a",
+                " veeeeery long",
+                " line numero",
+                " tres",
+                "\nand the last",
+                " line",
+            ],
+            15,
+        ),
+        # mid-word split
+        (
+            "Interdisciplinary collaboration improves the quality\nof care.",
+            ["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."],
+            16,
+        ),
+    ),
+)
+def test_pagify_priority(text: str, pages: List[str], page_length: int):
+    result = []
+    for page in pagify(text, ("\n", " "), priority=True, shorten_by=0, page_length=page_length):
+        # sanity check
+        assert len(page) <= page_length
+        result.append(page)
+
+    assert pages == result
+
+
+def test_pagify_length_hint():
+    it = pagify("A" * 100, shorten_by=0, page_length=10)
+    remaining = 100 // 10
+
+    assert operator.length_hint(it) == remaining
+
+    for page in it:
+        remaining -= 1
+        assert operator.length_hint(it) == remaining
+
+    assert operator.length_hint(it) == 0