Improve performance of pagify() (#5698)

Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
This commit is contained in:
Jakub Kuczys
2023-04-13 20:52:54 +02:00
committed by GitHub
parent 79d11e947c
commit 533f036ed2
3 changed files with 206 additions and 35 deletions

View File

@@ -1,5 +1,6 @@
import asyncio
import pytest
import operator
import random
from redbot.core.utils import (
bounded_gather,
@@ -7,6 +8,8 @@ from redbot.core.utils import (
deduplicate_iterables,
common_filters,
)
from redbot.core.utils.chat_formatting import pagify
from typing import List
def test_deduplicate_iterables():
@@ -137,3 +140,121 @@ async def test_bounded_gather_iter_cancel():
def test_normalize_smartquotes():
    """Check ``normalize_smartquotes`` on one curly-quote input and one plain input."""
    normalize = common_filters.normalize_smartquotes

    # U+2018 (left single quotation mark) is expected to become a plain apostrophe.
    assert normalize("Should\u2018 normalize") == "Should' normalize"
    # Text without any smart quotes is expected to come back unchanged.
    assert normalize("Same String") == "Same String"
@pytest.mark.parametrize(
    "text,pages,page_length",
    (
        # base case
        (
            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
            [
                "Line 1\nA",
                " longer line 2",
                "\n'tis a",
                " veeeeery long",
                " line numero",
                " tres\nand the",
                " last line",
            ],
            15,
        ),
        # mid-word split
        (
            "Interdisciplinary collaboration improves the quality\nof care.",
            ["Interdisciplinar", "y collaboration", " improves the", " quality\nof", " care."],
            16,
        ),
        # off-by-one errors
        ("Lorem ipsum dolor sit amet.", ["Lorem", " ipsum", " dolor", " sit", " amet."], 6),
        (
            "Lorem ipsum dolor sit amet.",
            # TODO: "r" and " sit" can fit together but current logic doesn't support it properly
            ["Lorem", " ipsu", "m", " dolo", "r", " sit", " amet", "."],
            5,
        ),
        (
            "Lorem ipsum dolor sit amet.",
            ["Lore", "m", " ips", "um", " dol", "or", " sit", " ame", "t."],
            4,
        ),
        # mass mentions
        (
            "@everyone listen to me!",
            # TODO: off-by-one: " listen" and " to me!" should have been " listen to" and " me!"
            ["@\u200beveryone", " listen", " to me!"],
            10,
        ),
        (
            "@everyone listen to me!",
            ["@everyon", "e listen", " to me!"],
            9,
        ),
        (
            "@everyone listen to me!",
            ["@everyon", "e", " listen", " to me!"],
            8,
        ),
        ("Is anyone @here?", ["Is anyone", " @\u200bhere?"], 10),
        # whitespace-only page skipping (`\n` skipped)
        ("Split:\n Long-word", ["Split:", " Long-", "word"], 6),
    ),
)
def test_pagify(text: str, pages: List[str], page_length: int):
    """Paginate ``text`` and compare the produced pages with ``pages``.

    ``pagify`` is called with the delimiters ``("\\n", " ")``, ``shorten_by=0``
    and the parametrized ``page_length``. Every yielded page must be no longer
    than ``page_length``, and the full sequence of pages must equal ``pages``.
    """
    delimiters = ("\n", " ")
    page_iter = pagify(text, delimiters, shorten_by=0, page_length=page_length)

    collected = []
    for chunk in page_iter:
        # Sanity check, done per page while iterating: no page may exceed the limit.
        assert len(chunk) <= page_length
        collected.append(chunk)

    assert pages == collected
@pytest.mark.parametrize(
    "text,pages,page_length",
    (
        # base case
        (
            "Line 1\nA longer line 2\n'tis a veeeeery long line numero tres\nand the last line",
            [
                "Line 1",
                "\nA longer line",
                " 2",
                "\n'tis a",
                " veeeeery long",
                " line numero",
                " tres",
                "\nand the last",
                " line",
            ],
            15,
        ),
        # mid-word split
        (
            "Interdisciplinary collaboration improves the quality\nof care.",
            ["Interdisciplinar", "y collaboration", " improves the", " quality", "\nof care."],
            16,
        ),
    ),
)
def test_pagify_priority(text: str, pages: List[str], page_length: int):
    """Paginate ``text`` with ``priority=True`` and compare with ``pages``.

    The call otherwise uses the delimiters ``("\\n", " ")``, ``shorten_by=0``
    and the parametrized ``page_length``. Every yielded page must be no longer
    than ``page_length``, and the full sequence of pages must equal ``pages``.

    NOTE(review): the expected pages break at ``"\\n"`` where one is available
    within the limit, and at ``" "`` otherwise -- presumably the effect of
    ``priority=True`` with this delimiter order; confirm against ``pagify``.
    """
    delimiters = ("\n", " ")
    page_iter = pagify(
        text,
        delimiters,
        priority=True,
        shorten_by=0,
        page_length=page_length,
    )

    collected = []
    for chunk in page_iter:
        # Sanity check, done per page while iterating: no page may exceed the limit.
        assert len(chunk) <= page_length
        collected.append(chunk)

    assert pages == collected
def test_pagify_length_hint():
    """Check the length hint reported by the ``pagify`` iterator as it is consumed.

    A 100-character text with ``page_length=10`` and ``shorten_by=0`` is expected
    to report 10 remaining pages up front, one fewer after each page is taken,
    and 0 once the iterator is exhausted.
    """
    text_length = 100
    per_page = 10
    pager = pagify("A" * text_length, shorten_by=0, page_length=per_page)

    pages_left = text_length // per_page
    assert operator.length_hint(pager) == pages_left

    # The page content is irrelevant here; only the hint after each step matters.
    for _ in pager:
        pages_left -= 1
        assert operator.length_hint(pager) == pages_left

    assert operator.length_hint(pager) == 0