Improve performance of pagify() (#5698)

Co-authored-by: jack1142 <6032823+jack1142@users.noreply.github.com>
This commit is contained in:
Jakub Kuczys
2023-04-13 20:52:54 +02:00
committed by GitHub
parent 79d11e947c
commit 533f036ed2
3 changed files with 206 additions and 35 deletions

View File

@@ -1,5 +1,8 @@
from __future__ import annotations
import datetime
import itertools
import math
import textwrap
from io import BytesIO
from typing import Iterator, List, Optional, Sequence, SupportsInt, Union
@@ -200,17 +203,11 @@ def spoiler(text: str, escape_formatting: bool = True) -> str:
return f"||{escape(text, formatting=escape_formatting)}||"
def pagify(
text: str,
delims: Sequence[str] = ["\n"],
*,
priority: bool = False,
escape_mass_mentions: bool = True,
shorten_by: int = 8,
page_length: int = 2000,
) -> Iterator[str]:
class pagify(Iterator[str]):
"""Generate multiple pages from the given text.
The returned iterator supports length estimation with :func:`operator.length_hint()`.
Note
----
This does not respect code blocks or inline code.
@@ -244,33 +241,82 @@ def pagify(
Pages of the given text.
"""
in_text = text
page_length -= shorten_by
while len(in_text) > page_length:
this_page_len = page_length
if escape_mass_mentions:
this_page_len -= in_text.count("@here", 0, page_length) + in_text.count(
"@everyone", 0, page_length
)
closest_delim = (in_text.rfind(d, 1, this_page_len) for d in delims)
if priority:
closest_delim = next((x for x in closest_delim if x > 0), -1)
else:
closest_delim = max(closest_delim)
closest_delim = closest_delim if closest_delim != -1 else this_page_len
if escape_mass_mentions:
to_send = escape(in_text[:closest_delim], mass_mentions=True)
else:
to_send = in_text[:closest_delim]
if len(to_send.strip()) > 0:
yield to_send
in_text = in_text[closest_delim:]
if len(in_text.strip()) > 0:
if escape_mass_mentions:
yield escape(in_text, mass_mentions=True)
else:
yield in_text
# when changing signature of this method, please update it in docs/framework_utils.rst as well
def __init__(
self,
text: str,
delims: Sequence[str] = ("\n",),
*,
priority: bool = False,
escape_mass_mentions: bool = True,
shorten_by: int = 8,
page_length: int = 2000,
) -> None:
self._text = text
self._delims = delims
self._priority = priority
self._escape_mass_mentions = escape_mass_mentions
self._shorten_by = shorten_by
self._page_length = page_length - shorten_by
self._start = 0
self._end = len(text)
def __repr__(self) -> str:
text = self._text
if len(text) > 20:
text = f"{text[:19]}\N{HORIZONTAL ELLIPSIS}"
return (
"pagify("
f"{text!r},"
f" {self._delims!r},"
f" priority={self._priority!r},"
f" escape_mass_mentions={self._escape_mass_mentions!r},"
f" shorten_by={self._shorten_by!r},"
f" page_length={self._page_length + self._shorten_by!r}"
")"
)
def __length_hint__(self) -> int:
return math.ceil((self._end - self._start) / self._page_length)
def __iter__(self) -> pagify:
return self
def __next__(self) -> str:
text = self._text
escape_mass_mentions = self._escape_mass_mentions
page_length = self._page_length
start = self._start
end = self._end
while (end - start) > page_length:
stop = start + page_length
if escape_mass_mentions:
stop -= text.count("@here", start, stop) + text.count("@everyone", start, stop)
closest_delim_it = (text.rfind(d, start + 1, stop) for d in self._delims)
if self._priority:
closest_delim = next((x for x in closest_delim_it if x > 0), -1)
else:
closest_delim = max(closest_delim_it)
stop = closest_delim if closest_delim != -1 else stop
if escape_mass_mentions:
to_send = escape(text[start:stop], mass_mentions=True)
else:
to_send = text[start:stop]
start = self._start = stop
if len(to_send.strip()) > 0:
return to_send
if len(text[start:end].strip()) > 0:
self._start = end
if escape_mass_mentions:
return escape(text[start:end], mass_mentions=True)
else:
return text[start:end]
raise StopIteration
def strikethrough(text: str, escape_formatting: bool = True) -> str: