mirror of
https://github.com/ublue-os/bazzite.git
synced 2025-01-01 03:21:41 +00:00
docs: Always fetch the highest quality image by fetch_discourse_md
This commit is contained in:
parent
433dd2d3dd
commit
74130ee1fe
@ -109,8 +109,9 @@ class DiscourseProcessor:
|
||||
class Patterns:
    """Regular expressions shared by the processor's parsing helpers."""

    # Separator line (a fixed run of hyphens) found in the markdown output.
    post_sep_markdown = re.compile(r"-------------------------")

    # One whole <img> tag. Named groups:
    #   tag           - the complete tag text, from "<img" to the closing ">"
    #   image_cdn_url - the https URL in the src attribute
    #   alt           - the alt text (may be empty or contain punctuation)
    #   sha1          - the value of the data-base62-sha1 attribute
    # "[^>]*" keeps a match inside a single tag; a greedy ".*" would run
    # across several tags on one line and pair the first tag's URL with the
    # last tag's sha1. "[^\"]*" for alt avoids the exponential backtracking of
    # a nested quantifier and does not skip alts that contain punctuation.
    imgs_urls = re.compile(
        r"(?P<tag><img\ssrc=\"(?P<image_cdn_url>https://(?:[a-zA-Z0-9./_-]+))\""
        r"\salt=\"(?P<alt>[^\"]*)\""
        r"[^>]*?data-base62-sha1=\"(?P<sha1>[a-zA-Z0-9]+)\""
        r"[^>]*>)"
    )

    # Captures (group 1) the URL that follows the "1.5x," entry and is marked
    # "2x" inside a srcset attribute. Compiled like the sibling patterns;
    # re.search() accepts a compiled pattern, so string-style call sites that
    # pass it to re.search() keep working.
    imgs_urls_biggest_img = re.compile(
        r"(?<=srcset=\").*1\.5x,\s(https://.*)\s2x\""
    )

    # "upload://" followed by 27 alphanumerics and a jpg/jpeg/png/svg suffix.
    hashed_images_urls = re.compile(r"upload://[a-zA-Z0-9]{27}\.(?:jpe?g|png|svg)")
|
||||
|
||||
@staticmethod
|
||||
@ -191,7 +192,11 @@ class DiscourseProcessor:
|
||||
def get_images_url_assocs_from_page(cls, page: HTMLPage) -> ImageUrlAssocs:
    """Collect ``(sha1, url)`` pairs for every image tag found in *page*.

    Each match of ``Patterns.imgs_urls`` contributes one pair. The URL is
    the tag's ``src`` value unless ``Patterns.imgs_urls_biggest_img`` also
    matches inside the tag, in which case the URL captured by that pattern
    is used instead.

    Args:
        page: Text to scan; it is handed to ``re.finditer`` as-is.
            NOTE(review): presumably the page's HTML as a ``str`` - confirm
            against the ``HTMLPage`` definition.

    Returns:
        A list of ``(sha1, image_url)`` tuples in order of appearance;
        empty when no image tag matches.
    """
    # Resolve the pattern holder once instead of on every iteration.
    patterns = DiscourseProcessor.Patterns
    result: list[tuple[str, str]] = []
    for match in re.finditer(patterns.imgs_urls, page):
        sha1, image_cdn_url, img_tag = match.group("sha1", "image_cdn_url", "tag")
        # Swap in the larger srcset candidate when the tag provides one.
        if img_big := re.search(patterns.imgs_urls_biggest_img, img_tag):
            image_cdn_url = img_big.group(1)
        result.append((sha1, image_cdn_url))
    return result
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user