mirror of
https://github.com/HorlogeSkynet/SgEExt
synced 2024-11-24 20:00:09 +01:00
302 lines
11 KiB
Python
302 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
|
|
"""
|
|
SgEExt is a Simple gemoji Emoji Extractor script, to download (mostly from GitHub) gemoji PNGs.
|
|
The logic is simple but offers some options, just run `[python3] sgeext[.py] -h` to list them.
|
|
"""
|
|
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
from shutil import copyfile
|
|
from subprocess import check_output, CalledProcessError, DEVNULL
|
|
from typing import List, Optional
|
|
|
|
import requests
|
|
|
|
|
|
# URL template of the images (emojis and "regular" images) served by GitHub.
# The `{}` placeholder receives the image name, without its `.png` extension.
GITHUB_ASSETS_BASE_URL = "https://github.githubassets.com/images/icons/emoji/{}.png"

# URL of the emojis database (JSON), hosted within the gemoji project sources.
EMOJI_DB_URL = "https://github.com/github/gemoji/raw/master/db/emoji.json"
|
|
|
|
|
|
def open_and_load_emojis_db(file_path: str) -> List[dict]:
    """
    Open `file_path`, load its content as JSON and return it.

    Any failure to read or decode the file is logged as an error, and an empty list is
    returned instead, so callers can always iterate over the result.
    """
    try:
        with open(file_path, mode="rb") as f_emojis_db:
            return json.load(f_emojis_db)
    except (OSError, ValueError) as error:
        # `OSError` covers missing files, but also directories and permission errors.
        # `ValueError` covers `json.JSONDecodeError`, but also `UnicodeDecodeError`.
        logging.error("Could not read or load the emojis database : %s.", error)
        return []
|
|
|
|
|
|
def download_file(
    url: str,
    path: Optional[str] = None,
    force: bool = False,
    real_name: Optional[str] = None,
    timeout: float = 30.0,
) -> bool:
    """
    Download a file specified by `url` and save it locally under `path`.
    Normalize path and / or create non-existing directory structure.

    `path` defaults to the current working directory.
    When `real_name` is set, the file is saved as `<real_name>.png`, otherwise under the last
    component of `url`.
    Unless `force` is set, an already existing local file is kept as is (and counts as success).
    `timeout` (in seconds) is handed to `requests`, so a stalled server cannot block forever.

    Returns `True` on success, and `False` on error.
    See <https://stackoverflow.com/a/16696317/10599709>
    """
    if not path:
        path = os.getcwd()
    else:
        # `exist_ok` prevents a race between an existence check and the actual creation.
        os.makedirs(path, mode=0o755, exist_ok=True)

    # Save this entity under the specified name, or directly its remote name.
    if real_name:
        file_name = os.path.join(path, real_name + ".png")
    else:
        file_name = os.path.join(path, url.split("/")[-1])

    if not force and os.path.exists(file_name):
        # This file already exists, skip it when running non-force mode.
        logging.info("The file %s already exists, run `-f` to download it again.", file_name)
        return True

    logging.info("Downloading <%s> to %s", url, file_name)

    # Only clean the local file up if _we_ started to (over)write it.
    partially_written = False
    try:
        with requests.get(url, stream=True, timeout=timeout) as get_request:
            if get_request.status_code != 200:
                # This URL does not exist ; Don't try to download a thing !
                logging.warning("The URL above does not exist, can't download.")
                return False

            with open(file_name, "wb") as f_image:
                partially_written = True
                for chunk in get_request.iter_content(chunk_size=8192):
                    if chunk:
                        f_image.write(chunk)
    except requests.RequestException as error:
        logging.warning("The download of <%s> failed : %s.", url, error)
        if partially_written:
            # A truncated file would be considered as "already existing" by the next run.
            try:
                os.remove(file_name)
            except OSError as removal_error:
                logging.info("Could not remove %s : %s.", file_name, removal_error)
        return False

    return True
|
|
|
|
|
|
def localize_emoji_install() -> Optional[str]:
    """Locate the gemoji Ruby gem on this system and return its root folder, or `None`"""
    # Ask RubyGems where the main file of the gemoji gem lives.
    try:
        gemoji_lib_file = check_output(
            ["gem", "which", "gemoji"],
            universal_newlines=True,
            stderr=DEVNULL,
        ).strip()
    except (FileNotFoundError, CalledProcessError) as error:
        # No `gem` program, or no gemoji gem : the caller will have to work without it.
        logging.info("Localization of the gemoji gem installation failed : %s.", error)
        return None

    # The root folder is the grand-parent of the file reported above.
    # Usually `/var/lib/gems/X.Y.Z/gems/gemoji-T.U.V/lib/gemoji.rb` on GNU/Linux.
    # Please check <https://github.com/github/gemoji> project structure.
    # The captured group keeps its trailing path separator.
    escaped_path_sep = re.escape(os.sep)
    install_pattern = re.compile(
        rf"^(.+?{escaped_path_sep}gemoji-.+?{escaped_path_sep})lib{escaped_path_sep}gemoji\.rb$"
    )

    located = install_pattern.fullmatch(gemoji_lib_file)
    if located is not None:
        install_root = located.group(1)
        logging.info("Found gemoji gem installation folder : %s", install_root)
        return install_root

    logging.info(
        "gemoji looks installed on your system, but couldn't locate it precisely."
        " Please open an issue on the project repository."
    )
    return None
|
|
|
|
|
|
def retrieve_emoji_db(gemoji_local_path: Optional[str] = None) -> List[dict]:
    """
    This function tries anyhow to open and load an emoji database.
    It may end up locally (see `gemoji_local_path`), or remote (gemoji sources on GitHub).

    `gemoji_local_path` is the root folder of a local gemoji installation (with or without a
    trailing path separator).
    Returns the loaded database, or an empty list when it could not be retrieved.
    """
    # Function-scope import : keeps this function self-contained.
    import tempfile

    # Now, let's try to load the emojis database (JSON).
    if gemoji_local_path:
        # Let `os.path.join` deal with the (optional) trailing separator.
        return open_and_load_emojis_db(os.path.join(gemoji_local_path, "db", "emoji.json"))

    # If we don't have it locally, just temporarily fetch it from the GitHub project.
    # A dedicated temporary folder is used so a user's own `emoji.json` file in the current
    # working directory is never loaded by mistake, nor removed afterwards.
    with tempfile.TemporaryDirectory() as temporary_folder:
        if not download_file(EMOJI_DB_URL, path=temporary_folder, force=True):
            logging.error("Could not download the emojis database from <%s>.", EMOJI_DB_URL)
            return []

        emojis_db_local_file = os.path.join(temporary_folder, "emoji.json")
        emojis_db = open_and_load_emojis_db(emojis_db_local_file)

    logging.info("The temporarily emojis database (%s) has been removed.", emojis_db_local_file)

    return emojis_db
|
|
|
|
|
|
def handle_emoji_extraction(
    emoji: dict, first_alias: str, path: str, force: bool, real_names: bool
) -> bool:
    """
    Simple function reducing `perform_emojis_extraction` cyclomatic complexity.

    Infer the remote image name of `emoji` from the characters of its `"emoji"` entry, and
    download it under the `unicode` sub-folder of `path`.
    When `real_names` is set, the image is saved under `first_alias` instead of its code.
    Returns the result of `download_file`.
    """
    # Extract emoji Unicode value, and format it as an hexadecimal string.
    # NOTE(review): code points are concatenated without any separator, only the two special
    #               cases below insert a hyphen. Confirm that the other multi-code-points
    #               emojis resolve to an existing remote image.
    code = "".join(format(ord(char), "x") for char in emoji["emoji"])

    # `flags` has to be passed by keyword below : the fourth positional parameter of
    # `re.sub` is `count`.

    # Some emojis contain a "variation selector" at the end of their Unicode value.
    # VS-15 : U+FE0E || VS-16 : U+FE0F
    code = re.sub(r"fe0[ef]$", "", code, flags=re.IGNORECASE)

    # For "shrugging" emojis only (`1f937-*`), we have to replace `200d` by a real hyphen.
    code = re.sub(r"^(1f937)(?:200d)(.*)$", r"\1-\2", code, flags=re.IGNORECASE)

    # For "flags" emojis only (`1f1??1f1??`), we have to add an extra hyphen...
    code = re.sub(r"^(1f1)(..)(1f1)(..)$", r"\1\2-\3\4", code, flags=re.IGNORECASE)

    logging.info("Inferred %s Unicode value for %s", code, first_alias)

    return download_file(
        url=GITHUB_ASSETS_BASE_URL.format("unicode/" + code),
        path=os.path.join(path, "unicode"),
        force=force,
        real_name=(first_alias if real_names else None),
    )
|
|
|
|
|
|
def handle_github_emojis(
    first_alias: str, path: str, force: bool, gemoji_local_path: Optional[str] = None
) -> bool:
    """
    Simple function reducing `perform_emojis_extraction` cyclomatic complexity.

    Retrieve the "regular" image named `first_alias` into `path` : copy it from the `images`
    folder of `gemoji_local_path` when set, download it otherwise.
    Unless `force` is set, an already existing local image is kept as is.
    Returns `True` on success (or when skipped), and `False` on error.
    """
    if not gemoji_local_path:
        # I told you it was not an issue, let's download it as well !
        return download_file(url=GITHUB_ASSETS_BASE_URL.format(first_alias), path=path, force=force)

    # We already have it locally somewhere, just copy it...
    image_name = first_alias + ".png"
    image_local_path = os.path.join(path, image_name)
    if not force and os.path.exists(image_local_path):
        # This file already exists, skip it when running non-force mode.
        logging.info("The file %s already exists, run `-f` to copy it again.", image_local_path)
        return True

    logging.info("Copying %s from your local system.", image_local_path)
    try:
        # Unlike the download path, nothing else creates the target folder for us here.
        os.makedirs(path, mode=0o755, exist_ok=True)
        copyfile(os.path.join(gemoji_local_path, "images", image_name), image_local_path)
    except OSError as error:
        # Typically : the image is missing from the gem, or the target is not writable.
        logging.warning("Could not copy %s from your local system : %s.", image_name, error)
        return False

    return True
|
|
|
|
|
|
def perform_emojis_extraction(
    path: str, force: bool, subset: List[str], real_names: bool, only_real_emojis: bool
) -> None:
    """
    Effectively perform the emojis extraction.
    By default, run extraction on the whole set.
    The `subset` parameter allows the user to provide a specific list of emojis.

    `subset` is left untouched : the names still to be found are tracked in a private set.
    When `only_real_emojis` is set, GitHub "regular" images are ignored.
    """
    gemoji_local_path = localize_emoji_install()
    emojis_db = retrieve_emoji_db(gemoji_local_path)

    # Names the user asked for, and that have not been found in the database (yet).
    # Built once (not for each emoji), and without altering the caller's list.
    remaining_names = set(subset or [])
    filter_on_subset = bool(remaining_names)

    # Iterate over the elements, looking for "real" emojis and "regular" images.
    extracted_count = 0
    for emoji in emojis_db:
        if filter_on_subset:
            # Intersect our `subset` names with this emoji's aliases !
            # This allows us to "find" emojis whose user-supplied name is an alternative.
            # For instance : Match `bow`, even if its "official" name is `bowing_man`.
            match_names = remaining_names.intersection(emoji["aliases"])
            if not match_names:
                continue

        # The _first_ alias in the list is effectively used to compute its Unicode value.
        first_alias = emoji["aliases"][0]

        if "emoji" in emoji:
            if handle_emoji_extraction(emoji, first_alias, path, force, real_names):
                extracted_count += 1
        elif not only_real_emojis:
            # Those are GitHub "fake" emojis ("regular" images).
            if handle_github_emojis(first_alias, path, force, gemoji_local_path):
                extracted_count += 1

        if filter_on_subset:
            # The operations above _should_ be OK, we may remove these elements from the set.
            remaining_names -= match_names
            if not remaining_names:
                # We reached the end of the user-supplied elements. We may stop the iteration.
                break

    # At this moment, `remaining_names` contains only the elements that have not been found...
    if remaining_names:
        # Report them in the order the user supplied them (without duplicates).
        not_found = [name for name in dict.fromkeys(subset) if name in remaining_names]
        logging.warning("The following emojis have not been found : '%s'", "', '".join(not_found))

    logging.info("Successfully downloaded / copied %i emojis !", extracted_count)
|
|
|
|
|
|
def main():
    """Script entry point : parse the command-line, set logging up and run the extraction"""
    cli = argparse.ArgumentParser(
        description="A simple gemoji emojis extractor (for non macOS users)"
    )

    # Where to extract, and what to extract.
    cli.add_argument(
        "-d",
        "--directory",
        type=str,
        default=os.path.join(os.getcwd(), "emoji"),
        help="Extraction path location",
    )
    cli.add_argument(
        "-f",
        "--force",
        default=False,
        action="store_true",
        help="Force file download, even if they already exist",
    )
    cli.add_argument(
        "-l",
        "--list",
        type=str,
        default=[],
        nargs="+",
        help="List of emojis aliases to operate on",
    )

    # How to name and filter the extracted images.
    cli.add_argument(
        "-n",
        "--names",
        default=False,
        action="store_true",
        help='Save emojis under their "real" name instead of Unicode',
    )
    cli.add_argument(
        "-o",
        "--only-emojis",
        default=False,
        action="store_true",
        help='Ignores "fake" emojis (images) added by GitHub',
    )

    # Miscellaneous.
    cli.add_argument(
        "-v",
        "--verbose",
        default=False,
        action="store_true",
        help="Show debugging logs and monitor progression",
    )
    cli.add_argument(
        "--version",
        action="version",
        version="%(prog)s : 2.6.0",
    )

    # Parse the user-supplied command-line options.
    options = cli.parse_args()

    # Informational logs are only shown in verbose mode.
    log_level = logging.INFO if options.verbose else logging.WARNING
    logging.basicConfig(format="[%(levelname)s] : %(message)s", level=log_level)

    # EXTRACT ALL-THE-THINGS !
    perform_emojis_extraction(
        path=options.directory,
        force=options.force,
        subset=options.list,
        real_names=options.names,
        only_real_emojis=options.only_emojis,
    )
|
|
|
|
|
|
# Only run the extraction when executed as a script (not when imported as a module).
if __name__ == "__main__":
    main()
|