#!/usr/bin/env python3

# unidump version 0.1.1
# made by ubq323 in the year 2021
# please use this software for GOOD, not for EVIL
#
# Provenance: bin/unidump as added by commit
# de0349ba6f85bbb9b42b0ff99dfefc1312f0fd30 ("update unidump",
# ubq323, Sun, 26 Nov 2023 18:21:04 +0000).

"""Print one table row per character: glyph, category, name, code point.

The text comes from the command-line arguments (joined with single
spaces) or, when no text is given, from standard input.
"""

import unicodedata
import sys


def _print_usage():
    """Print the help text shown for ``-h`` / ``--help``."""
    print(f"usage: {sys.argv[0]} [-a] [text...]")
    print("\t-a: don't deduplicate input characters")
    print("\tif no text supplied, takes input from stdin")


def parse_args(argv):
    """Split the command line into options and the text to dump.

    Leading options are consumed until the first argument that is not a
    recognised option:

    * ``-a`` turns deduplication off.
    * ``-h`` / ``--help`` prints the usage text and exits with status 0.
    * ``--`` ends option processing; it is not part of the text.

    Anything else -- including an empty string, a lone ``-`` or an
    unrecognised ``-x`` -- is taken as the first word of the text, so
    text that merely starts with a dash is never silently dropped.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        A ``(uniquify, text)`` tuple.  ``text`` is every remaining
        argument joined with single spaces; it is ``""`` when no text
        was supplied.
    """
    uniquify = True
    text_start = len(argv)  # default: every argument was an option
    for index, arg in enumerate(argv):
        if arg == "--":
            text_start = index + 1  # skip the separator itself
            break
        if arg == "-a":
            uniquify = False
        elif arg in ("-h", "--help"):
            _print_usage()
            sys.exit(0)
        else:
            text_start = index
            break
    return uniquify, " ".join(argv[text_start:])


def row(c):
    """Format the table row for the single character *c*.

    The row is ``glyph | category | name | code point``.  The name is
    right-aligned to 50 columns (blank when the character has no name)
    and the code point is ``U+XXXX`` right-aligned to 7 columns.

    Args:
        c: A one-character string.

    Returns:
        The formatted row, without a trailing newline.
    """
    try:
        name = unicodedata.name(c)
    except ValueError:
        # unicodedata.name raises for characters without a name,
        # e.g. control characters.
        name = ""

    number = f"U+{ord(c):04X}".rjust(7)
    cat = unicodedata.category(c)

    glyph = c
    if cat == "Cc":
        # Control characters (newline, tab, CR, ESC, ...) would break
        # the table layout if printed raw, so show a blank instead.
        glyph = " "
    if cat[0] == "M":
        # Combining marks need a base to attach to: U+25CC DOTTED CIRCLE.
        glyph = chr(0x25CC) + glyph

    return f"{glyph} | {cat} | {name.rjust(50)} | {number}"


def rows(text, uniquify=True):
    """Yield the formatted row for each character of *text*.

    Args:
        text: The string to dump.
        uniquify: When true, each distinct character is reported only
            once, at its first occurrence.

    Yields:
        One formatted row per reported character, in input order.
    """
    # dict.fromkeys keeps first-occurrence order while dropping repeats.
    chars = dict.fromkeys(text) if uniquify else text
    for c in chars:
        yield row(c)


def main(argv=None):
    """Run unidump on *argv* (defaults to ``sys.argv[1:]``)."""
    if argv is None:
        argv = sys.argv[1:]
    uniquify, text = parse_args(argv)
    if not text:
        text = sys.stdin.read()
    for line in rows(text, uniquify):
        print(line)


if __name__ == "__main__":
    main()