Using perl, this works in any shell as long as the arguments are encoded in UTF-8:
$ perl -CA -E 'for my $arg (@ARGV) { say map { my $cp = ord; $cp > 127 ? sprintf "U+%04X", $cp : $_ } split //, $arg }' "My📔"
MyU+1F4D4
Non-ASCII codepoints are printed as U+XXXX (0-padded, more hex digits if needed), ASCII ones as human-readable letters.
Or for maximum speed, a C program that does the same:
// Compile with: gcc -o print_unicode -std=c11 -O -Wall -Wextra print_unicode.c
#include <assert.h>
#include <inttypes.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
// Stop the build early unless the compiler reports C11 (201112L) or later.
#if !(__STDC_VERSION__ >= 201112L)
#error "Need C11 or newer"
#endif
// Stop the build when the implementation does not define __STDC_UTF_32__.
#if !defined(__STDC_UTF_32__)
#error "Not using Unicode"
#endif
/*
 * Print every command-line argument on its own line.
 *
 * Each argument is decoded from the locale's multibyte encoding one
 * character at a time. Code points above 127 are written as U+XXXX
 * (zero-padded to at least four hex digits); everything else is written
 * unchanged.
 *
 * Returns 0 on success, or EXIT_FAILURE if an argument contains an invalid
 * or truncated multibyte sequence.
 */
int main(int argc, char **argv) {
  // arguments should be encoded according to locale's character set
  setlocale(LC_CTYPE, "");
  for (int i = 1; i < argc; i++) {
    const char *s = argv[i];
    size_t len = strlen(s);
    // Start every argument in the initial conversion state.
    mbstate_t state;
    memset(&state, 0, sizeof state);
    while (len > 0) {
      char32_t c;
      size_t rc = mbrtoc32(&c, s, len, &state);
      // (size_t)-3 would mean "more output, no input consumed"; the loop
      // below is not written to handle that, so treat it as a logic error.
      assert(rc != (size_t)-3);
      if (rc == (size_t)-1) {
        // Invalid multibyte sequence.
        perror("mbrtoc32");
        return EXIT_FAILURE;
      } else if (rc == (size_t)-2) {
        // The argument ends in the middle of a multibyte character.
        fprintf(stderr, "Argument %d is incomplete!\n", i);
        return EXIT_FAILURE;
      } else if (rc == 0) {
        // A null character was decoded; nothing was consumed that we can
        // skip over, so stop instead of looping forever.
        break;
      }
      if (c > 127) {
        printf("U+%04" PRIXLEAST32, c);
      } else {
        putchar((char)c);
      }
      s += rc;
      len -= rc;
    }
    // One output line per argument.
    putchar('\n');
  }
  return 0;
}
$ ./print_unicode "My📔"
MyU+1F4D4