Skip to content

Commit

Permalink
#82 *Improved UTF8 input/output via console on Windows.
Browse files Browse the repository at this point in the history
  • Loading branch information
paladin-t committed Oct 10, 2023
1 parent 67ef347 commit f5aba59
Showing 4 changed files with 71 additions and 10 deletions.
3 changes: 3 additions & 0 deletions HISTORY
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Oct. 10 2023
Improved UTF8 input/output via console on Windows

Sep. 18 2023
Improved handling of Unicode quotes

20 changes: 10 additions & 10 deletions core/my_basic.c
Original file line number Diff line number Diff line change
@@ -1346,11 +1346,11 @@ static bool_t mb_is_little_endian(void);

/** Unicode handling */

#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
static int mb_bytes_to_wchar(const char* sz, wchar_t** out, size_t size);
static int mb_bytes_to_wchar_ansi(const char* sz, wchar_t** out, size_t size);
static int mb_wchar_to_bytes(const wchar_t* sz, char** out, size_t size);
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */

static int mb_uu_getbom(const char** ch);
#ifdef MB_ENABLE_UNICODE
@@ -3346,7 +3346,7 @@ static bool_t mb_is_little_endian(void) {

/** Unicode handling */

#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
/* Map a UTF8 character string to a UTF16 (wide character) string */
static int mb_bytes_to_wchar(const char* sz, wchar_t** out, size_t size) {
int result = MultiByteToWideChar(CP_UTF8, 0, sz, -1, 0, 0);
@@ -3373,7 +3373,7 @@ static int mb_wchar_to_bytes(const wchar_t* sz, char** out, size_t size) {

return result;
}
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */

/* Determine whether a string begins with a BOM, and ignore it */
static int mb_uu_getbom(const char** ch) {
@@ -4926,7 +4926,7 @@ static int _standard_printer(mb_interpreter_t* s, const char* fmt, ...) {

/* Print a string */
static void _print_string(mb_interpreter_t* s, _object_t* obj) {
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
char* str = 0;
_dynamic_buffer_t buf;
size_t lbuf = 0;
@@ -4940,11 +4940,11 @@ static void _print_string(mb_interpreter_t* s, _object_t* obj) {
}
_get_printer(s)(s, "%ls", _WCHAR_BUF_PTR(buf));
_DISPOSE_BUF(buf);
#else /* MB_CP_VC && MB_ENABLE_UNICODE */
#else /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
mb_assert(s && obj);

_get_printer(s)(s, "%s", obj->data.string ? obj->data.string : MB_NULL_STRING);
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
}

/** Parsing helpers */
@@ -18610,7 +18610,7 @@ static int _std_input(mb_interpreter_t* s, void** l) {
safe_free(obj->data.variable->data->data.string);
}
len = (size_t)_get_inputer(s)(s, pmt, line, sizeof(line));
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE
#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING
do {
_dynamic_buffer_t buf;
_dynamic_buffer_t wbuf;
@@ -18629,12 +18629,12 @@ static int _std_input(mb_interpreter_t* s, void** l) {
obj->data.variable->data->data.string = _HEAP_CHAR_BUF(buf);
obj->data.variable->data->is_ref = false;
} while(0);
#else /* MB_CP_VC && MB_ENABLE_UNICODE */
#else /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
obj->data.variable->data->data.string = mb_memdup(line, (unsigned)(len + 1));
#if MB_PRINT_INPUT_CONTENT
_get_printer(s)(s, "%s\n", obj->data.variable->data->data.string);
#endif /* MB_PRINT_INPUT_CONTENT */
#endif /* MB_CP_VC && MB_ENABLE_UNICODE */
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && MB_UNICODE_NEED_CONVERTING */
ast = ast->next;
} else {
_handle_error_on_obj(s, SE_RN_INVALID_ID_USAGE, s->source_file, DON(ast), MB_FUNC_ERR, _exit, result);
4 changes: 4 additions & 0 deletions core/my_basic.h
Original file line number Diff line number Diff line change
@@ -153,6 +153,10 @@ extern "C" {
# endif
#endif /* MB_ENABLE_UNICODE_ID */

/*
** Selects how UTF8 text is exchanged with the console when both MB_CP_VC and
** MB_ENABLE_UNICODE are defined:
**   nonzero - the core compiles its UTF8 <-> wide character helpers and uses
**             them when printing strings and when storing INPUT text;
**   0       - (default) those conversions are compiled out and the core hands
**             strings to the printer/inputer unchanged; the shell instead
**             switches the console code pages to UTF8 and installs its own
**             inputer.
** Define it before this point to override the default.
*/
#ifndef MB_UNICODE_NEED_CONVERTING
# define MB_UNICODE_NEED_CONVERTING 0
#endif /* MB_UNICODE_NEED_CONVERTING */

#ifndef MB_ENABLE_FORK
# define MB_ENABLE_FORK
#endif /* MB_ENABLE_FORK */
54 changes: 54 additions & 0 deletions shell/main.c
Original file line number Diff line number Diff line change
@@ -33,6 +33,8 @@
#ifdef MB_CP_VC
# include <conio.h>
# include <crtdbg.h>
# include <fcntl.h>
# include <io.h>
# include <Windows.h>
#elif !defined MB_CP_BORLANDC && !defined MB_CP_TCC
# include <unistd.h>
@@ -1440,6 +1442,49 @@ static int beep(struct mb_interpreter_t* s, void** l) {
** Callbacks and handlers
*/

#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
/*
** Inputer callback: reads one line from the console as UTF16 and stores it
** into `buf` as a null-terminated UTF8 string.
**
** s, pmt - unused.
** buf    - destination buffer; left untouched when it is null or `n` <= 0.
** n      - capacity of `buf` in bytes, including the terminator.
**
** Returns the number of bytes stored, not counting the terminator. A single
** trailing '\n' is removed. Text that does not fit in `n` - 1 bytes is
** truncated on a UTF8 character boundary. A failed read, conversion or
** allocation prints "Error reading." to stderr and terminates the process.
*/
static int _on_input(struct mb_interpreter_t* s, const char* pmt, char* buf, int n) {
	int result = 0;
	int save = -1;
	int len = 0;
	int i = 0;
	wchar_t* wstr = 0;
	char* utf8 = 0;
	mb_unrefvar(s);
	mb_unrefvar(pmt);

	if(!buf || n <= 0)
		return result;

	buf[0] = '\0';

	/* Read wide characters so the CRT delivers console input as UTF16 */
	save = _setmode(_fileno(stdin), _O_U16TEXT);
	wstr = malloc((size_t)n * sizeof(wchar_t));
	if(!wstr || fgetws(wstr, n, stdin) == 0)
		goto _error;

	/*
	** Convert into a scratch buffer of the exact required size: up to `n` - 1
	** wide characters may need more than `n` UTF8 bytes, so converting
	** straight into `buf` could fail and leave it unterminated.
	*/
	len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, 0, 0, 0, 0);
	if(len <= 0)
		goto _error;
	utf8 = malloc((size_t)len);
	if(!utf8 || !WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8, len, 0, 0))
		goto _error;

	/* `len` counts the terminator */
	result = len - 1;
	if(result > 0 && utf8[result - 1] == '\n')
		--result;

	/* Clamp to the caller's capacity without splitting a multi-byte sequence */
	if(result > n - 1) {
		result = n - 1;
		while(result > 0 && ((unsigned char)utf8[result] & 0xC0) == 0x80)
			--result;
	}
	for(i = 0; i < result; ++i)
		buf[i] = utf8[i];
	buf[result] = '\0';

	free(utf8);
	free(wstr);
	/* `_setmode` reports failure as -1, which is not a mode to restore */
	if(save != -1)
		_setmode(_fileno(stdin), save);

	return result;

_error:
	free(utf8);
	free(wstr);
	if(save != -1)
		_setmode(_fileno(stdin), save);

	fprintf(stderr, "Error reading.\n");

	exit(1);
}
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */

static int _on_prev_stepped(struct mb_interpreter_t* s, void** l, const char* f, int p, unsigned short row, unsigned short col) {
mb_unrefvar(s);
mb_unrefvar(l);
@@ -1526,14 +1571,23 @@ static void _on_startup(void) {
srand((unsigned)_ticks());
#endif /* _HAS_TICKS */

#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
SetConsoleOutputCP(CP_UTF8);
SetConsoleCP(CP_UTF8);
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */

setlocale(LC_ALL, "");
setlocale(LC_CTYPE, "C");
setlocale(LC_NUMERIC, "C");
setlocale(LC_TIME, "C");

mb_init();

mb_open(&bas);

#if defined MB_CP_VC && defined MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING
mb_set_inputer(bas, _on_input);
#endif /* MB_CP_VC && MB_ENABLE_UNICODE && !MB_UNICODE_NEED_CONVERTING */
mb_debug_set_stepped_handler(bas, _on_prev_stepped, _on_post_stepped);
mb_set_error_handler(bas, _on_error);
mb_set_import_handler(bas, _on_import);

0 comments on commit f5aba59

Please sign in to comment.