diff --git a/NEWS b/NEWS
index 1d546c5..3916943 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,10 @@
 	conversion routines will output debugging information
 	when strings are converted from R to Java.
 
+    o	JRI: console callbacks (Read/WriteConsole) encode and decode
+	strings between native encoding and Java. (#24)
+	Previously, only UTF-8 native locales were supported.
+
 
 1.0-6	2021-12-10
     o	remove obsolete autoconf macros
diff --git a/jri/src/Makefile.all b/jri/src/Makefile.all
index 081a9b2..16b7851 100644
--- a/jri/src/Makefile.all
+++ b/jri/src/Makefile.all
@@ -15,7 +15,10 @@ JRI.jar: $(JRI_JSRC) $(JNIPREFIX)jri$(JNISO)
 org_rosuda_JRI_Rengine.h: org/rosuda/JRI/Rengine.class
 	if [ -n "$(JAVAH)" ]; then $(JAVAH) -d . -classpath . org.rosuda.JRI.Rengine; fi
 
-Rcallbacks.o: Rcallbacks.c Rcallbacks.h globals.h org_rosuda_JRI_Rengine.h
+Rcallbacks.o: Rcallbacks.c Rcallbacks.h globals.h rjstring.h org_rosuda_JRI_Rengine.h
+	$(CC) -c -o $@ $< $(CFLAGS) $(CPICF) $(JAVAINC) $(RINC) $(JRI_CPPFLAGS)
+
+rjstring.o: rjstring.c rjstring.h
 	$(CC) -c -o $@ $< $(CFLAGS) $(CPICF) $(JAVAINC) $(RINC) $(JRI_CPPFLAGS)
 
 Rinit.o: Rinit.c Rinit.h Rcallbacks.h
@@ -33,7 +36,7 @@ Rengine.o: Rengine.c org_rosuda_JRI_Rengine.h globals.h Rcallbacks.h Rinit.h
 jri.o: jri.c
 	$(CC) -c -o $@ jri.c $(CFLAGS) $(CPICF) $(JAVAINC) $(RINC) $(JRI_CPPFLAGS)
 
-$(JNIPREFIX)jri$(JNISO): Rengine.o jri.o Rcallbacks.o Rinit.o globals.o rjava.o $(JRIDEPS)
+$(JNIPREFIX)jri$(JNISO): Rengine.o jri.o Rcallbacks.o Rinit.o globals.o rjava.o rjstring.o $(JRIDEPS)
 	$(CC) -o $@ $^ $(LDFLAGS) $(JNILD) $(RLD) $(JRI_LIBS)
 
 win32/libjvm.dll.a:
diff --git a/jri/src/Rcallbacks.c b/jri/src/Rcallbacks.c
index a4e0130..39f0e3c 100644
--- a/jri/src/Rcallbacks.c
+++ b/jri/src/Rcallbacks.c
@@ -5,6 +5,7 @@
 #include "globals.h"
 #include "Rdecl.h"
 #include "Rcallbacks.h"
+#include "rjstring.h"
 
 #include "org_rosuda_JRI_Rengine.h"
 #include <R_ext/Parse.h>
@@ -64,12 +65,15 @@ JNIEnv *checkEnvironment()
 
 int Re_ReadConsole(RCCONST char *prompt, RCSIGN char *buf, int len, int addtohistory)
 {
-	jstring r,s;
+	jstring r, s;
 	jmethodID mid;
-    JNIEnv *lenv=checkEnvironment();
-	
-    if (!lenv || !engineObj) return -1;
-	
+	JNIEnv *lenv=checkEnvironment();
+	const void *vmax = 0;
+	int ret = -1;
+	const char *c = 0;
+
+	if (!lenv || !engineObj) return -1;
+
 	jri_checkExceptions(lenv, 1);
 	mid=(*lenv)->GetMethodID(eenv, engineClass, "jriReadConsole", "(Ljava/lang/String;I)Ljava/lang/String;");
 #ifdef JRI_DEBUG
@@ -77,28 +81,46 @@ int Re_ReadConsole(RCCONST char *prompt, RCSIGN char *buf, int len, int addtohis
 #endif
 	jri_checkExceptions(lenv, 0);
 	if (!mid) return -1;
-		
-	s=(*lenv)->NewStringUTF(eenv, prompt);
-	r=(jstring) (*lenv)->CallObjectMethod(lenv, engineObj, mid, s, addtohistory);
+	vmax = vmaxget();
+	s = rj_newNativeJavaString(lenv, prompt, -1);
+	vmaxset(vmax);
+	if (!s) return -1;
+	r = (jstring) (*lenv)->CallObjectMethod(lenv, engineObj, mid, s, addtohistory);
 	jri_checkExceptions(lenv, 1);
 	(*lenv)->DeleteLocalRef(lenv, s);
 	jri_checkExceptions(lenv, 0);
-	if (r) {
-		const char *c=(*lenv)->GetStringUTFChars(lenv, r, 0);
-		if (!c) return -1;
-		{
-			int l=strlen(c);
-			strncpy((char*)buf, c, (l>len-1)?len-1:l);
-			buf[(l>len-1)?len-1:l]=0;
+	while (r) {
+		/* get string in Java UTF-8 */
+		c = (*lenv)->GetStringUTFChars(lenv, r, 0);
+		if (!c) break;
+		vmax = vmaxget();
+
+		/* convert from Java UTF-8 to real UTF-8 in a CHARSXP */
+		SEXP sRes = rj_mkCharUTF8_noerr(c);
+		if (!sRes) {
+			vmaxset(vmax);
+			break;
+		}
+
+		/* UTF8 -> native */
+		const char *rc = Rf_translateChar(sRes);
+		int l = strlen(rc);
+		strncpy((char*)buf, rc, (l > len - 1) ? len - 1 : l);
+		vmaxset(vmax);
+
+		/* truncate if needed */
+		buf[(l > len - 1) ? len - 1 : l] = 0;
 #ifdef JRI_DEBUG
-			printf("Re_ReadConsole succeeded: \"%s\"\n",buf);
+		printf("Re_ReadConsole succeeded: \"%s\"\n", buf);
 #endif
-		}
-		(*lenv)->ReleaseStringUTFChars(lenv, r, c);
+		ret = 1;
+		break;
+	}
+	if (r) {
+		if (c) (*lenv)->ReleaseStringUTFChars(lenv, r, c);
 		(*lenv)->DeleteLocalRef(lenv, r);
-		return 1;
-    }
-    return -1;
+	}
+	return ret;
 }
 
 void Re_Busy(int which)
@@ -118,20 +140,27 @@ void Re_Busy(int which)
 
 void Re_WriteConsoleEx(RCCONST char *buf, int len, int oType)
 {
-    JNIEnv *lenv=checkEnvironment();
-    jri_checkExceptions(lenv, 1);
-    {
-      jstring s=(*lenv)->NewStringUTF(lenv, buf);
-      jmethodID mid=(*lenv)->GetMethodID(lenv, engineClass, "jriWriteConsole", "(Ljava/lang/String;I)V");
-      jri_checkExceptions(lenv, 0);
+	JNIEnv *lenv = checkEnvironment();
+	if (!lenv || !engineObj) return; /* same guard as Re_ReadConsole: without a JNI env or engine object there is nowhere to write */
+	jri_checkExceptions(lenv, 1);
+	const void *vmax = vmaxget(); /* the conversion may R_alloc its buffer; remember the watermark so it is released below */
+	jstring s = rj_newNativeJavaString(lenv, buf, len); /* len bytes of buf, native encoding -> Java string */
+	vmaxset(vmax);
+	if (!s) {
 #ifdef JRI_DEBUG
-      printf("jriWriteConsole mid=%x\n", mid);
+		printf("jriWriteConsole rj_newNativeJavaString() FAILED!\n");
 #endif
-      if (!mid) return;
-      (*lenv)->CallVoidMethod(lenv, engineObj, mid, s, oType);
-      jri_checkExceptions(lenv, 1);
-      (*lenv)->DeleteLocalRef(lenv, s);
-    }
+		return;
+	}
+	jmethodID mid = (*lenv)->GetMethodID(lenv, engineClass, "jriWriteConsole", "(Ljava/lang/String;I)V");
+	jri_checkExceptions(lenv, 0);
+#ifdef JRI_DEBUG
+	printf("jriWriteConsole mid=%x\n", mid);
+#endif
+	if (!mid) { (*lenv)->DeleteLocalRef(lenv, s); return; } /* method lookup failed: release s instead of leaking the local ref */
+	(*lenv)->CallVoidMethod(lenv, engineObj, mid, s, oType);
+	jri_checkExceptions(lenv, 1);
+	(*lenv)->DeleteLocalRef(lenv, s);
 }
 
 /* old-style WriteConsole (for old R versions only) */
diff --git a/jri/src/rjstring.c b/jri/src/rjstring.c
new file mode 100644
index 0000000..6fce28b
--- /dev/null
+++ b/jri/src/rjstring.c
@@ -0,0 +1,231 @@
+#include "rjstring.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <R_ext/Riconv.h>
+#include <errno.h>
+
+#ifdef WIN32
+/* -- currently unused - was used to mimick reEnc()
+   extern unsigned int localeCP; 
+   static char cpbuf[16]; */
+#endif
+static jchar js_zero[2] = { 0, 0 };
+static jchar js_buf[128];
+
+/* Converts len bytes at c (the whole NUL-terminated string if len < 0) from the
+   encoding ifrom (NULL or "" = native) to UTF-16 in host byte order. buf[0] receives
+   the result: a static buffer for short input, R_alloc'ed otherwise - copy before re-use.
+   Returns the number of bytes written, or -1 on failure (R error instead if can_error). */
+int rj_char_utf16(const char *c, int len, jchar **buf, const char *ifrom, int can_error) {
+    const char *ce = (len < 0) ? strchr(c, 0) : (c + len);
+    if (ce == c) { /* empty input: hand back the shared zero buffer */
+	buf[0] = js_zero;
+	return 0;
+    }
+    size_t osize = sizeof(jchar) * (ce - c + 1), isize = ce - c;
+    jchar *js = buf[0] = (osize < sizeof(js_buf)) ? js_buf : (jchar*) R_alloc(sizeof(jchar), ce - c + 1);
+    char *dst = (char*) js;
+    int end_test = 1, is_le = (((char*)&end_test)[0] == 1) ? 1 : 0; /* detect host byte order */
+    if (!ifrom) ifrom = "";
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "rJava.rj_char_utf16_native:");
+    { const char *c0 = c; while (c0 < ce) fprintf(stderr, " %02x", (int)((unsigned char)*(c0++))); } /* bounded by ce: input need not be NUL-terminated */
+    fprintf(stderr, "\n");
+#endif
+    void *ih = Riconv_open(is_le ? "UTF-16LE" : "UTF-16BE", ifrom);
+    if (ih == (void *)(-1)) {
+	if (can_error)
+	    Rf_error("Unable to start conversion to UTF-16");
+	return -1;
+    }
+    while (c < ce) {
+	size_t res = Riconv(ih, &c, &isize, &dst, &osize);
+	if (res == (size_t) -1 && errno == E2BIG) { /* should never happen since we allocated far more than needed */
+	    Riconv_close(ih); /* do not leak the converter on the failure path */
+	    if (can_error)
+		Rf_error("Conversion to UTF-16 failed due to unexpectedly large buffer requirements.");
+	    return -1;
+	} else if (res == (size_t) -1) { /* invalid/incomplete input (EILSEQ, EINVAL) or any other failure */
+	    if (is_le) { /* substitute '?' as one UTF-16 unit in host byte order */
+		*(dst++) = '?';
+		*(dst++) = 0;
+	    } else {
+		*(dst++) = 0;
+		*(dst++) = '?';
+	    }
+	    osize -= 2;
+	    c++; /* skip one input byte so the loop always makes progress */
+	    isize--;
+	}
+    }
+    Riconv_close(ih);
+#ifdef DEBUG_ENCODING
+    { const jchar *j = js; while (j < (const jchar*)dst) fprintf(stderr, " %04x", (unsigned int)*(j++)); }
+    fprintf(stderr, "\n");
+#endif
+    return dst - (char*) js;
+}
+
+/* Converts the CHARSXP s to UTF-16, choosing the source encoding from its
+   declared encoding (getCharCE). The buffer stored in buf[0] is owned by
+   this module and may be static, so copy it before the next conversion.
+   Returns the length of the result in bytes (as computed by rj_char_utf16;
+   0 for an empty string) or -1 on error if can_error is 0; if can_error
+   is non-zero, errors are raised via Rf_error() in rj_char_utf16(). */
+static int rj_CHARSXP_utf16_(SEXP s, jchar **buf, int can_error) {
+    cetype_t ce_in = getCharCE(s);
+    const char *ifrom = "", *c = CHAR(s), *ce = strchr(c, 0);
+    if (ce == c) { /* empty string: nothing to convert */
+	buf[0] = js_zero;
+	return 0;
+    }
+
+    switch (ce_in) { /* map the declared encoding to an iconv source name */
+#ifdef WIN32
+    case CE_NATIVE:
+/* reEnc uses this, but translateCharUtf8 uses "" so let's go with ""
+	sprintf(cpbuf, "CP%d", localeCP);
+	ifrom = cpbuf;
+*/
+	break;
+    case CE_LATIN1: ifrom = "CP1252"; break;
+#else
+    case CE_NATIVE: break; /* is already "" */
+    case CE_LATIN1: ifrom = "latin1"; break;
+#endif
+    default: /* every other declared encoding is converted as UTF-8 */
+	ifrom = "UTF-8"; break;
+    }
+
+    return rj_char_utf16(c, ce - c, buf, ifrom, can_error);
+}
+
+int rj_rchar_utf16(SEXP s, jchar **buf) { return rj_CHARSXP_utf16_(s, buf, 1); } /* can_error = 1: conversion failures raise an R error */
+int rj_rchar_utf16_noerr(SEXP s, jchar **buf) { return rj_CHARSXP_utf16_(s, buf, 0); } /* can_error = 0: conversion failures return -1 */
+
+/* FIXME: we should probably deprecate this as well and use UTF-16 instead.
+   The only reason not to is that we would have to fully implement
+   a full UTF-16 -> UTF-8 conversion including surrogate pairs ... */
+
+/* Java returns *modified* UTF-8 which is incompatible with UTF-8, so we have to detect the illegal surrogate pairs (two 3-byte sequences) and re-encode each as one 4-byte sequence.
+   Returns a CE_UTF8 CHARSXP; on malformed input raises an R error if can_error is non-zero, otherwise returns 0 (NULL). */
+SEXP rj_mkCharUTF8_(const char *src, int can_error) {
+    const unsigned char *s = (const unsigned char*) src;
+    const unsigned char *c = (const unsigned char*) s;
+    /* check if the string contains any surrogates, i.e.
+       Unicode in the range 0xD800-0xDFFF
+       We want this to be fast since in 99.99% of cases it will
+       be false */
+    while (*c) {
+	if (c[0] == 0xED &&
+	    (c[1] & 0xE0) == 0xA0)
+	    break;
+	c++;
+    }
+    if (*c) { /* yes, we have to convert them */
+	SEXP res;
+	const unsigned char *e = (const unsigned char*) strchr((const char*)s, 0); /* find the end for size */
+	unsigned char *dst = 0, *d, sbuf[64];
+	if (!e) /* should never occur */
+	    return mkChar("");
+	/* we use the on-stack buffer for small strings and dynamic alloc for large (output is never longer than input) */
+	if (e - s >= sizeof(sbuf)) {
+	    /* allocate temp buffer since our input is const */
+	    d = dst = (unsigned char *) malloc(e - s + 1);
+	    if (!dst) {
+		if (can_error)
+		    Rf_error("Cannot allocate memory for surrogate pair conversion");
+		return 0;
+	    }
+	} else
+	    d = (unsigned char *)sbuf;
+	if (c - s > 0) { /* copy the surrogate-free prefix verbatim */
+	    memcpy(d, s, c - s);
+	    d += c - s;
+	}
+	while (*c) {
+	    unsigned int u1, u;
+	    *(d++) = *(c++);
+	    /* start of a sequence ? */
+	    if ((c[-1] & 0xC0) != 0xC0)
+		continue;
+	    if ((c[-1] & 0xE0) == 0xC0)  { /* 2-byte, not a surrogate pair */
+		if ((c[0] & 0xC0) != 0x80) {
+		    if (dst) free(dst);
+		    if (can_error)
+			Rf_error("illegal 2-byte sequence in Java string");
+		    return 0;
+		}
+		*(d++) = *(c++);
+		continue;
+	    }
+	    if ((c[-1] & 0xF0) != 0xE0) { /* must be 3-byte */
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal multi-byte seqeunce in Java string (>3-byte)");
+		return 0;
+	    }
+	    if (((c[0] & 0xC0) != 0x80 ||
+		 (c[1] & 0xC0) != 0x80)) {
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal 3-byte sequence in Java string");
+		return 0;
+	    }
+	    u1 = ((((unsigned int)c[-1]) & 0x0F) << 12) |
+		 ((((unsigned int)c[0]) & 0x3F) << 6) |
+		 (((unsigned int)c[1]) & 0x3F);
+	    if (u1 < 0xD800 || u1 > 0xDFFF) { /* not a surrogate at all -> regular copy (upper bound must be 0xDFFF or the next check is dead code) */
+		*(d++) = *(c++);
+		*(d++) = *(c++);
+		continue;
+	    }
+	    if (u1 >= 0xDC00 && u1 <= 0xDFFF) { /* low surrogate without a preceding high one ? */
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal sequence in Java string: low surrogate pair without a high one");
+		return 0;
+	    }
+	    c += 2; /* move to the low pair */
+	    if (c[0] != 0xED ||
+		(c[1] & 0xF0) != 0xB0 ||
+		(c[2] & 0xC0) != 0x80) {
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal sequence in Java string: high surrogate pair not followed by low one");
+		return 0;
+	    }
+	    /* the actually encoded unicode character */
+	    u = ((((unsigned int)c[1]) & 0x0F) << 6) |
+		(((unsigned int)c[2]) & 0x3F);
+	    u |= (u1 & 0x03FF) << 10;
+	    u += 0x10000;
+	    c += 3;
+	    /* it must be <= 0x10FFFF by design (each surrogate has 10 bits); d[-1] overwrites the lead byte copied above */
+	    d[-1]  = (unsigned char) (((u >> 18) & 0x0F) | 0xF0);
+	    *(d++) = (unsigned char) (((u >> 12) & 0x3F) | 0x80);
+	    *(d++) = (unsigned char) (((u >> 6) & 0x3F) | 0x80);
+	    *(d++) = (unsigned char) ((u & 0x3F) | 0x80);
+	}
+	res = mkCharLenCE((const char*) (dst ? dst : sbuf), dst ? (d - dst) : (d - sbuf), CE_UTF8);
+	if (dst) free(dst);
+	return res;
+    }
+    return mkCharLenCE(src, c - s, CE_UTF8); /* no surrogates: the input is already valid as-is */
+}
+
+SEXP rj_mkCharUTF8(const char *src) { return rj_mkCharUTF8_(src, 1); } /* can_error = 1: malformed input raises an R error */
+SEXP rj_mkCharUTF8_noerr(const char *src) { return rj_mkCharUTF8_(src, 0); } /* can_error = 0: malformed input returns 0 (NULL), never an R error */
+
+jstring rj_newJavaString(JNIEnv *env, SEXP sChar) {
+    jchar *utf16; /* set by the conversion; the buffer belongs to rjstring.c and is not freed here */
+    const int nbytes = rj_rchar_utf16(sChar, &utf16); /* length in bytes; conversion failures raise an R error */
+    return (*env)->NewString(env, utf16, (nbytes + 1) >> 1); /* bytes -> number of UTF-16 units */
+}
+
+jstring rj_newNativeJavaString(JNIEnv *env, const char *str, int len) {
+    jchar *utf16; /* set by the conversion; the buffer belongs to rjstring.c and is not freed here */
+    const int nbytes = rj_char_utf16(str, len, &utf16, "", 0); /* "" = native encoding; 0 = report failure as -1 */
+    return (nbytes >= 0) ? (*env)->NewString(env, utf16, (nbytes + 1) >> 1) : 0; /* 0 (NULL) when the conversion failed */
+}
diff --git a/jri/src/rjstring.h b/jri/src/rjstring.h
new file mode 100644
index 0000000..6fd24a1
--- /dev/null
+++ b/jri/src/rjstring.h
@@ -0,0 +1,25 @@
+#ifndef RJ_STRING_H__
+#define RJ_STRING_H__
+
+#include <jni.h>         /* for jchar */
+#include <Rinternals.h>  /* for SEXP */
+
+/* --- API --- */
+
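+/* Converts c to UTF-16 in *buf; short results live in a static buffer, so copy them before the next call.
+   Longer results are allocated with R_alloc. Returns the length in bytes, or -1 on error if can_error is 0. */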
+int rj_char_utf16(const char *c, int len, jchar **buf, const char *ifrom, int can_error);
+
+/* wrappers for above to use with CHARSXP to detect proper ifrom */
+int rj_rchar_utf16(SEXP s, jchar **buf);
+int rj_rchar_utf16_noerr(SEXP s, jchar **buf);
+
+/* return jstring, but do NOT check exceptions */
+jstring rj_newJavaString(JNIEnv *env, SEXP sChar);
+jstring rj_newNativeJavaString(JNIEnv *env, const char *str, int len);
+
+/* takes modified UTF-8 from Java, creates CHARSXP with valid UTF8 */
+SEXP rj_mkCharUTF8(const char *src);
+SEXP rj_mkCharUTF8_noerr(const char *src);
+
+#endif
diff --git a/src/Rglue.c b/src/Rglue.c
index c2ce7f4..f009c00 100644
--- a/src/Rglue.c
+++ b/src/Rglue.c
@@ -6,6 +6,7 @@
 #include <R_ext/Print.h>
 #include <R_ext/Riconv.h>
 #include <errno.h>
+#include "rjstring.h"
 
 /* R 4.0.1 broke EXTPTR_PTR ABI so re-map it to safety at
    the small expense of speed */
@@ -156,9 +157,9 @@ SEXP j2SEXP(JNIEnv *env, jobject o, int releaseLocal) {
   }
 }
 
-#if R_VERSION >= R_Version(2,7,0)
 /* returns string from a CHARSXP making sure that the result is in UTF-8
-   NOTE: this should NOT be used to create Java strings as they require UTF-16 natively */
+   NOTE: this should NOT be used to create Java strings as they require UTF-16 natively
+   For Java strings use rj_*_utf16 function from rjstring.h */
 const char *rj_char_utf8(SEXP s) {
 #ifdef DEBUG_ENCODING
     fprintf(stderr, "rJava.rj_char_utf8, CE=%d: \"%s\"\n", (int)Rf_getCharCE(s), CHAR(s));
@@ -168,176 +169,10 @@ const char *rj_char_utf8(SEXP s) {
     return (Rf_getCharCE(s) == CE_UTF8) ? CHAR(s) : Rf_reEnc(CHAR(s), getCharCE(s), CE_UTF8, 0); /* subst. invalid chars: 1=hex, 2=., 3=?, other=skip */
 }
 
-#ifdef WIN32
-extern unsigned int localeCP;
-static char cpbuf[16];
-#endif
-static jchar js_zero[2] = { 0, 0 };
-static jchar js_buf[128];
-/* returns string from a CHARSXP making sure that the result is in UTF-16.
-   the buffer is owned by the function and may be static, so copy after use */
-int rj_char_utf16(SEXP s, jchar **buf) {
-    void *ih;
-    cetype_t ce_in = getCharCE(s);
-    const char *ifrom = "", *c = CHAR(s), *ce = strchr(c, 0);
-    if (ce == c) {
-	buf[0] = js_zero;
-	return 0;
-    }
-    size_t osize = sizeof(jchar) * (ce - c + 1), isize = ce - c;
-    jchar *js = buf[0] = (osize < sizeof(js_buf)) ? js_buf : (jchar*) R_alloc(sizeof(jchar), ce - c + 1);
-    char *dst = (char*) js;
-    int end_test = 1;
-
-#ifdef DEBUG_ENCODING
-    fprintf(stderr, "rJava.rj_char_utf16, CE=%d:", (int)ce_in);
-    { const char *c0 = c; while (*c0) fprintf(stderr, " %02x", (int)((unsigned char)*(c0++))); }
-    fprintf(stderr, "\n");
-#endif
-
-    switch (ce_in) {
-#ifdef WIN32
-    case CE_NATIVE:
-/* reEnc uses this, but translateCharUtf8 uses "" so let's go with ""
-	sprintf(cpbuf, "CP%d", localeCP);
-	ifrom = cpbuf;
-*/
-	break;
-    case CE_LATIN1: ifrom = "CP1252"; break;
-#else
-    case CE_NATIVE: break; /* is already "" */
-    case CE_LATIN1: ifrom = "latin1"; break;
-#endif
-    default:
-	ifrom = "UTF-8"; break;
-    }
-
-#ifdef DEBUG_ENCODING
-    fprintf(stderr, "  '%s' -> UTF-16: ", ifrom);
-#endif
-    ih = Riconv_open(((char*)&end_test)[0] == 1 ? "UTF-16LE" : "UTF-16BE", ifrom);
-    if(ih == (void *)(-1))
-	Rf_error("Unable to start conversion to UTF-16");
-    while (c < ce) {
-	size_t res = Riconv(ih, &c, &isize, &dst, &osize);
-	/* this should never happen since we allocated far more than needed */
-	if (res == -1 && errno == E2BIG)
-	    Rf_error("Conversion to UTF-16 failed due to unexpectedly large buffer requirements.");
-	else if(res == -1 && (errno == EILSEQ || errno == EINVAL)) { /* invalid char */
-	    *(dst++) = '?';
-	    *(dst++) = 0;
-	    osize -= 2;
-	    c++;
-	    isize--;
-	}
-    }
-    Riconv_close(ih);
-#ifdef DEBUG_ENCODING
-    { const jchar *j = js; while (j < (const jchar*)dst) fprintf(stderr, " %04x", (unsigned int)*(j++)); }
-    fprintf(stderr, "\n");
-#endif
-    return dst - (char*) js;
-}
-
-/* Java returns *modified* UTF-8 which is incompatible with UTF-8,
-   so we have to detect the illegal surrgoate pairs and convert them */
-SEXP mkCharUTF8(const char *src) {
-    const unsigned char *s = (const unsigned char*) src;
-    const unsigned char *c = (const unsigned char*) s;
-    /* check if the string contains any surrogate pairs, i.e.
-       Unicode in the range 0xD800-0xDFFF
-       We want this to be fast since in 99.99% of cases it will
-       be false */
-    while (*c) {
-	if (c[0] == 0xED &&
-	    (c[1] & 0xE0) == 0xA0)
-	    break;
-	c++;
-    }
-    if (*c) { /* yes, we have to convert them */
-	SEXP res;
-	const unsigned char *e = (const unsigned char*) strchr((const char*)s, 0); /* find the end for size */
-	unsigned char *dst = 0, *d, sbuf[64];
-	if (!e) /* should never occur */
-	    return mkChar("");
-	/* we use static buffer for small strings and dynamic alloc for large */
-	if (e - s >= sizeof(sbuf)) {
-	    /* allocate temp buffer since our input is const */
-	    d = dst = (unsigned char *) malloc(e - s + 1);
-	    if (!dst)
-		Rf_error("Cannot allocate memory for surrogate pair conversion");
-	} else
-	    d = (unsigned char *)sbuf;
-	if (c - s > 0) {
-	    memcpy(d, s, c - s);
-	    d += c - s;
-	}
-	while (*c) {
-	    unsigned int u1, u;
-	    *(d++) = *(c++);
-	    /* start of a sequence ? */
-	    if ((c[-1] & 0xC0) != 0xC0)
-		continue;
-	    if ((c[-1] & 0xE0) == 0xC0)  { /* 2-byte, not a surrogate pair */
-		if ((c[0] & 0xC0) != 0x80) {
-		    if (dst) free(dst);
-		    Rf_error("illegal 2-byte sequence in Java string");
-		}
-		*(d++) = *(c++);
-		continue;
-	    }
-	    if ((c[-1] & 0xF0) != 0xE0) { /* must be 3-byte */
-		if (dst) free(dst);
-		Rf_error("illegal multi-byte seqeunce in Java string (>3-byte)");
-	    }
-	    if (((c[0] & 0xC0) != 0x80 ||
-		 (c[1] & 0xC0) != 0x80)) {
-		if (dst) free(dst);
-		Rf_error("illegal 3-byte sequence in Java string");
-	    }
-	    u1 = ((((unsigned int)c[-1]) & 0x0F) << 12) |
-		 ((((unsigned int)c[0]) & 0x3F) << 6) |
-		 (((unsigned int)c[1]) & 0x3F);
-	    if (u1 < 0xD800 || u1 > 0xDBFF) { /* not a surrogate pair -> regular copy */
-		*(d++) = *(c++);
-		*(d++) = *(c++);
-		continue;
-	    }
-	    if (u1 >= 0xDC00 && u1 <= 0xDFFF) { /* low surrogate pair ? */
-		if (dst) free(dst);
-		Rf_error("illegal sequence in Java string: low surrogate pair without a high one");
-	    }
-	    c += 2; /* move to the low pair */
-	    if (c[0] != 0xED ||
-		(c[1] & 0xF0) != 0xB0 ||
-		(c[2] & 0xC0) != 0x80) {
-		if (dst) free(dst);
-		Rf_error("illegal sequence in Java string: high surrogate pair not followed by low one");
-	    }
-	    /* the actually encoded unicode character */
-	    u = ((((unsigned int)c[1]) & 0x0F) << 6) |
-		(((unsigned int)c[2]) & 0x3F);
-	    u |= (u1 & 0x03FF) << 10;
-	    u += 0x10000;
-	    c += 3;
-	    /* it must be <= 0x10FFFF by design (each surrogate has 10 bits) */
-	    d[-1]  = (unsigned char) (((u >> 18) & 0x0F) | 0xF0);
-	    *(d++) = (unsigned char) (((u >> 12) & 0x3F) | 0x80);
-	    *(d++) = (unsigned char) (((u >> 6) & 0x3F) | 0x80);
-	    *(d++) = (unsigned char) ((u & 0x3F) | 0x80);
-	}
-	res = mkCharLenCE((const char*) (dst ? dst : sbuf), dst ? (d - dst) : (d - sbuf), CE_UTF8);
-	if (dst) free(dst);
-	return res;
-    }
-    return mkCharLenCE(src, c - s, CE_UTF8);
-}
-
-#endif
 
 static jstring newJavaString(JNIEnv *env, SEXP sChar) {
     jchar *s;
-    size_t len = rj_char_utf16(sChar, &s);
+    size_t len = rj_rchar_utf16(sChar, &s);
     return newString16(env, s, (len + 1) >> 1);
 }
 
diff --git a/src/rJava.h b/src/rJava.h
index 9e5bf78..b4c4c8f 100644
--- a/src/rJava.h
+++ b/src/rJava.h
@@ -97,14 +97,17 @@ void profReport(char *fmt, ...);
 #define END_RJAVA_CALL };
 #endif
 
-/* define mkCharUTF8 in a compatible fashion */
+/* define mkCharUTF8 in a compatible fashion
+   NOTE: those should NOT be used anymore since native
+   Java strings use UTF-16 so use only in cases where UTF8 is required */
 #if R_VERSION < R_Version(2,7,0)
 #define mkCharUTF8(X) mkChar(X)
 #define CHAR_UTF8(X) CHAR(X)
 #else
+#define mkCharUTF8(X) rj_mkCharUTF8(X)
 #define CHAR_UTF8(X) rj_char_utf8(X)
-extern SEXP mkCharUTF8(const char *);
-extern const char *rj_char_utf8(SEXP);
+extern SEXP rj_mkCharUTF8(const char *); /* rjstring.c */
+extern const char *rj_char_utf8(SEXP);   /* Rglue.c */
 #endif
 
 /* signatures are stored in a local buffer if they fit. Only if they don't fit a heap buffer is allocated and used. */
diff --git a/src/rjstring.c b/src/rjstring.c
new file mode 100644
index 0000000..6fce28b
--- /dev/null
+++ b/src/rjstring.c
@@ -0,0 +1,231 @@
+#include "rjstring.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <R_ext/Riconv.h>
+#include <errno.h>
+
+#ifdef WIN32
+/* -- currently unused - was used to mimick reEnc()
+   extern unsigned int localeCP; 
+   static char cpbuf[16]; */
+#endif
+static jchar js_zero[2] = { 0, 0 };
+static jchar js_buf[128];
+
+/* Converts len bytes at c (the whole NUL-terminated string if len < 0) from the
+   encoding ifrom (NULL or "" = native) to UTF-16 in host byte order. buf[0] receives
+   the result: a static buffer for short input, R_alloc'ed otherwise - copy before re-use.
+   Returns the number of bytes written, or -1 on failure (R error instead if can_error). */
+int rj_char_utf16(const char *c, int len, jchar **buf, const char *ifrom, int can_error) {
+    const char *ce = (len < 0) ? strchr(c, 0) : (c + len);
+    if (ce == c) { /* empty input: hand back the shared zero buffer */
+	buf[0] = js_zero;
+	return 0;
+    }
+    size_t osize = sizeof(jchar) * (ce - c + 1), isize = ce - c;
+    jchar *js = buf[0] = (osize < sizeof(js_buf)) ? js_buf : (jchar*) R_alloc(sizeof(jchar), ce - c + 1);
+    char *dst = (char*) js;
+    int end_test = 1, is_le = (((char*)&end_test)[0] == 1) ? 1 : 0; /* detect host byte order */
+    if (!ifrom) ifrom = "";
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "rJava.rj_char_utf16_native:");
+    { const char *c0 = c; while (c0 < ce) fprintf(stderr, " %02x", (int)((unsigned char)*(c0++))); } /* bounded by ce: input need not be NUL-terminated */
+    fprintf(stderr, "\n");
+#endif
+    void *ih = Riconv_open(is_le ? "UTF-16LE" : "UTF-16BE", ifrom);
+    if (ih == (void *)(-1)) {
+	if (can_error)
+	    Rf_error("Unable to start conversion to UTF-16");
+	return -1;
+    }
+    while (c < ce) {
+	size_t res = Riconv(ih, &c, &isize, &dst, &osize);
+	if (res == (size_t) -1 && errno == E2BIG) { /* should never happen since we allocated far more than needed */
+	    Riconv_close(ih); /* do not leak the converter on the failure path */
+	    if (can_error)
+		Rf_error("Conversion to UTF-16 failed due to unexpectedly large buffer requirements.");
+	    return -1;
+	} else if (res == (size_t) -1) { /* invalid/incomplete input (EILSEQ, EINVAL) or any other failure */
+	    if (is_le) { /* substitute '?' as one UTF-16 unit in host byte order */
+		*(dst++) = '?';
+		*(dst++) = 0;
+	    } else {
+		*(dst++) = 0;
+		*(dst++) = '?';
+	    }
+	    osize -= 2;
+	    c++; /* skip one input byte so the loop always makes progress */
+	    isize--;
+	}
+    }
+    Riconv_close(ih);
+#ifdef DEBUG_ENCODING
+    { const jchar *j = js; while (j < (const jchar*)dst) fprintf(stderr, " %04x", (unsigned int)*(j++)); }
+    fprintf(stderr, "\n");
+#endif
+    return dst - (char*) js;
+}
+
+/* Converts the CHARSXP s to UTF-16, choosing the source encoding from its
+   declared encoding (getCharCE). The buffer stored in buf[0] is owned by
+   this module and may be static, so copy it before the next conversion.
+   Returns the length of the result in bytes (as computed by rj_char_utf16;
+   0 for an empty string) or -1 on error if can_error is 0; if can_error
+   is non-zero, errors are raised via Rf_error() in rj_char_utf16(). */
+static int rj_CHARSXP_utf16_(SEXP s, jchar **buf, int can_error) {
+    cetype_t ce_in = getCharCE(s);
+    const char *ifrom = "", *c = CHAR(s), *ce = strchr(c, 0);
+    if (ce == c) { /* empty string: nothing to convert */
+	buf[0] = js_zero;
+	return 0;
+    }
+
+    switch (ce_in) { /* map the declared encoding to an iconv source name */
+#ifdef WIN32
+    case CE_NATIVE:
+/* reEnc uses this, but translateCharUtf8 uses "" so let's go with ""
+	sprintf(cpbuf, "CP%d", localeCP);
+	ifrom = cpbuf;
+*/
+	break;
+    case CE_LATIN1: ifrom = "CP1252"; break;
+#else
+    case CE_NATIVE: break; /* is already "" */
+    case CE_LATIN1: ifrom = "latin1"; break;
+#endif
+    default: /* every other declared encoding is converted as UTF-8 */
+	ifrom = "UTF-8"; break;
+    }
+
+    return rj_char_utf16(c, ce - c, buf, ifrom, can_error);
+}
+
+int rj_rchar_utf16(SEXP s, jchar **buf) { return rj_CHARSXP_utf16_(s, buf, 1); } /* can_error = 1: conversion failures raise an R error */
+int rj_rchar_utf16_noerr(SEXP s, jchar **buf) { return rj_CHARSXP_utf16_(s, buf, 0); } /* can_error = 0: conversion failures return -1 */
+
+/* FIXME: we should probably deprecate this as well and use UTF-16 instead.
+   The only reason not to is that we would have to fully implement
+   a full UTF-16 -> UTF-8 conversion including surrogate pairs ... */
+
+/* Java returns *modified* UTF-8 which is incompatible with UTF-8, so we have to detect the illegal surrogate pairs (two 3-byte sequences) and re-encode each as one 4-byte sequence.
+   Returns a CE_UTF8 CHARSXP; on malformed input raises an R error if can_error is non-zero, otherwise returns 0 (NULL). */
+SEXP rj_mkCharUTF8_(const char *src, int can_error) {
+    const unsigned char *s = (const unsigned char*) src;
+    const unsigned char *c = (const unsigned char*) s;
+    /* check if the string contains any surrogates, i.e.
+       Unicode in the range 0xD800-0xDFFF
+       We want this to be fast since in 99.99% of cases it will
+       be false */
+    while (*c) {
+	if (c[0] == 0xED &&
+	    (c[1] & 0xE0) == 0xA0)
+	    break;
+	c++;
+    }
+    if (*c) { /* yes, we have to convert them */
+	SEXP res;
+	const unsigned char *e = (const unsigned char*) strchr((const char*)s, 0); /* find the end for size */
+	unsigned char *dst = 0, *d, sbuf[64];
+	if (!e) /* should never occur */
+	    return mkChar("");
+	/* we use the on-stack buffer for small strings and dynamic alloc for large (output is never longer than input) */
+	if (e - s >= sizeof(sbuf)) {
+	    /* allocate temp buffer since our input is const */
+	    d = dst = (unsigned char *) malloc(e - s + 1);
+	    if (!dst) {
+		if (can_error)
+		    Rf_error("Cannot allocate memory for surrogate pair conversion");
+		return 0;
+	    }
+	} else
+	    d = (unsigned char *)sbuf;
+	if (c - s > 0) { /* copy the surrogate-free prefix verbatim */
+	    memcpy(d, s, c - s);
+	    d += c - s;
+	}
+	while (*c) {
+	    unsigned int u1, u;
+	    *(d++) = *(c++);
+	    /* start of a sequence ? */
+	    if ((c[-1] & 0xC0) != 0xC0)
+		continue;
+	    if ((c[-1] & 0xE0) == 0xC0)  { /* 2-byte, not a surrogate pair */
+		if ((c[0] & 0xC0) != 0x80) {
+		    if (dst) free(dst);
+		    if (can_error)
+			Rf_error("illegal 2-byte sequence in Java string");
+		    return 0;
+		}
+		*(d++) = *(c++);
+		continue;
+	    }
+	    if ((c[-1] & 0xF0) != 0xE0) { /* must be 3-byte */
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal multi-byte seqeunce in Java string (>3-byte)");
+		return 0;
+	    }
+	    if (((c[0] & 0xC0) != 0x80 ||
+		 (c[1] & 0xC0) != 0x80)) {
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal 3-byte sequence in Java string");
+		return 0;
+	    }
+	    u1 = ((((unsigned int)c[-1]) & 0x0F) << 12) |
+		 ((((unsigned int)c[0]) & 0x3F) << 6) |
+		 (((unsigned int)c[1]) & 0x3F);
+	    if (u1 < 0xD800 || u1 > 0xDFFF) { /* not a surrogate at all -> regular copy (upper bound must be 0xDFFF or the next check is dead code) */
+		*(d++) = *(c++);
+		*(d++) = *(c++);
+		continue;
+	    }
+	    if (u1 >= 0xDC00 && u1 <= 0xDFFF) { /* low surrogate without a preceding high one ? */
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal sequence in Java string: low surrogate pair without a high one");
+		return 0;
+	    }
+	    c += 2; /* move to the low pair */
+	    if (c[0] != 0xED ||
+		(c[1] & 0xF0) != 0xB0 ||
+		(c[2] & 0xC0) != 0x80) {
+		if (dst) free(dst);
+		if (can_error)
+		    Rf_error("illegal sequence in Java string: high surrogate pair not followed by low one");
+		return 0;
+	    }
+	    /* the actually encoded unicode character */
+	    u = ((((unsigned int)c[1]) & 0x0F) << 6) |
+		(((unsigned int)c[2]) & 0x3F);
+	    u |= (u1 & 0x03FF) << 10;
+	    u += 0x10000;
+	    c += 3;
+	    /* it must be <= 0x10FFFF by design (each surrogate has 10 bits); d[-1] overwrites the lead byte copied above */
+	    d[-1]  = (unsigned char) (((u >> 18) & 0x0F) | 0xF0);
+	    *(d++) = (unsigned char) (((u >> 12) & 0x3F) | 0x80);
+	    *(d++) = (unsigned char) (((u >> 6) & 0x3F) | 0x80);
+	    *(d++) = (unsigned char) ((u & 0x3F) | 0x80);
+	}
+	res = mkCharLenCE((const char*) (dst ? dst : sbuf), dst ? (d - dst) : (d - sbuf), CE_UTF8);
+	if (dst) free(dst);
+	return res;
+    }
+    return mkCharLenCE(src, c - s, CE_UTF8); /* no surrogates: the input is already valid as-is */
+}
+
+SEXP rj_mkCharUTF8(const char *src) { return rj_mkCharUTF8_(src, 1); } /* can_error = 1: malformed input raises an R error */
+SEXP rj_mkCharUTF8_noerr(const char *src) { return rj_mkCharUTF8_(src, 0); } /* can_error = 0: malformed input returns 0 (NULL), never an R error */
+
+jstring rj_newJavaString(JNIEnv *env, SEXP sChar) {
+    jchar *utf16; /* set by the conversion; the buffer belongs to rjstring.c and is not freed here */
+    const int nbytes = rj_rchar_utf16(sChar, &utf16); /* length in bytes; conversion failures raise an R error */
+    return (*env)->NewString(env, utf16, (nbytes + 1) >> 1); /* bytes -> number of UTF-16 units */
+}
+
+jstring rj_newNativeJavaString(JNIEnv *env, const char *str, int len) {
+    jchar *utf16; /* set by the conversion; the buffer belongs to rjstring.c and is not freed here */
+    const int nbytes = rj_char_utf16(str, len, &utf16, "", 0); /* "" = native encoding; 0 = report failure as -1 */
+    return (nbytes >= 0) ? (*env)->NewString(env, utf16, (nbytes + 1) >> 1) : 0; /* 0 (NULL) when the conversion failed */
+}
diff --git a/src/rjstring.h b/src/rjstring.h
new file mode 100644
index 0000000..6fd24a1
--- /dev/null
+++ b/src/rjstring.h
@@ -0,0 +1,25 @@
+#ifndef RJ_STRING_H__
+#define RJ_STRING_H__
+
+#include <jni.h>         /* for jchar */
+#include <Rinternals.h>  /* for SEXP */
+
+/* --- API --- */
+
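+/* Converts c to UTF-16 in *buf; short results live in a static buffer, so copy them before the next call.
+   Longer results are allocated with R_alloc. Returns the length in bytes, or -1 on error if can_error is 0. */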
+int rj_char_utf16(const char *c, int len, jchar **buf, const char *ifrom, int can_error);
+
+/* wrappers for above to use with CHARSXP to detect proper ifrom */
+int rj_rchar_utf16(SEXP s, jchar **buf);
+int rj_rchar_utf16_noerr(SEXP s, jchar **buf);
+
+/* return jstring, but do NOT check exceptions */
+jstring rj_newJavaString(JNIEnv *env, SEXP sChar);
+jstring rj_newNativeJavaString(JNIEnv *env, const char *str, int len);
+
+/* takes modified UTF-8 from Java, creates CHARSXP with valid UTF8 */
+SEXP rj_mkCharUTF8(const char *src);
+SEXP rj_mkCharUTF8_noerr(const char *src);
+
+#endif