本站首页    管理页面    写新日志    退出


«August 2025»
12
3456789
10111213141516
17181920212223
24252627282930
31


公告

戒除浮躁,读好书,交益友


我的分类(专题)

日志更新

最新评论

留言板

链接

Blog信息
blog名称:邢红瑞的blog
日志总数:523
评论数量:1142
留言数量:0
访问次数:9687427
建立时间:2004年12月20日




[jvm]未公开的mustang核心秘密(二):jni的返回中文问题
原创空间,  软件技术,  电脑与网络

邢红瑞 发表于 2007/6/17 11:21:05

严格地说,我基本没有做过jni的开发,只是修改过bsd和windows下的jdk源码。这些是openjdk的jni部分函数,这些函数完全是用c写的,幸好不是c++,没有用复杂的设计模式,我还看得懂。下面这段代码,从控制台通过jni输入字符串,在java里面打印:class Prompt {     // native method that prints a prompt and reads a line    private native String getLine(String prompt);     public static void main(String args[]) {        Prompt p = new Prompt();        String input = p.getLine("Type a line: ");        System.out.println("User typed: " + input);    }     static {        System.loadLibrary("jnistudy");    }}promt.c的代码:#include <jni.h>#include <stdio.h>#include "Prompt.h" JNIEXPORT jstring JNICALL Java_Prompt_getLine(JNIEnv *env, jobject obj, jstring prompt){    char buf[128];    const char *str;    str = (*env)->GetStringUTFChars(env, prompt, NULL);    if (str == NULL) {        return NULL; /* OutOfMemoryError already thrown */    }    printf("%s", str);    (*env)->ReleaseStringUTFChars(env, prompt, str);    /* We assume here that the user does not type more than     * 127 characters */    scanf("%s", buf);    return (*env)->NewStringUTF(env, buf);}GetStringUTFChars将字符串转为UTF8格式的c语言字符串,如果内存分配不成功,最好不要直接返回NULL,虽然也是抛出OutOfMemoryError异常,应该使用jdk的通用作法,返回 JNU_ThrowOutOfMemoryError(env, 0);最后调用ReleaseStringUTFChars释放内存,防止泄漏。NewStringUTF将一个UTF8格式c语言字符串转为java String,如果JVM不能分配足够的内存,抛出OutOfMemoryError异常,并返回NULL。GetStringChars和ReleaseStringChars返回unicode的字符串,UTF-8以‘\0’结束,使用strlen或GetStringUTFLength得到长度,unicode必须使用GetStringLength,因为JVM的字符串是不可以变的,GetStringChars的最后一个参数设为JNI_FALSE直接访问原始java.lang.String实例的指针,jni代码不能修改返回string的内容,因为String实例是immutable。GetStringChars最后一个参数设为JNI_TRUE,返回原始java.lang.String实例的内容的一份copy。如果不是特别在意返回的是内容copy还是原始实例的指针,就传入NULL即可。无论最后一个参数是什么,ReleaseStringChars一定要被调用。应该是在jdk1.2平台后,为了应付线程的问题,加入Get/ReleaseStringCritical。你可以block string防止jvm的另一个线程访问,当GetStringCritical处理指针时,垃圾回收就不能进行,非常容易引起JVM死锁。JVM没有GetStringUTFCritical和 ReleaseStringUTFCritical函数,GetStringRegion和GetStringUTFRegion可以在已经分配内存的buffer上复制字符串的内容,GetStringUTFRegion可以设定字符串的索引和copy 
unicode字符的数量,越界抛出StringIndexOutOfBoundsException异常,最好少于128个字符,因为没有内存分配,不用考虑内存分配失败的情况。如果字符少的情况,使用Get/SetStringRegion 和 Get/SetStringUTFRegion最好。如果控制台输入中文,java是unicode编码,到JVM是utf8编码,输出肯定是乱码。如果单纯是转化中文,也是很麻烦的,http://blogger.org.cn/blog/more.asp?name=hongrui&id=25509 使用的办法也是麻烦。 这是jdk用于转码的函数: JNIEXPORT jstringNewStringPlatform(JNIEnv *env, const char *str){    return JNU_NewStringPlatform(env, str);} JNIEXPORT jstring JNICALLJNU_NewStringPlatform(JNIEnv *env, const char *str){    jstring result;    jbyteArray hab = 0;    int len;     if (fastEncoding == NO_ENCODING_YET)        initializeEncoding(env);     if ((fastEncoding == FAST_8859_1) || (fastEncoding == NO_ENCODING_YET))        return newString8859_1(env, str);    if (fastEncoding == FAST_646_US)         return newString646_US(env, str);    if (fastEncoding == FAST_CP1252)        return newStringCp1252(env, str);        if ((*env)->EnsureLocalCapacity(env, 2) < 0)        return 0;     len = (int)strlen(str);    hab = (*env)->NewByteArray(env, len);    if (hab != 0) {        (*env)->SetByteArrayRegion(env, hab, 0, len, (jbyte *)str);        if (jnuEncodingSupported(env)) {            result = (*env)->NewObject(env, JNU_ClassString(env),                          String_init_ID, hab, jnuEncoding);        } else {            /*If the encoding specified in sun.jnu.encoding is not endorsed              by "Charset.isSupported" we have to fall back to use String(byte[])              explicitly here without specifying the encoding name, in which the              StringCoding class will pickup the iso-8859-1 as the fallback               converter for us.       */            jmethodID mid = (*env)->GetMethodID(env, JNU_ClassString(env),     "<init>", "([B)V");            result = (*env)->NewObject(env, JNU_ClassString(env), mid, hab);        } (*env)->DeleteLocalRef(env, hab); return result;    }    return 0;}里面调用了initializeEncoding:/* Initialize the fast encoding.  
If the "sun.jnu.encoding" property * has not yet been set, we leave fastEncoding == NO_ENCODING_YET. */static voidinitializeEncoding(JNIEnv *env){    jstring propname = 0;    jstring enc = 0;     if ((*env)->EnsureLocalCapacity(env, 3) < 0)        return;     propname = (*env)->NewStringUTF(env, "sun.jnu.encoding");    if (propname) {        jboolean exc;        enc = JNU_CallStaticMethodByName                (env,   &exc,   "java/lang/System",   "getProperty",   "(Ljava/lang/String;)Ljava/lang/String;",   propname).l; if (!exc) {     if (enc) {         const char* encname = (*env)->GetStringUTFChars(env, enc, 0);  if (encname) {    /*     * On Solaris with nl_langinfo() called in GetJavaProperties():     *     *   locale undefined -> NULL -> hardcoded default     *   "C" locale       -> "" -> hardcoded default (on 2.6)     *   "C" locale       -> "ISO646-US"   (on Sol 7/8)     *   "en_US" locale -> "ISO8859-1"     *   "en_GB" locale -> "ISO8859-1"   (on Sol 7/8)     *   "en_UK" locale -> "ISO8859-1"   (on 2.6)     */      if ((strcmp(encname, "8859_1") == 0) ||                         (strcmp(encname, "ISO8859-1") == 0) ||                         (strcmp(encname, "ISO8859_1") == 0))          fastEncoding = FAST_8859_1;      else if (strcmp(encname, "ISO646-US") == 0)   fastEncoding = FAST_646_US;                    else if (strcmp(encname, "Cp1252") == 0 ||                             /* This is a temporary fix until we move */                             /* to wide character versions of all Windows */                             /* calls. 
*/                                                         strcmp(encname, "utf-16le") == 0)                        fastEncoding = FAST_CP1252;                    else {                        fastEncoding = NO_FAST_ENCODING;                        jnuEncoding = (jstring)(*env)->NewGlobalRef(env, enc);                    }      (*env)->ReleaseStringUTFChars(env, enc, encname);  }     } } else {     (*env)->ExceptionClear(env); }    } else {        (*env)->ExceptionClear(env);    }    (*env)->DeleteLocalRef(env, propname);    (*env)->DeleteLocalRef(env, enc);     /* Initialize method-id cache */    String_getBytes_ID = (*env)->GetMethodID(env, JNU_ClassString(env),          "getBytes", "(Ljava/lang/String;)[B");    String_init_ID = (*env)->GetMethodID(env, JNU_ClassString(env),      "<init>", "([BLjava/lang/String;)V");}static jboolean isJNUEncodingSupported = JNI_FALSE;static jboolean jnuEncodingSupported(JNIEnv *env) {    jboolean exe;    if (isJNUEncodingSupported == JNI_TRUE) {        return JNI_TRUE;    }    isJNUEncodingSupported = (jboolean) JNU_CallStaticMethodByName (        env, &exe,                                    "java/nio/charset/Charset",                                    "isSupported",                             "(Ljava/lang/String;)Z",                                    jnuEncoding).z;    return isJNUEncodingSupported;}上面的promt.c代码改为#include <jni.h>#include <stdio.h>#include "Prompt.h"#include "jni_util.h" JNIEXPORT jstring JNICALL Java_Prompt_getLine(JNIEnv *env, jobject obj, jstring prompt){    char buf[128];    const char *str;  int len = (*env)->GetStringLength(env, prompt);  printf("%d\n",len);    str = (*env)->GetStringUTFChars(env, prompt, NULL);    if (str == NULL) {        return NULL; /* OutOfMemoryError already thrown */    }    printf("%s", str);    (*env)->ReleaseStringUTFChars(env, prompt, str);    /* We assume here that the user does not type more than     * 127 characters */    scanf("%s", buf);    return NewStringPlatform(env, 
buf);} 运行Prompt.class,输入“邢红瑞”,控制台输出依次为“User typed: 邢红瑞”、“13”、“Type a line: ”。这就解决了中文平台java和jni的转码问题。


阅读全文(6989) | 回复(0) | 编辑 | 精华
 



发表评论:
昵称:
密码:
主页:
标题:
验证码:  (不区分大小写,请仔细填写,输错需重写评论内容!)



站点首页 | 联系我们 | 博客注册 | 博客登陆

Sponsored By W3CHINA
W3CHINA Blog 0.8 Processed in 0.559 second(s), page refreshed 144753984 times.
《全国人大常委会关于维护互联网安全的决定》  《计算机信息网络国际联网安全保护管理办法》
苏ICP备05006046号