Java GBK与UTF-8的转换

论坛 期权论坛 脚本     
匿名技术用户   2020-12-30 19:09   11   0

源码如下:

import java.io.UnsupportedEncodingException;


/**
 * Small text-encoding helpers built around UTF-8.
 *
 * <p>A Java {@code String} is a sequence of UTF-16 code units and has no "GBK"
 * or "UTF-8" encoding of its own; an encoding only comes into play when text
 * is turned into bytes (or bytes into text). The methods here therefore work
 * by encoding to, and decoding from, UTF-8 byte sequences using the JDK's own
 * charset support instead of assembling the bit patterns by hand.
 */
public class CharsetConvertor {

    /** Charset name used for every encode/decode step in this class. */
    private static final String UTF_8 = "UTF-8";

    /**
     * Demo entry point: prints the result of {@link #gbToUtf8(String)} for a
     * single CJK character.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // The sample character is U+7ECF. It is written as a Unicode escape
            // so that the program does not depend on which encoding javac
            // assumes for this source file.
            System.out.println(gbToUtf8("\u7ecf"));
        } catch (UnsupportedEncodingException e) {
            // Keep the cause instead of just dumping a stack trace and
            // continuing as if nothing had happened.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }

    /**
     * Encodes {@code source} as UTF-8 bytes and decodes those bytes back into
     * a {@code String}.
     *
     * <p>Every character of the input is kept: ASCII is passed through, and
     * characters that need two, three or four UTF-8 bytes (the latter being
     * surrogate pairs in the {@code String}) are encoded with the correct
     * length. For well-formed input the returned string is equal to
     * {@code source}. How an unpaired surrogate is encoded is left to
     * {@link String#getBytes(String)}, whose documentation does not specify
     * the result for unencodable input.
     *
     * @param source text to convert; must not be {@code null}
     * @return the text obtained by decoding the UTF-8 encoding of {@code source}
     * @throws UnsupportedEncodingException if the running JVM does not provide
     *         the {@code "UTF-8"} charset
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static String gbToUtf8(String source) throws UnsupportedEncodingException {
        if (source == null) {
            throw new NullPointerException("source");
        }
        // Let the JDK encoder handle byte order, sequence length and
        // surrogate pairs; the charset is looked up by name so that the
        // declared checked exception stays meaningful for existing callers.
        byte[] utf8 = source.getBytes(UTF_8);
        return new String(utf8, UTF_8);
    }

    /**
     * Formats a byte as exactly two lowercase hexadecimal digits.
     *
     * <p>The byte is treated as unsigned, so {@code (byte) -1} yields
     * {@code "ff"} and {@code (byte) 10} yields {@code "0a"}.
     *
     * @param b the byte to format
     * @return a two-character lowercase hex string
     */
    public static String getHexString(byte b) {
        // Mask to 0..255 so negative bytes are not sign-extended to
        // eight hex digits by Integer.toHexString.
        String hex = Integer.toHexString(b & 0xFF);
        return hex.length() < 2 ? "0" + hex : hex;
    }
}

转载于:https://www.cnblogs.com/xinzhuangzi/archive/2010/07/30/4100568.html

分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:7942463
帖子:1588486
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP