initial commit

author: chenjinsong <[email protected]> 2018-09-27 16:11:54 +0800
committer: chenjinsong <[email protected]> 2018-09-27 16:11:54 +0800
commit: 56d71f261a8bd6031e47e2bf80867049a2aa13da (patch)
tree: f09257b2143782a333a9eda3395137837d9bdad1 /src/com/nis/nmsclient/util/io/UnicodeInputStream.java
1 files changed, 118 insertions, 0 deletions
diff --git a/src/com/nis/nmsclient/util/io/UnicodeInputStream.java b/src/com/nis/nmsclient/util/io/UnicodeInputStream.java
new file mode 100644
index 0000000..9e61d42
--- /dev/null
+++ b/src/com/nis/nmsclient/util/io/UnicodeInputStream.java
@@ -0,0 +1,118 @@
+package com.nis.nmsclient.util.io;
+/**
+ version: 1.1 / 2007-01-25
+ - changed BOM recognition ordering (longer boms first)
+
+ Original pseudocode   : Thomas Weidenfeller
+ Implementation tweaked: Aki Nieminen
+
+ http://www.unicode.org/unicode/faq/utf_bom.html
+ BOMs in byte length ordering:
+   00 00 FE FF    = UTF-32, big-endian
+   FF FE 00 00    = UTF-32, little-endian
+   EF BB BF       = UTF-8,
+   FE FF          = UTF-16, big-endian
+   FF FE          = UTF-16, little-endian
+
+ Win2k Notepad:
+   Unicode format = UTF-16LE
+ ***/
+
+import java.io.*;
+
+/**
+ * This inputstream will recognize unicode BOM marks and will skip bytes if
+ * getEncoding() method is called before any of the read(...) methods.
+ * 
+ * Usage pattern: String enc = "ISO-8859-1"; // or NULL to use systemdefault
+ * FileInputStream fis = new FileInputStream(file); UnicodeInputStream uin = new
+ * UnicodeInputStream(fis, enc); enc = uin.getEncoding(); // check and skip
+ * possible BOM bytes InputStreamReader in; if (enc == null) in = new
+ * InputStreamReader(uin); else in = new InputStreamReader(uin, enc);
+ */
+public class UnicodeInputStream extends InputStream {
+	PushbackInputStream internalIn;
+	boolean isInited = false;
+	String defaultEnc;
+	String encoding;
+
+	private static final int BOM_SIZE = 4;
+
+	UnicodeInputStream(InputStream in, String defaultEnc) {
+		internalIn = new PushbackInputStream(in, BOM_SIZE);
+		this.defaultEnc = defaultEnc;
+	}
+
+	public String getDefaultEncoding() {
+		return defaultEnc;
+	}
+
+	public String getEncoding() {
+		if (!isInited) {
+			try {
+				init();
+			} catch (IOException ex) {
+				IllegalStateException ise = new IllegalStateException(
+						"Init method failed.");
+				ise.initCause(ise);
+				throw ise;
+			}
+		}
+		return encoding;
+	}
+
+	/**
+	 * Read-ahead four bytes and check for BOM marks. Extra bytes are unread
+	 * back to the stream, only BOM bytes are skipped.
+	 */
+	protected void init() throws IOException {
+		if (isInited)
+			return;
+
+		byte bom[] = new byte[BOM_SIZE];
+		int n, unread;
+		n = internalIn.read(bom, 0, bom.length);
+
+		if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00)
+				&& (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
+			encoding = "UTF-32BE";
+			unread = n - 4;
+		} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)
+				&& (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
+			encoding = "UTF-32LE";
+			unread = n - 4;
+		} else if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB)
+				&& (bom[2] == (byte) 0xBF)) {
+			encoding = "UTF-8";
+			unread = n - 3;
+		} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
+			encoding = "UTF-16BE";
+			unread = n - 2;
+		} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
+			encoding = "UTF-16LE";
+			unread = n - 2;
+		} else {
+			// Unicode BOM mark not found, unread all bytes
+			encoding = defaultEnc;
+			unread = n;
+		}
+		// System.out.println("read=" + n + ", unread=" + unread);
+
+		if (unread > 0)
+			internalIn.unread(bom, (n - unread), unread);
+
+		isInited = true;
+	}
+
+	public void close() throws IOException {
+		// init();
+		isInited = true;
+		internalIn.close();
+	}
+
+	public int read() throws IOException {
+		// init();
+		isInited = true;
+		return internalIn.read();
+	}
+}
author	chenjinsong <[email protected]>	2018-09-27 16:11:54 +0800
committer	chenjinsong <[email protected]>	2018-09-27 16:11:54 +0800
commit	56d71f261a8bd6031e47e2bf80867049a2aa13da (patch)
tree	f09257b2143782a333a9eda3395137837d9bdad1 /src/com/nis/nmsclient/util/io/UnicodeInputStream.java