2 files changed, 238 insertions, 0 deletions
diff --git a/src/com/nis/nmsclient/util/io/UnicodeInputStream.java b/src/com/nis/nmsclient/util/io/UnicodeInputStream.java
new file mode 100644
index 0000000..9e61d42
--- /dev/null
+++ b/src/com/nis/nmsclient/util/io/UnicodeInputStream.java
@@ -0,0 +1,118 @@
+package com.nis.nmsclient.util.io;
+/**
+ version: 1.1 / 2007-01-25
+ - changed BOM recognition ordering (longer boms first)
+
+ Original pseudocode   : Thomas Weidenfeller
+ Implementation tweaked: Aki Nieminen
+
+ http://www.unicode.org/unicode/faq/utf_bom.html
+ BOMs in byte length ordering:
+   00 00 FE FF    = UTF-32, big-endian
+   FF FE 00 00    = UTF-32, little-endian
+   EF BB BF       = UTF-8,
+   FE FF          = UTF-16, big-endian
+   FF FE          = UTF-16, little-endian
+
+ Win2k Notepad:
+   Unicode format = UTF-16LE
+ ***/
+
+import java.io.*;
+
+/**
+ * This input stream recognizes Unicode BOM marks and skips the BOM bytes if
+ * getEncoding() is called before any of the read(...) methods. Usage pattern:
+ * <pre>
+ * String enc = "ISO-8859-1"; // or null to use the system default
+ * UnicodeInputStream uin = new UnicodeInputStream(new FileInputStream(file), enc);
+ * enc = uin.getEncoding(); // checks for and skips possible BOM bytes
+ * Reader in = (enc == null) ? new InputStreamReader(uin) : new InputStreamReader(uin, enc);
+ * </pre>
+ */
+public class UnicodeInputStream extends InputStream {
+	PushbackInputStream internalIn;
+	boolean isInited = false;
+	String defaultEnc;
+	String encoding;
+	/** Longest BOM length in bytes; also the pushback buffer capacity. */
+	private static final int BOM_SIZE = 4;
+
+	/** Wraps in; defaultEnc (may be null) is reported when no BOM is found. */
+	public UnicodeInputStream(InputStream in, String defaultEnc) {
+		internalIn = new PushbackInputStream(in, BOM_SIZE);
+		this.defaultEnc = defaultEnc;
+	}
+
+	/** Returns the fallback encoding passed to the constructor. */
+	public String getDefaultEncoding() {
+		return defaultEnc;
+	}
+
+	/**
+	 * Detects the encoding on first call, skipping the BOM bytes if present.
+	 * Returns null if read() or close() ran first, as detection is then off.
+	 * Throws IllegalStateException, caused by the IOException, if init() fails.
+	 */
+	public String getEncoding() {
+		if (!isInited) {
+			try {
+				init();
+			} catch (IOException ex) {
+				IllegalStateException ise = new IllegalStateException("Init method failed.");
+				ise.initCause(ex); // initCause(ise) would itself throw
+				throw ise;
+			}
+		}
+		return encoding;
+	}
+
+	/**
+	 * Reads up to four bytes (retrying short reads until EOF) and checks them
+	 * for a BOM, longest marks first. Non-BOM bytes are pushed back.
+	 */
+	protected void init() throws IOException {
+		if (isInited)
+			return;
+		byte bom[] = new byte[BOM_SIZE];
+		int n = 0, r, skip = 0; // bytes read, last read result, BOM length
+		while (n < BOM_SIZE && (r = internalIn.read(bom, n, BOM_SIZE - n)) > 0)
+			n += r;
+		// Length guards: zero padding must not turn a bare FF FE into FF FE 00 00.
+		encoding = defaultEnc;
+		if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
+				&& bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
+			encoding = "UTF-32BE";
+			skip = 4;
+		} else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
+				&& bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
+			encoding = "UTF-32LE";
+			skip = 4;
+		} else if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB
+				&& bom[2] == (byte) 0xBF) {
+			encoding = "UTF-8";
+			skip = 3;
+		} else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
+			encoding = "UTF-16BE";
+			skip = 2;
+		} else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
+			encoding = "UTF-16LE";
+			skip = 2;
+		}
+		if (n > skip)
+			internalIn.unread(bom, skip, n - skip);
+		isInited = true;
+	}
+
+	/** Closes the wrapped stream; no BOM detection is attempted. */
+	public void close() throws IOException {
+		isInited = true;
+		internalIn.close();
+	}
+
+	/** Reads one byte; turns BOM skipping off if getEncoding() has not run yet. */
+	public int read() throws IOException {
+		isInited = true;
+		return internalIn.read();
+	}
+}
diff --git a/src/com/nis/nmsclient/util/io/UnicodeReader.java b/src/com/nis/nmsclient/util/io/UnicodeReader.java
new file mode 100644
index 0000000..c4169b5
--- /dev/null
+++ b/src/com/nis/nmsclient/util/io/UnicodeReader.java
@@ -0,0 +1,120 @@
+package com.nis.nmsclient.util.io;
+/**
+ version: 1.1 / 2007-01-25
+ - changed BOM recognition ordering (longer boms first)
+
+ Original pseudocode   : Thomas Weidenfeller
+ Implementation tweaked: Aki Nieminen
+
+ http://www.unicode.org/unicode/faq/utf_bom.html
+ BOMs:
+   00 00 FE FF    = UTF-32, big-endian
+   FF FE 00 00    = UTF-32, little-endian
+   EF BB BF       = UTF-8,
+   FE FF          = UTF-16, big-endian
+   FF FE          = UTF-16, little-endian
+
+ Win2k Notepad:
+   Unicode format = UTF-16LE
+ ***/
+
+import java.io.*;
+
+/**
+ * Generic Unicode text reader that uses the stream's BOM to identify the
+ * encoding. If no BOM is found, the given default or system encoding is used.
+ */
+public class UnicodeReader extends Reader {
+	PushbackInputStream internalIn;
+	InputStreamReader internalIn2 = null;
+	String defaultEnc;
+
+	/** Longest BOM length in bytes; also the pushback buffer capacity. */
+	private static final int BOM_SIZE = 4;
+
+	/**
+	 * Creates a reader that picks its encoding from the stream's BOM.
+	 *
+	 * @param in
+	 *            inputstream to be read
+	 * @param defaultEnc
+	 *            default encoding if stream does not have BOM marker. Give NULL
+	 *            to use system-level default.
+	 */
+	public UnicodeReader(InputStream in, String defaultEnc) {
+		internalIn = new PushbackInputStream(in, BOM_SIZE);
+		this.defaultEnc = defaultEnc;
+	}
+
+	/** Returns the fallback encoding passed to the constructor. */
+	public String getDefaultEncoding() {
+		return defaultEnc;
+	}
+
+	/**
+	 * Get stream encoding or NULL if stream is uninitialized. Call init() or
+	 * read() method to initialize it. The name is as InputStreamReader reports it.
+	 */
+	public String getEncoding() {
+		if (internalIn2 == null)
+			return null;
+		return internalIn2.getEncoding();
+	}
+
+	/**
+	 * Reads up to four bytes (retrying short reads until EOF), checks them for
+	 * a BOM, longest marks first, pushes back the non-BOM bytes and creates
+	 * the decoding reader. Does nothing when already initialized.
+	 */
+	protected void init() throws IOException {
+		if (internalIn2 != null)
+			return;
+		String encoding = defaultEnc;
+		byte bom[] = new byte[BOM_SIZE];
+		int n = 0, r, skip = 0; // bytes read, last read result, BOM length
+		while (n < BOM_SIZE && (r = internalIn.read(bom, n, BOM_SIZE - n)) > 0)
+			n += r;
+		// Length guards: zero padding must not turn a bare FF FE into FF FE 00 00.
+		if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
+				&& bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
+			encoding = "UTF-32BE";
+			skip = 4;
+		} else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
+				&& bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
+			encoding = "UTF-32LE";
+			skip = 4;
+		} else if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB
+				&& bom[2] == (byte) 0xBF) {
+			encoding = "UTF-8";
+			skip = 3;
+		} else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
+			encoding = "UTF-16BE";
+			skip = 2;
+		} else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
+			encoding = "UTF-16LE";
+			skip = 2;
+		}
+		if (n > skip)
+			internalIn.unread(bom, skip, n - skip);
+		// A null encoding selects the platform default charset.
+		if (encoding == null) {
+			internalIn2 = new InputStreamReader(internalIn);
+		} else {
+			internalIn2 = new InputStreamReader(internalIn, encoding);
+		}
+	}
+
+	/** Closes the stream; BOM detection is skipped if nothing was read yet. */
+	public void close() throws IOException {
+		if (internalIn2 == null)
+			internalIn.close(); // do not read the head just to close
+		else
+			internalIn2.close();
+	}
+
+	/** Reads decoded characters, detecting the encoding on first use. */
+	public int read(char[] cbuf, int off, int len) throws IOException {
+		init();
+		return internalIn2.read(cbuf, off, len);
+	}
+}
+\ No newline at end of file