commit 45627ad72b96dca9f35dbb9b9df38a410165d9be Author: Ryan McGrath Date: Mon May 30 15:00:10 2011 +0900 Initial diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..432d8f0 --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +This work is licensed under the Creative Commons Attribution 3.0 Unported License. To view a copy of this license, visit http://creativecommons.org/licenses/by/3.0/ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA. diff --git a/lib/utf8.js b/lib/utf8.js new file mode 100644 index 0000000..f52806e --- /dev/null +++ b/lib/utf8.js @@ -0,0 +1,54 @@ +/* Utf8.js + * + * A port of the Utf8 encoding functions written by Chris Veness, about as far + * back as 2002. I simply ported this to Node's module architecture and pushed it to + * npm (package manager), because... well, this is a ridiculously useful module to NOT have. ;P + * + * @Authors: Chris Veness, Ryan McGrath + * @Requires: Nothing + */ + +/* Encode multi-byte Unicode string into utf-8 multiple single-byte characters + * (BMP / basic multilingual plane only) + * + * Chars in range U+0080 - U+07FF are encoded in 2 chars, U+0800 - U+FFFF in 3 chars + * + * @param {String} strUni Unicode string to be encoded as UTF-8 + * @returns {String} encoded string + */ +exports.encode = function(strUni) { + /* Use regular expressions & String.replace callback function for better efficiency + * than procedural approaches + */ + var strUtf = strUni.replace(/[\u0080-\u07ff]/g, function(c) { + var cc = c.charCodeAt(0); + return String.fromCharCode(0xc0 | cc>>6, 0x80 | cc&0x3f); + }); + + strUtf = strUtf.replace(/[\u0800-\uffff]/g, function(c) { + var cc = c.charCodeAt(0); + return String.fromCharCode(0xe0 | cc>>12, 0x80 | cc>>6&0x3F, 0x80 | cc&0x3f); + }); + + return strUtf; +}; + +/* Decode utf-8 encoded string back into multi-byte Unicode characters + * + * @param {String} strUtf UTF-8 string to be decoded back to Unicode + * @returns {String} decoded string + */ +exports.decode = function(strUtf) { + /* note: decode 3-byte chars first as decoded 2-byte strings could appear to be 3-byte char! */ + var strUni = strUtf.replace(/[\u00e0-\u00ef][\u0080-\u00bf][\u0080-\u00bf]/g, function(c) { + var cc = ((c.charCodeAt(0)&0x0f)<<12) | ((c.charCodeAt(1)&0x3f)<<6) | ( c.charCodeAt(2)&0x3f); + return String.fromCharCode(cc); + }); + + strUni = strUni.replace(/[\u00c0-\u00df][\u0080-\u00bf]/g, function(c) { + var cc = (c.charCodeAt(0)&0x1f)<<6 | c.charCodeAt(1)&0x3f; + return String.fromCharCode(cc); + }); + + return strUni; +}; \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..c4e0091 --- /dev/null +++ b/package.json @@ -0,0 +1,32 @@ +{ + "name": "utf8", + "description": "Basic Utf-8 encoding/decoding library to alleviate confusion among people.", + "version": "1.0.0", + "author": "Ryan McGrath ", + + "repository": { + "type" : "git", + "url": "https://ryanmcgrath@github.com/ryanmcgrath/node-utf8.git" + }, + + "bugs": { + "web": "http://github.com/ryanmcgrath/node-utf8/issues" + }, + + "os": [ "linux", "darwin", "freebsd" ], + + "directories": { + "lib": "./lib/" + }, + + "main": "./lib/utf8", + + "engines": { + "node": ">=0.1.97" + }, + + "licenses": [{ + "type" : "Creative Commons 3.0", + "url" : "http://github.com/ryanmcgrath/node-utf8/raw/master/LICENSE" + }] +} diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..d4fb1e8 --- /dev/null +++ b/readme.md @@ -0,0 +1,26 @@ +Node Utf-8 Encoding/Decoding Functions +============================================================================== +This is a port of the work done by Chris Veness over the past ten years in relation +to encoding and decoding Utf-8 data in a JavaScript environment. I didn't write the core +of this, merely modified it to work with Node's package structure and be published on npm, where +people can by and large get used to it and stop being confused about this issue. ;P + + +Installation +------------------------------------------------------------------------------ + npm install utf8 + + +Usage +------------------------------------------------------------------------------- +``` javascript +var utf8 = require('utf8'); + +var c = utf8.encode("私は") +utf8.decode(c); +``` + + +Questions, Comments? +-------------------------------------------------------------------------------- +**[@ryanmcgrath](http://twitter.com/ryanmcgrath/)**