This commit is contained in:
Ryan McGrath 2011-05-30 15:00:10 +09:00
commit 45627ad72b
4 changed files with 113 additions and 0 deletions

1
LICENSE Normal file
View file

@ -0,0 +1 @@
This work is licensed under the Creative Commons Attribution 3.0 Unported License. To view a copy of this license, visit http://creativecommons.org/licenses/by/3.0/ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.

54
lib/utf8.js Normal file
View file

@ -0,0 +1,54 @@
/* Utf8.js
*
* A port of the Utf8 encoding functions written by Chris Veness, about as far
* back as 2002. I simply ported this to Node's module architecture and pushed it to
* npm (package manager), because... well, this is a ridiculously useful module to NOT have. ;P
*
* @Authors: Chris Veness, Ryan McGrath <ryan@venodesigns.net>
* @Requires: Nothing
*/
/* Encode multi-byte Unicode string into utf-8 multiple single-byte characters
* (BMP / basic multilingual plane only)
*
* Chars in range U+0080 - U+07FF are encoded in 2 chars, U+0800 - U+FFFF in 3 chars
*
* @param {String} strUni Unicode string to be encoded as UTF-8
* @returns {String} encoded string
*/
exports.encode = function(strUni) {
/* Use regular expressions & String.replace callback function for better efficiency
* than procedural approaches
*/
var strUtf = strUni.replace(/[\u0080-\u07ff]/g, function(c) {
var cc = c.charCodeAt(0);
return String.fromCharCode(0xc0 | cc>>6, 0x80 | cc&0x3f);
});
strUtf = strUtf.replace(/[\u0800-\uffff]/g, function(c) {
var cc = c.charCodeAt(0);
return String.fromCharCode(0xe0 | cc>>12, 0x80 | cc>>6&0x3F, 0x80 | cc&0x3f);
});
return strUtf;
};
/* Decode utf-8 encoded string back into multi-byte Unicode characters
*
* @param {String} strUtf UTF-8 string to be decoded back to Unicode
* @returns {String} decoded string
*/
exports.decode = function(strUtf) {
/* note: decode 3-byte chars first as decoded 2-byte strings could appear to be 3-byte char! */
var strUni = strUtf.replace(/[\u00e0-\u00ef][\u0080-\u00bf][\u0080-\u00bf]/g, function(c) {
var cc = ((c.charCodeAt(0)&0x0f)<<12) | ((c.charCodeAt(1)&0x3f)<<6) | ( c.charCodeAt(2)&0x3f);
return String.fromCharCode(cc);
});
strUni = strUni.replace(/[\u00c0-\u00df][\u0080-\u00bf]/g, function(c) {
var cc = (c.charCodeAt(0)&0x1f)<<6 | c.charCodeAt(1)&0x3f;
return String.fromCharCode(cc);
});
return strUni;
};

32
package.json Normal file
View file

@ -0,0 +1,32 @@
{
"name": "utf8",
"description": "Basic Utf-8 encoding/decoding library to alleviate confusion among people.",
"version": "1.0.0",
"author": "Ryan McGrath <ryan@venodesigns.net>",
"repository": {
"type" : "git",
"url": "https://ryanmcgrath@github.com/ryanmcgrath/node-utf8.git"
},
"bugs": {
"web": "http://github.com/ryanmcgrath/node-utf8/issues"
},
"os": [ "linux", "darwin", "freebsd" ],
"directories": {
"lib": "./lib/"
},
"main": "./lib/utf8",
"engines": {
"node": ">=0.1.97"
},
"licenses": [{
"type" : "Creative Commons 3.0",
"url" : "http://github.com/ryanmcgrath/node-utf8/raw/master/LICENSE"
}]
}

26
readme.md Normal file
View file

@ -0,0 +1,26 @@
Node Utf-8 Encoding/Decoding Functions
==============================================================================
This is a port of the work done by Chris Veness over the past ten years on
encoding and decoding UTF-8 data in a JavaScript environment. I didn't write the core
of this; I merely modified it to work with Node's package structure and published it on npm, where
people can by and large get used to it and stop being confused about this issue. ;P
Installation
------------------------------------------------------------------------------
npm install utf8
Usage
-------------------------------------------------------------------------------
``` javascript
var utf8 = require('utf8');
var c = utf8.encode("私は")
utf8.decode(c);
```
Questions, Comments?
--------------------------------------------------------------------------------
**[@ryanmcgrath](http://twitter.com/ryanmcgrath/)**