Initial
This commit is contained in:
commit
45627ad72b
4 changed files with 113 additions and 0 deletions
1
LICENSE
Normal file
1
LICENSE
Normal file
|
|
@ -0,0 +1 @@
|
|||
This work is licensed under the Creative Commons Attribution 3.0 Unported License. To view a copy of this license, visit http://creativecommons.org/licenses/by/3.0/ or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.
|
||||
54
lib/utf8.js
Normal file
54
lib/utf8.js
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
/* Utf8.js
|
||||
*
|
||||
* A port of the Utf8 encoding functions written by Chris Veness, about as far
|
||||
* back as 2002. I simply ported this to Node's module architecture and pushed it to
|
||||
* npm (package manager), because... well, this is a ridiculously useful module to NOT have. ;P
|
||||
*
|
||||
* @Authors: Chris Veness, Ryan McGrath <ryan@venodesigns.net>
|
||||
* @Requires: Nothing
|
||||
*/
|
||||
|
||||
/* Encode multi-byte Unicode string into utf-8 multiple single-byte characters
|
||||
* (BMP / basic multilingual plane only)
|
||||
*
|
||||
* Chars in range U+0080 - U+07FF are encoded in 2 chars, U+0800 - U+FFFF in 3 chars
|
||||
*
|
||||
* @param {String} strUni Unicode string to be encoded as UTF-8
|
||||
* @returns {String} encoded string
|
||||
*/
|
||||
exports.encode = function(strUni) {
|
||||
/* Use regular expressions & String.replace callback function for better efficiency
|
||||
* than procedural approaches
|
||||
*/
|
||||
var strUtf = strUni.replace(/[\u0080-\u07ff]/g, function(c) {
|
||||
var cc = c.charCodeAt(0);
|
||||
return String.fromCharCode(0xc0 | cc>>6, 0x80 | cc&0x3f);
|
||||
});
|
||||
|
||||
strUtf = strUtf.replace(/[\u0800-\uffff]/g, function(c) {
|
||||
var cc = c.charCodeAt(0);
|
||||
return String.fromCharCode(0xe0 | cc>>12, 0x80 | cc>>6&0x3F, 0x80 | cc&0x3f);
|
||||
});
|
||||
|
||||
return strUtf;
|
||||
};
|
||||
|
||||
/* Decode utf-8 encoded string back into multi-byte Unicode characters
|
||||
*
|
||||
* @param {String} strUtf UTF-8 string to be decoded back to Unicode
|
||||
* @returns {String} decoded string
|
||||
*/
|
||||
exports.decode = function(strUtf) {
|
||||
/* note: decode 3-byte chars first as decoded 2-byte strings could appear to be 3-byte char! */
|
||||
var strUni = strUtf.replace(/[\u00e0-\u00ef][\u0080-\u00bf][\u0080-\u00bf]/g, function(c) {
|
||||
var cc = ((c.charCodeAt(0)&0x0f)<<12) | ((c.charCodeAt(1)&0x3f)<<6) | ( c.charCodeAt(2)&0x3f);
|
||||
return String.fromCharCode(cc);
|
||||
});
|
||||
|
||||
strUni = strUni.replace(/[\u00c0-\u00df][\u0080-\u00bf]/g, function(c) {
|
||||
var cc = (c.charCodeAt(0)&0x1f)<<6 | c.charCodeAt(1)&0x3f;
|
||||
return String.fromCharCode(cc);
|
||||
});
|
||||
|
||||
return strUni;
|
||||
};
|
||||
32
package.json
Normal file
32
package.json
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"name": "utf8",
|
||||
"description": "Basic Utf-8 encoding/decoding library to alleviate confusion among people.",
|
||||
"version": "1.0.0",
|
||||
"author": "Ryan McGrath <ryan@venodesigns.net>",
|
||||
|
||||
"repository": {
|
||||
"type" : "git",
|
||||
"url": "https://ryanmcgrath@github.com/ryanmcgrath/node-utf8.git"
|
||||
},
|
||||
|
||||
"bugs": {
|
||||
"web": "http://github.com/ryanmcgrath/node-utf8/issues"
|
||||
},
|
||||
|
||||
"os": [ "linux", "darwin", "freebsd" ],
|
||||
|
||||
"directories": {
|
||||
"lib": "./lib/"
|
||||
},
|
||||
|
||||
"main": "./lib/utf8",
|
||||
|
||||
"engines": {
|
||||
"node": ">=0.1.97"
|
||||
},
|
||||
|
||||
"licenses": [{
|
||||
"type" : "Creative Commons 3.0",
|
||||
"url" : "http://github.com/ryanmcgrath/node-utf8/raw/master/LICENSE"
|
||||
}]
|
||||
}
|
||||
26
readme.md
Normal file
26
readme.md
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
Node Utf-8 Encoding/Decoding Functions
|
||||
==============================================================================
|
||||
This is a port of the work done by Chris Veness over the past ten years in relation
|
||||
to encoding and decoding Utf-8 data in a JavaScript environment. I didn't write the core
|
||||
of this, merely modified it to work with Node's package structure and be published on npm, where
|
||||
people can by and large get used to it and stop being confused about this issue. ;P
|
||||
|
||||
|
||||
Installation
|
||||
------------------------------------------------------------------------------
|
||||
npm install utf8
|
||||
|
||||
|
||||
Usage
|
||||
-------------------------------------------------------------------------------
|
||||
``` javascript
|
||||
var utf8 = require('utf8');
|
||||
|
||||
var c = utf8.encode("私は")
|
||||
utf8.decode(c);
|
||||
```
|
||||
|
||||
|
||||
Questions, Comments?
|
||||
--------------------------------------------------------------------------------
|
||||
**[@ryanmcgrath](http://twitter.com/ryanmcgrath/)**
|
||||
Reference in a new issue