Initial, etc

2012-04-11 06:30:19 -04:00 · 2012-04-11 06:30:19 -04:00 · 4e04fbe997
commit 4e04fbe997
6 changed files with 524 additions and 0 deletions
--- a/21
+++ b/21
@ -0,0 +1,21 @@
 The MIT License
 Copyright (c) 2009 - 2010 Ryan McGrath
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
--- a/jTransliterate/init.py
+++ b/jTransliterate/init.py
@ -0,0 +1,254 @@
 # -*- coding: utf-8 -*- 
 #!/usr/bin/python
 __author__ = "Ryan McGrath <ryan@venodesigns.net>"
 __version__ = "1.0"
 """
    A class that allows for easy transliteration of [Hirag/Katak]ana
    and English/Latin. Much of the work here is inspired/inherited/etc
    from Kim Ahlström and his work on "Ve", built in Ruby. 
    Credit where credit is due:
    https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb
 """
 import re
 # Lookup tables for character conversions. Much of this is borrowed from the work of
 # Kim Ahlström and Ve: https://github.com/Kimtaro/ve/
 #
 # Ve's Transliterators are written in Ruby, and I wanted Python. Consider it a nice port. ;)
 from translation_maps import H_SYLLABIC_N, H_SMALL_TSU, HIRA_TO_LATN, LATN_TO_HIRA
 def defaultToSelfText(fn):
    """Let a method fall back to the text stored on its instance.
    The returned wrapper accepts ``text`` positionally or by keyword. When it
    is omitted (or ``None``) the wrapper substitutes ``self.text`` and then
    calls ``fn(self, text=text)``.
    Parameters:
        fn - method with the signature ``(self, text)``.
    Returns:
        The wrapping function; it keeps fn's name and docstring.
    """
    import functools
    @functools.wraps(fn)
    def wrapper(self, text = None):
        if text is None:
            text = self.text
        return fn(self, text = text)
    return wrapper
 class JapaneseTransliterator(object):
    """Convert text between hiragana, katakana, romaji and full/half-width forms.
    Every ``transliterate_*`` method works on the text handed to the
    constructor unless a different ``text`` is passed to the call. Input may
    be unicode text or a UTF-8 encoded byte string; results are unicode text.
    """
    def __init__(self, text):
        """Remember the default text to operate on.
        Parameters:
            text - Text to be operated on (unicode, or UTF-8 encoded bytes).
        """
        self.text = text
    @staticmethod
    def _as_unicode(value):
        """Return value as unicode text, decoding it from UTF-8 if it is bytes."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value
    @defaultToSelfText
    def transliterate_from_hrkt_to_latn(self, text):
        """Transliterate hiragana and/or katakana to Latin characters.
        Katakana is first folded to hiragana, then romanised.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        hiragana = self.transliterate_from_kana_to_hira(text)
        return self.transliterate_from_hira_to_latn(hiragana)
    @defaultToSelfText
    def transliterate_from_hira_to_latn(self, text):
        """Transliterate hiragana to Latin characters (phonetically).
        Characters without an entry in HIRA_TO_LATN are copied through as-is.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        # The marker constants may be UTF-8 byte strings; normalise them once.
        small_tsu = JapaneseTransliterator._as_unicode(H_SMALL_TSU)
        syllabic_n = JapaneseTransliterator._as_unicode(H_SYLLABIC_N)
        kana = JapaneseTransliterator._as_unicode(text)
        romaji = u''
        geminate = False
        index = 0
        total = len(kana)
        while index < total:
            # Prefer two-character digraphs, then fall back to one character.
            for length in (2, 1):
                chunk = kana[index:index + length]
                mora = ''
                if chunk == small_tsu:
                    # Small tsu doubles the first consonant of the next mora.
                    geminate = True
                    index += len(chunk)
                    break
                following = kana[index + 1:index + 2]
                if chunk == syllabic_n and following and u'\u3084' <= following <= u'\u3088':
                    # Syllabic N before ya, yu or yo
                    mora = "n'"
                elif chunk in HIRA_TO_LATN:
                    mora = HIRA_TO_LATN[chunk]
                if mora:
                    if geminate:
                        geminate = False
                        # Repeat the mora's own leading consonant.
                        romaji += mora[0:1]
                    romaji += mora
                    index += len(chunk)
                    break
                if length == 1:
                    # Nothing matched: pass the character through unchanged.
                    romaji += chunk
                    index += 1
        return romaji
    @defaultToSelfText
    def transliterate_from_latn_to_hrkt(self, text):
        """Transliterate Latin characters to hiragana (mostly).
        A chunk containing an upper-case ASCII letter is emitted as katakana.
        Characters that match nothing are copied through lower-cased.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        to_unicode = JapaneseTransliterator._as_unicode
        syllabic_n = to_unicode(H_SYLLABIC_N)
        small_tsu = to_unicode(H_SMALL_TSU)
        romaji = to_unicode(text)
        # "m" before b/p is written with the syllabic n (e.g. kampai -> kanpai).
        romaji = re.sub(r'm([BbPp])', r'n\1', romaji)
        romaji = re.sub(r'M([BbPp])', r'N\1', romaji)
        kana = u''
        index = 0
        total = len(romaji)
        while index < total:
            # Try the longest romaji chunk first.
            for for_removal in (3, 2, 1):
                chunk = romaji[index:index + for_removal]
                is_upper = re.search('[A-Z]', chunk) is not None
                chunk = chunk.lower()
                consumed = len(chunk)
                mora = u''
                if re.match(r'nn[aiueo]', chunk):
                    # Only the first "n" is the syllabic n; the second one
                    # starts the next mora (na, ni, nu, ne, no).
                    mora = syllabic_n
                    consumed = 1
                elif chunk in LATN_TO_HIRA:
                    mora = to_unicode(LATN_TO_HIRA[chunk])
                elif chunk == 'tch' or (for_removal == 2 and re.match(r'([kgsztdnbpmyrlwc])\1', chunk)):
                    # A doubled consonant becomes a small tsu; consume one letter.
                    mora = small_tsu
                    consumed = 1
                if mora:
                    if is_upper:
                        mora = self.transliterate_from_hira_to_kana(text = mora)
                    kana += mora
                    index += consumed
                    break
                if for_removal == 1:
                    kana += chunk
                    index += 1
        return kana
    @defaultToSelfText
    def transliterate_from_kana_to_hira(self, text):
        """Transliterate katakana to hiragana.
        Shifts code points 12449-12534 down by 96.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        return JapaneseTransliterator.transpose_codepoints_in_range(text, -96, 12449, 12534)
    @defaultToSelfText
    def transliterate_from_hira_to_kana(self, text):
        """Transliterate hiragana to katakana.
        Shifts code points 12353-12438 up by 96.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        return JapaneseTransliterator.transpose_codepoints_in_range(text, 96, 12353, 12438)
    @defaultToSelfText
    def transliterate_from_fullwidth_to_halfwidth(self, text):
        """Transliterate full-width characters to half-width.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        text = JapaneseTransliterator.transpose_codepoints_in_range(text, -65248, 65281, 65374)
        # The ideographic space (12288) maps to the plain space (32).
        return JapaneseTransliterator.transpose_codepoints_in_range(text, -12256, 12288, 12288)
    @defaultToSelfText
    def transliterate_from_halfwidth_to_fullwidth(self, text):
        """Transliterate half-width characters to full-width.
        Parameters:
            text - Optional. Use different text than what's on
                the class instance.
        """
        text = JapaneseTransliterator.transpose_codepoints_in_range(text, 65248, 33, 126)
        # The plain space (32) maps to the ideographic space (12288).
        return JapaneseTransliterator.transpose_codepoints_in_range(text, 12256, 32, 32)
    @staticmethod
    def transpose_codepoints_in_range(text, distance, range_start, range_end):
        """Shift every code point inside an inclusive range by a fixed distance.
        Parameters:
            text - text to be transposed (unicode, or UTF-8 encoded bytes)
            distance - amount added to each code point that is in range
            range_start - first code point of the range (inclusive)
            range_end - last code point of the range (inclusive)
        Returns:
            unicode text; characters outside the range are left untouched
        """
        text = JapaneseTransliterator._as_unicode(text)
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3
        shifted = []
        for char in text:
            codepoint = ord(char)
            if range_start <= codepoint <= range_end:
                codepoint += distance
            shifted.append(to_char(codepoint))
        return u''.join(shifted)
--- a/jTransliterate/test.py
+++ b/jTransliterate/test.py
@ -0,0 +1,30 @@
 # -*- coding: utf-8 -*-
 from __init__ import JapaneseTransliterator
 # Demo script: runs each public conversion once and prints what it returns.
 # Transliterate from Latin/English to [Hirag/Katak]ana
 x = JapaneseTransliterator('kanazawa')
 print x.transliterate_from_latn_to_hrkt()
 # Should print "かなざわ"
 # Transliterate from Hiragana to Latin/English
 b = JapaneseTransliterator('かなざわ')
 print b.transliterate_from_hira_to_latn()
 # Should print "kanazawa"
 # Transliterate from either Hiragana or Katakana to Latin/English;
 # passing text= overrides the text stored on the instance for this call only.
 print b.transliterate_from_hrkt_to_latn(text = 'カナザワ')
 # Should print "kanazawa"
 # Transliterate from Katakana to Hiragana (You... probably never need to do this)
 print b.transliterate_from_kana_to_hira(text = 'キットカート')
 # Should print "きっとかーと"
 # Transliterate from Hiragana to Katakana
 print b.transliterate_from_hira_to_kana(text = 'かなざわ')
 # Should print "カナザワ"
 # If you want to convert between half/full width kana, you can use the following
 # functions. They run on the text stored on b; the return values are discarded
 # here, so nothing is printed for them.
 b.transliterate_from_halfwidth_to_fullwidth()
 b.transliterate_from_fullwidth_to_halfwidth()
--- a/jTransliterate/translation_maps.py
+++ b/jTransliterate/translation_maps.py
@ -0,0 +1,128 @@
 # -*- coding: utf-8 -*-
 # Hiragana syllabic "n"; romanised as "n'" when a ya/yu/yo-row character follows.
 H_SYLLABIC_N= 'ん'
 # Hiragana small "tsu"; marks that the next mora's first consonant is doubled.
 H_SMALL_TSU = 'っ'
 """
 Python sucks with regards to unicode-fun, but I'm leaving this here as
 a fun reference for anyone deciphering all this. Enjoy. -- Ryan
 HIRA_TO_LATN = {
    "あ":"a", "い":"i", "う":"", "え":"e", "お":"o",
    "か":"ka", "き":"ki", "く":"k", "け":"ke", "こ":"ko",
    "が":"ga", "ぎ":"gi", "ぐ":"g", "げ":"ge", "ご":"go",
    "さ":"sa", "し":"shi", "す":"s", "せ":"se", "そ":"so",
    "ざ":"za", "じ":"ji", "ず":"z", "ぜ":"ze", "ぞ":"zo",
    "た":"ta", "ち":"chi", "つ":"ts", "て":"te", "と":"to",
    "だ":"da", "ぢ":"ji", "づ":"z", "で":"de", "ど":"do",
    "な":"na", "に":"ni", "ぬ":"n", "ね":"ne", "の":"no",
    "は":"ha", "ひ":"hi", "ふ":"f", "へ":"he", "ほ":"ho",
    "ば":"ba", "び":"bi", "ぶ":"b", "べ":"be", "ぼ":"bo",
    "ぱ":"pa", "ぴ":"pi", "ぷ":"p", "ぺ":"pe", "ぽ":"po",
    "ま":"ma", "み":"mi", "む":"m", "め":"me", "も":"mo",
    "や":"ya", "ゆ":"y", "よ":"yo",
    "ら":"ra", "り":"ri", "る":"r", "れ":"re", "ろ":"ro",
    "わ":"wa", "うぃ":"whi", "うぇ":"whe", "を":"wo",
    "ゑ":"wye", "ゐ":"wyi", "ー":"-", "ん":"n",
    "きゃ":"kya", "きゅ":"ky", "きょ":"kyo", "きぇ":"kye", "きぃ":"kyi",
    "ぎゃ":"gya", "ぎゅ":"gy", "ぎょ":"gyo", "ぎぇ":"gye", "ぎぃ":"gyi",
    "くぁ":"kwa", "くぃ":"kwi", "くぅ":"kw", "くぇ":"kwe", "くぉ":"kwo",
    "ぐぁ":"qwa", "ぐぃ":"gwi", "ぐぅ":"gw", "ぐぇ":"gwe", "ぐぉ":"gwo",
    "しゃ":"sha", "しぃ":"syi", "しゅ":"sh", "しぇ":"she", "しょ":"sho",
    "じゃ":"jya", "じゅ":"zy", "じぇ":"zye", "じょ":"zyo", "じぃ":"zyi",
    "すぁ":"swa", "すぃ":"swi", "すぅ":"sw", "すぇ":"swe", "すぉ":"swo",
    "ちゃ":"tya", "ちゅ":"ty", "ちぇ":"tye", "ちょ":"tyo", "ちぃ":"tyi",
    "ぢゃ":"dya", "ぢぃ":"dyi", "ぢゅ":"dy", "ぢぇ":"dye", "ぢょ":"dyo",
    "つぁ":"tsa", "つぃ":"tsi", "つぇ":"tse", "つぉ":"tso", "てゃ":"tha",
    "てぃ":"thi", "てゅ":"th", "てぇ":"the", "てょ":"tho", "とぁ":"twa",
    "とぃ":"twi", "とぅ":"tw", "とぇ":"twe", "とぉ":"two", "でゃ":"dha",
    "でぃ":"dhi", "でゅ":"dh", "でぇ":"dhe", "でょ":"dho", "どぁ":"dwa",
    "どぃ":"dwi", "どぅ":"dw", "どぇ":"dwe", "どぉ":"dwo", "にゃ":"nya",
    "にゅ":"ny", "にょ":"nyo", "にぇ":"nye", "にぃ":"nyi", "ひゃ":"hya",
    "ひぃ":"hyi", "ひゅ":"hy", "ひぇ":"hye", "ひょ":"hyo", "びゃ":"bya",
    "びぃ":"byi", "びゅ":"by", "びぇ":"bye", "びょ":"byo", "ぴゃ":"pya",
    "ぴぃ":"pyi", "ぴゅ":"py", "ぴぇ":"pye", "ぴょ":"pyo", "ふぁ":"fwa",
    "ふぃ":"fyi", "ふぇ":"fye", "ふぉ":"fwo", "ふぅ":"fw", "ふゃ":"fya",
    "ふゅ":"fy", "ふょ":"fyo", "みゃ":"mya", "みぃ":"myi", "みゅ":"my",
    "みぇ":"mye", "みょ":"myo", "りゃ":"rya", "りぃ":"ryi", "りゅ":"ry",
    "りぇ":"rye", "りょ":"ryo",
    "ゔぁ":"va", "ゔぃ":"vyi", "ゔ":"v", "ゔぇ":"vye", "ゔぉ":"vo",
    "ゔゃ":"vya", "ゔゅ":"vy", "ゔょ":"vyo",
    "うぁ":"wha", "いぇ":"ye", "うぉ":"who",
    "ぁ":"xa", "ぃ":"xi", "ぅ":"x", "ぇ":"xe", "ぉ":"xo",
    "ゕ":"xka", "ゖ":"xke", "ゎ":"xwa"
 }
 """
 HIRA_TO_LATN = {u'\u3057\u3047': 'she', u'\u3057\u3043': 'syi', u'\u308b': 'ru', u'\u3093': 'n', u'\u3074\u3047': 'pye', u'\u3074\u3043': 'pyi', u'\u304d\u3085': 'kyu', u'\u304d\u3087': 'kyo', u'\u304d\u3083': 'kya', u'\u3067\u3047': 'dhe', u'\u3050\u3041': 'qwa', u'\u3067\u3043': 'dhi', u'\u3094\u3087': 'vyo', u'\u308a\u3043': 'ryi', u'\u3094\u3083': 'vya', u'\u3048': 'e', u'\u3050': 'gu', u'\u3058': 'ji', u'\u3060': 'da', u'\u3064\u3049': 'tso', u'\u3064\u3047': 'tse', u'\u3064\u3043': 'tsi', u'\u3064\u3041': 'tsa', u'\u3070': 'ba', u'\u3078': 'he', u'\u3080': 'mu', u'\u3088': 'yo', u'\u3043': 'xi', u'\u3090': 'wyi', u'\u3050\u3043': 'gwi', u'\u3072': 'hi', u'\u3050\u3047': 'gwe', u'\u3050\u3045': 'gwu', u'\u3050\u3049': 'gwo', u'\u3057\u3087': 'sho', u'\u3057\u3085': 'shu', u'\u3057\u3083': 'sha', u'\u304b': 'ka', u'\u3053': 'ko', u'\u3074\u3087': 'pyo', u'\u305b': 'se', u'\u3074\u3085': 'pyu', u'\u3074\u3083': 'pya', u'\u304d\u3047': 'kye', u'\u3068\u3041': 'twa', u'\u304d\u3043': 'kyi', u'\u306b': 'ni', u'\u3067\u3087': 'dho', u'\u3067\u3085': 'dhu', u'\u3067\u3083': 'dha', u'\u3094\u3049': 'vo', u'\u3094\u3047': 'vye', u'\u307b': 'ho', u'\u3094\u3043': 'vyi', u'\u3094\u3041': 'va', u'\u3081': 'me', u'\u3089': 'ra', u'\u3091': 'wye', u'\u3046\u3041': 'wha', u'\u3046\u3043': 'whi', u'\u3046\u3047': 'whe', u'\u3073\u3083': 'bya', u'\u3046\u3049': 'who', u'\u3073\u3087': 'byo', u'\u3073\u3085': 'byu', u'\u3066\u3083': 'tha', u'\u3066\u3085': 'thu', u'\u3066\u3087': 'tho', u'\u3046': 'u', u'\u304e': 'gi', u'\u3056': 'za', u'\u308a\u3047': 'rye', u'\u305e': 'zo', u'\u3094\u3085': 'vyu', u'\u3066': 'te', u'\u306e': 'no', u'\u3076': 'bu', u'\u307e': 'ma', u'\u3059\u3049': 'swo', u'\u3086': 'yu', u'\u3059\u3041': 'swa', u'\u3059\u3043': 'swi', u'\u3059\u3045': 'swu', u'\u3059\u3047': 'swe', u'\u308e': 'xwa', u'\u3096': 'xke', u'\u308a\u3085': 'ryu', u'\u308a\u3087': 'ryo', u'\u308a\u3083': 'rya', u'\u3073': 'bi', u'\u3069\u3049': 'dwo', u'\u3069\u3041': 'dwa', 
u'\u3069\u3043': 'dwi', u'\u3069\u3045': 'dwu', u'\u3069\u3047': 'dwe', u'\u3041': 'xa', u'\u3049': 'xo', u'\u3051': 'ke', u'\u3073\u3043': 'byi', u'\u3073\u3047': 'bye', u'\u3061': 'chi', u'\u3069': 'do', u'\u3071': 'pa', u'\u3066\u3043': 'thi', u'\u3066\u3047': 'the', u'\u3079': 'be', u'\u308f': 'wa', u'\u3062\u3085': 'dyu', u'\u3062\u3087': 'dyo', u'\u3062\u3083': 'dya', u'\u307f\u3087': 'myo', u'\u307f\u3085': 'myu', u'\u307f\u3083': 'mya', u'\u3044': 'i', u'\u304c': 'ga', u'\u3072\u3085': 'hyu', u'\u3072\u3087': 'hyo', u'\u3054': 'go', u'\u3072\u3083': 'hya', u'\u305c': 'ze', u'\u3064': 'tsu', u'\u304f\u3049': 'kwo', u'\u304f\u3047': 'kwe', u'\u304f\u3045': 'kwu', u'\u304f\u3043': 'kwi', u'\u306c': 'nu', u'\u304f\u3041': 'kwa', u'\u3074': 'pi', u'\u3068': 'to', u'\u307c': 'bo', u'\u3084': 'ya', u'\u308c': 're', u'\u3072\u3047': 'hye', u'\u3094': 'vu', u'\u3072\u3043': 'hyi', u'\u3045': 'xu', u'\u3047': 'xe', u'\u304f': 'ku', u'\u3057': 'shi', u'\u305f': 'ta', u'\u3062\u3047': 'dye', u'\u3067': 'de', u'\u3062\u3043': 'dyi', u'\u306f': 'ha', u'\u3077': 'pu', u'\u307f\u3047': 'mye', u'\u307f\u3043': 'myi', u'\u30fc': '-', u'\u307f': 'mi', u'\u306b\u3083': 'nya', u'\u306b\u3087': 'nyo', u'\u306b\u3085': 'nyu', u'\u308d': 'ro', u'\u3059': 'su', u'\u3095': 'xka', u'\u304e\u3043': 'gyi', u'\u304e\u3047': 'gye', u'\u3042': 'a', u'\u3058\u3043': 'zyi', u'\u304a': 'o', u'\u3058\u3047': 'zye', u'\u3052': 'ge', u'\u3075\u3049': 'fwo', u'\u3075\u3045': 'fwu', u'\u3075\u3047': 'fye', u'\u305a': 'zu', u'\u3075\u3041': 'fwa', u'\u3075\u3043': 'fyi', u'\u3061\u3083': 'tya', u'\u3062': 'ji', u'\u3061\u3085': 'tyu', u'\u3061\u3087': 'tyo', u'\u306a': 'na', u'\u3044\u3047': 'ye', u'\u3068\u3049': 'two', u'\u3068\u3043': 'twi', u'\u307a': 'pe', u'\u3068\u3047': 'twe', u'\u3068\u3045': 'twu', u'\u3082': 'mo', u'\u3058\u3083': 'jya', u'\u308a': 'ri', u'\u3058\u3087': 'zyo', u'\u3058\u3085': 'zyu', u'\u3092': 'wo', u'\u3075\u3085': 'fyu', u'\u3075\u3087': 'fyo', u'\u3075\u3083': 
'fya', u'\u3061\u3043': 'tyi', u'\u3061\u3047': 'tye', u'\u306b\u3043': 'nyi', u'\u306b\u3047': 'nye', u'\u304d': 'ki', u'\u3055': 'sa', u'\u305d': 'so', u'\u3065': 'zu', u'\u304e\u3083': 'gya', u'\u306d': 'ne', u'\u304e\u3085': 'gyu', u'\u304e\u3087': 'gyo', u'\u3075': 'fu', u'\u307d': 'po'}
 # Romaji (lower-case Latin) -> hiragana lookup table. Keys are one to four
 # characters long and several spellings may map to the same kana. The last
 # row maps a few ASCII shortcuts to Japanese punctuation.
 LATN_TO_HIRA = {
    'a': 'あ', 'i': 'い', 'u': 'う', 'e': 'え', 'o': 'お',
    'ka': 'か', 'ki': 'き', 'ku': 'く', 'ke': 'け', 'ko': 'こ',
    'ga': 'が', 'gi': 'ぎ', 'gu': 'ぐ', 'ge': 'げ', 'go': 'ご',
    'sa': 'さ', 'si': 'し', 'shi': 'し', 'su': 'す', 'se': 'せ', 'so': 'そ',
    'za': 'ざ', 'zi': 'じ', 'ji': 'じ', 'zu': 'ず', 'ze': 'ぜ', 'zo': 'ぞ',
    'ta': 'た', 'ti': 'ち', 'chi': 'ち', 'tu': 'つ', 'tsu': 'つ', 'te': 'て','to': 'と',
    'da': 'だ', 'di': 'ぢ', 'du': 'づ', 'dzu': 'づ', 'de': 'で','do': 'ど',
    'na': 'な', 'ni': 'に', 'nu': 'ぬ','ne': 'ね','no': 'の',
    'ha': 'は', 'hi': 'ひ', 'hu': 'ふ', 'fu': 'ふ', 'he': 'へ','ho': 'ほ',
    'ba': 'ば', 'bi': 'び', 'bu': 'ぶ','be': 'べ','bo': 'ぼ',
    'pa': 'ぱ', 'pi': 'ぴ', 'pu': 'ぷ','pe': 'ぺ','po': 'ぽ',
    'ma': 'ま', 'mi': 'み', 'mu': 'む','me': 'め','mo': 'も',
    'ya': 'や', 'yu': 'ゆ', 'yo': 'よ',
    'ra': 'ら', 'ri': 'り', 'ru': 'る','re': 'れ','ro': 'ろ',
    'la': 'ら', 'li': 'り', 'lu': 'る','le': 'れ','lo': 'ろ',
    'wa': 'わ', 'wi': 'うぃ', 'we': 'うぇ', 'wo': 'を',
    'wye': 'ゑ', 'wyi': 'ゐ', '-': 'ー',
    'n': 'ん', 'nn': 'ん', "n'": 'ん',
    'kya': 'きゃ', 'kyu': 'きゅ', 'kyo': 'きょ', 'kye': 'きぇ', 'kyi': 'きぃ',
    'gya': 'ぎゃ', 'gyu': 'ぎゅ', 'gyo': 'ぎょ', 'gye': 'ぎぇ', 'gyi': 'ぎぃ',
    'kwa': 'くぁ', 'kwi': 'くぃ', 'kwu': 'くぅ', 'kwe': 'くぇ', 'kwo': 'くぉ',
    'gwa': 'ぐぁ', 'gwi': 'ぐぃ', 'gwu': 'ぐぅ', 'gwe': 'ぐぇ', 'gwo': 'ぐぉ',
    # 'qwa' is kept as an alternate spelling of 'gwa'; the other keys that
    # used to sit on this row were exact repeats of the row above.
    'qwa': 'ぐぁ',
    'sya': 'しゃ', 'syi': 'しぃ', 'syu': 'しゅ', 'sye': 'しぇ', 'syo': 'しょ',
    'sha': 'しゃ','shu': 'しゅ', 'she': 'しぇ', 'sho': 'しょ',
    'ja': 'じゃ','ju': 'じゅ', 'je': 'じぇ', 'jo': 'じょ',
    'jya': 'じゃ', 'jyi': 'じぃ', 'jyu': 'じゅ', 'jye': 'じぇ', 'jyo': 'じょ',
    'zya': 'じゃ', 'zyu': 'じゅ', 'zyo': 'じょ', 'zye': 'じぇ', 'zyi': 'じぃ',
    'swa': 'すぁ', 'swi': 'すぃ', 'swu': 'すぅ', 'swe': 'すぇ', 'swo': 'すぉ',
    'cha': 'ちゃ','chu': 'ちゅ', 'che': 'ちぇ', 'cho': 'ちょ',
    'cya': 'ちゃ', 'cyi': 'ちぃ', 'cyu': 'ちゅ', 'cye': 'ちぇ', 'cyo': 'ちょ',
    'tya': 'ちゃ', 'tyi': 'ちぃ', 'tyu': 'ちゅ', 'tye': 'ちぇ', 'tyo': 'ちょ',
    'dya': 'ぢゃ', 'dyi': 'ぢぃ', 'dyu': 'ぢゅ', 'dye': 'ぢぇ', 'dyo': 'ぢょ',
    'tsa': 'つぁ', 'tsi': 'つぃ','tse': 'つぇ', 'tso': 'つぉ',
    'tha': 'てゃ', 'thi': 'てぃ', 'thu': 'てゅ', 'the': 'てぇ', 'tho': 'てょ',
    'twa': 'とぁ', 'twi': 'とぃ', 'twu': 'とぅ', 'twe': 'とぇ', 'two': 'とぉ',
    'dha': 'でゃ', 'dhi': 'でぃ', 'dhu': 'でゅ', 'dhe': 'でぇ', 'dho': 'でょ',
    'dwa': 'どぁ', 'dwi': 'どぃ', 'dwu': 'どぅ', 'dwe': 'どぇ', 'dwo': 'どぉ',
    'nya': 'にゃ', 'nyu': 'にゅ', 'nyo': 'にょ', 'nye': 'にぇ', 'nyi': 'にぃ',
    'hya': 'ひゃ', 'hyi': 'ひぃ', 'hyu': 'ひゅ', 'hye': 'ひぇ', 'hyo': 'ひょ',
    'bya': 'びゃ', 'byi': 'びぃ', 'byu': 'びゅ', 'bye': 'びぇ', 'byo': 'びょ',
    'pya': 'ぴゃ', 'pyi': 'ぴぃ', 'pyu': 'ぴゅ', 'pye': 'ぴぇ', 'pyo': 'ぴょ',
    'fa': 'ふぁ', 'fi': 'ふぃ','fe': 'ふぇ', 'fo': 'ふぉ',
    'fwa': 'ふぁ', 'fwi': 'ふぃ', 'fwu': 'ふぅ', 'fwe': 'ふぇ', 'fwo': 'ふぉ',
    'fya': 'ふゃ', 'fyi': 'ふぃ', 'fyu': 'ふゅ', 'fye': 'ふぇ', 'fyo': 'ふょ',
    'mya': 'みゃ', 'myi': 'みぃ', 'myu': 'みゅ', 'mye': 'みぇ', 'myo': 'みょ',
    'rya': 'りゃ', 'ryi': 'りぃ', 'ryu': 'りゅ', 'rye': 'りぇ', 'ryo': 'りょ',
    'lya': 'りゃ', 'lyu': 'りゅ', 'lyo': 'りょ', 'lye': 'りぇ', 'lyi': 'りぃ',
    'va': 'ゔぁ', 'vi': 'ゔぃ', 'vu': 'ゔ','ve': 'ゔぇ',  'vo': 'ゔぉ',
    'vya': 'ゔゃ', 'vyi': 'ゔぃ', 'vyu': 'ゔゅ', 'vye': 'ゔぇ', 'vyo': 'ゔょ',
    'wha': 'うぁ', 'whi': 'うぃ', 'ye': 'いぇ', 'whe': 'うぇ', 'who': 'うぉ',
    'xa': 'ぁ', 'xi': 'ぃ', 'xu': 'ぅ', 'xe': 'ぇ', 'xo': 'ぉ',
    'xya': 'ゃ', 'xyu': 'ゅ', 'xyo': 'ょ',
    'xtu': 'っ', 'xtsu': 'っ',
    'xka': 'ゕ', 'xke': 'ゖ', 'xwa': 'ゎ',
    '@@': '　', '#[': '「', '#]': '」', '#,': '、', '#.': '。', '#/': '・',
 }
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,57 @@
 jTransliterate - [Hirag/Katak]ana to Latin/English & Back
 ===========================================================================
 Sometimes you may want to convert from Hiragana to Katakana, or back again, or...
 I dunno, maybe you wanna get the English pronunciation of these words. I'll
 be honest and say it's of no concern or interest to me, but I needed this in
 Python and so I ported it, figured I'd release it.
 It's MIT licensed. Credit for much of this also belongs to Kim Ahlström and
 his linguistics/etc work on **[Ve](https://github.com/Kimtaro/ve/blob/master/lib/providers/japanese_transliterators.rb)**.
 Installation
 ---------------------------------------------------------------------------
    pip install jTransliterate
 Examples && Documentation
 ---------------------------------------------------------------------------
 ``` python
 # -*- coding: utf-8 -*-
 from jTransliterate import JapaneseTransliterator
 # Transliterate from Latin/English to [Hirag/Katak]ana
 x = JapaneseTransliterator('kanazawa')
 print x.transliterate_from_latn_to_hrkt()
 # Should print "かなざわ"
 # Transliterate from Hiragana to Latin/English
 b = JapaneseTransliterator('かなざわ')
 print b.transliterate_from_hira_to_latn()
 # Should print "kanazawa"
 # Transliterate from either Hiragana or Katakana to Latin/English
 print b.transliterate_from_hrkt_to_latn(text = 'カナザワ')
 # Should print "kanazawa"
 # Transliterate from Katakana to Hiragana (You... probably never need to do this)
 print b.transliterate_from_kana_to_hira(text = 'キットカート')
 # Should print "きっとかーと"
 # Transliterate from Hiragana to Katakana
 print b.transliterate_from_hira_to_kana(text = 'かなざわ')
 # Should print "カナザワ" 
 # If you want to convert between half/full width kana, you can use the following
 # functions. I didn't care enough to do demos here. ;|
 b.transliterate_from_halfwidth_to_fullwidth()
 b.transliterate_from_fullwidth_to_halfwidth()
 ```
 Questions, Comments, Complaints and/or etc
 ---------------------------------------------------------------------------
 Hit me up on them Twitters or find me on them internets at the links below.
 Twitter: **[@ryanmcgrath](http://twitter.com/ryanmcgrath/)**  
 Web: **[Veno Designs](http://venodesigns.net/)**  
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,34 @@
 #!/usr/bin/env python
 from setuptools import setup
 from setuptools import find_packages
 __author__ = 'Ryan McGrath <ryan@venodesigns.net>'
 __version__ = '1.0.0'
 # Read the long description inside a context manager so the file handle is
 # closed right away instead of being left for the garbage collector.
 with open('readme.md') as readme_file:
    long_description = readme_file.read()
 setup(
    # Basic package information.
    name='jTransliterate',
    version=__version__,
    packages=find_packages(),
    # Packaging options.
    include_package_data=True,
    # Metadata for PyPI.
    author='Ryan McGrath',
    author_email='ryan@venodesigns.net',
    license='MIT License',
    # NOTE(review): this URL points at the twython repository rather than
    # jTransliterate; it looks copied from another project -- confirm.
    url='http://github.com/ryanmcgrath/twython/tree/master',
    keywords='japanese translation transliterate katakana hiragana latin',
    description='Transliterate [Hirag/Katak]ana to Latin/English and back. Convert half/full-width Japanese text.',
    long_description=long_description,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Topic :: Software Development :: Libraries :: Python Modules',
        # NOTE(review): the "Chat" topic looks unrelated to a transliteration
        # library -- confirm it is intended.
        'Topic :: Communications :: Chat',
        'Topic :: Internet'
    ]
 )