Source code for indic_transliteration.xsanscript
# -*- coding: utf-8 -*-
"""
This is a variant of :py:mod:`~indic_transliteration.sanscript` which supports more intuitive transliteration for non-sanskrit characters in Indian languages (like hrasva e and o in draviDian ones).
"""
import copy
from indic_transliteration import sanscript
import sys
SCHEMES = {}
# Brahmi schemes
# -------------
DEVANAGARI = 'devanagari'
KANNADA = 'kannada'
MALAYALAM = 'malayalam'
TAMIL = 'tamil'
TELUGU = 'telugu'
# Roman schemes
# -------------
HK = 'hk'
IAST = 'iast'
ITRANS = 'itrans'
OPTITRANS = 'optitrans'
KOLKATA = 'kolkata'
SLP1 = 'slp1'
VELTHUIS = 'velthuis'
WX = 'wx'
[docs]def transliterate(data, _from=None, _to=None, scheme_map=None, **kw):
"""Transliterate `data` with the given parameters::
output = transliterate('idam adbhutam', HK, DEVANAGARI)
Each time the function is called, a new :class:`SchemeMap` is created
to map the input scheme to the output scheme. This operation is fast
enough for most use cases. But for higher performance, you can pass a
pre-computed :class:`SchemeMap` instead::
scheme_map = SchemeMap(SCHEMES[HK], SCHEMES[DEVANAGARI])
output = transliterate('idam adbhutam', scheme_map=scheme_map)
:param data: the data to transliterate
:param _from: the name of a source scheme
:param _to: the name of a destination scheme
:param scheme_map: the :class:`SchemeMap` to use. If specified, ignore
`_from` and `_to`. If unspecified, create a
:class:`SchemeMap` from `_from` to `_to`.
"""
if scheme_map is None:
from_scheme = SCHEMES[_from]
to_scheme = SCHEMES[_to]
scheme_map = sanscript.SchemeMap(from_scheme, to_scheme)
return sanscript.transliterate(data=data, scheme_map=scheme_map)
def _setup():
"""Add a variety of default schemes."""
s = str.split
if sys.version_info < (3, 0):
# noinspection PyUnresolvedReferences
s = unicode.split
def pop_all(some_dict, some_list):
for scheme in some_list:
some_dict.pop(scheme)
global SCHEMES
SCHEMES = copy.deepcopy(sanscript.SCHEMES)
pop_all(SCHEMES, [sanscript.ORIYA, sanscript.BENGALI, sanscript.GUJARATI])
SCHEMES[DEVANAGARI].update({
'vowels': sanscript.SCHEMES[DEVANAGARI]['vowels'] + s("""ऎ ऒ"""),
'marks': sanscript.SCHEMES[DEVANAGARI]['marks'] + s("""ॆ ॊ"""),
'consonants': sanscript.SCHEMES[DEVANAGARI]['consonants'] + s("""ऩ ऱ ऴ""")
})
SCHEMES[HK].update({
'vowels': s("""a A i I u U R RR lR lRR E ai O au""") + s("""e o"""),
'marks': s("""A i I u U R RR lR lRR E ai O au""") + s("""e o"""),
'consonants': sanscript.SCHEMES[HK]['consonants'] + s("""n2 r2 zh""")
})
SCHEMES[ITRANS].update({
'vowels': s("""a A i I u U R RR LLi LLI E ai O au""") + s("""e o"""),
'marks': s("""A i I u U R RR LLi LLI E ai O au""") + s("""e o"""),
'consonants': sanscript.SCHEMES[ITRANS]['consonants'] + s("""n2 r2 zh""")
})
pop_all(SCHEMES[ITRANS].synonym_map, s("""e o"""))
SCHEMES[OPTITRANS].update({
'vowels': s("""a A i I u U R RR LLi LLI E ai O au""") + s("""e o"""),
'marks': s("""A i I u U R RR LLi LLI E ai O au""") + s("""e o"""),
'consonants': sanscript.SCHEMES[OPTITRANS]['consonants'] + s("""n2 r2 zh""")
})
pop_all(SCHEMES[OPTITRANS].synonym_map, s("""e o"""))
SCHEMES[IAST].update({
'vowels': sanscript.SCHEMES[ITRANS]['vowels'] + s("""ê ô"""),
'marks': sanscript.SCHEMES[ITRANS]['marks'] + s("""ê ô"""),
'consonants': sanscript.SCHEMES[ITRANS]['consonants'] + s("""n r̂ ḷ""")
})
SCHEMES[KANNADA].update({
'vowels': sanscript.SCHEMES[KANNADA]['vowels'] + s("""ಎ ಒ"""),
'marks': sanscript.SCHEMES[KANNADA]['marks'] + s("""ೆ ೊ"""),
'consonants': sanscript.SCHEMES[KANNADA]['consonants'] + s("""ऩ ಱ ೞ""")
})
SCHEMES[MALAYALAM].update({
'vowels': sanscript.SCHEMES[MALAYALAM]['vowels'] + s("""എ ഓ"""),
'marks': sanscript.SCHEMES[MALAYALAM]['marks'] + s("""െ ൊ"""),
'consonants': sanscript.SCHEMES[MALAYALAM]['consonants'] + s("""ഩ ള ൟ"""),
})
SCHEMES[TAMIL].update({
'vowels': sanscript.SCHEMES[TAMIL]['vowels'] + SCHEMES[TAMIL]['vowels'] + s("""எ ஒ"""),
'marks': sanscript.SCHEMES[TAMIL]['marks'] + ['ெ', 'ொ'],
'consonants': sanscript.SCHEMES[TAMIL]['consonants'] + s("""ன ற ழ""")
})
SCHEMES[TELUGU].update({
'vowels': sanscript.SCHEMES[TELUGU]['vowels'] + s("""ఎ ఒ"""),
'marks': sanscript.SCHEMES[TELUGU]['marks'] + s("""ె ొ"""),
'consonants': sanscript.SCHEMES[TELUGU]['consonants'] + s("""ऩ ఴ ౚ""")
})
_setup()