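-- Mô đun:okm-translit (wiki page title of this module)
-- Giao diện (presumably wiki page chrome captured with the source; not part of the module)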
-- Table returned at the end of the file; `tr` is attached to it further down.
local export = {}
-- Short alias for the Unicode-aware substitution function.
local gsub = mw.ustring.gsub
-- Character sets for the Han and Hangul scripts; they are spliced into
-- bracketed pattern classes ('[' .. chars .. ']') by export.tr.
local scripts = require('Module:scripts')
local chars_Hani = scripts.getByCode('Hani'):getCharacters()
local chars_Hang = scripts.getByCode('Hang'):getCharacters()
-- Per-character decomposition table. export.tr applies it with a single
-- gsub over every character of the NFD-normalized text: each key is one
-- composite jamo (conjoining or compatibility form) and each value is the
-- sequence of simple jamo that replaces it. '@' is a placeholder that the
-- rule table `tt` later rewrites (see its "@" rules, e.g. "@ -> y").
-- See:
-- https://github.com/szc126/rime-slg-korean/blob/main/slg_break_jamo.yaml
-- https://github.com/szc126/rime-slg-korean/blob/main/soolegi_yethangeul.custom.yaml
local tt_complex = {
['ᄢ']='ᄇᄉᄀ',
['ᄣ']='ᄇᄉᄃ',
['ᄤ']='ᄇᄉᄇ',
['ᄥ']='ᄇᄉᄉ',
['ᄦ']='ᄇᄉᄌ',
['ᄳ']='ᄉᄇᄀ',
['ᄴ']='ᄉᄉᄉ',
['ꥥ']='ᄅᄀᄀ',
['ꥧ']='ᄅᄃᄃ',
['ꥪ']='ᄅᄇᄇ',
['ꥲ']='ᄇᄉᄐ',
['ꥵ']='ᄉᄉᄇ',
['ꥸ']='ᄌᄌᄒ',
['ᄁ']='ᄀᄀ',
['ᄄ']='ᄃᄃ',
['ᄈ']='ᄇᄇ',
['ᄊ']='ᄉᄉ',
['ᄍ']='ᄌᄌ',
['ᄓ']='ᄂᄀ',
['ᄔ']='ᄂᄂ',
['ᄕ']='ᄂᄃ',
['ᄖ']='ᄂᄇ',
['ᄗ']='ᄃᄀ',
['ᄘ']='ᄅᄂ',
['ᄙ']='ᄅᄅ',
['ᄚ']='ᄅᄒ',
['ᄜ']='ᄆᄇ',
['ᄞ']='ᄇᄀ',
['ᄟ']='ᄇᄂ',
['ᄠ']='ᄇᄃ',
['ᄡ']='ᄇᄉ',
['ᄧ']='ᄇᄌ',
['ᄨ']='ᄇᄎ',
['ᄩ']='ᄇᄐ',
['ᄪ']='ᄇᄑ',
['ᄬ']='ᄫᄫ',
['ᄭ']='ᄉᄀ',
['ᄮ']='ᄉᄂ',
['ᄯ']='ᄉᄃ',
['ᄰ']='ᄉᄅ',
['ᄱ']='ᄉᄆ',
['ᄲ']='ᄉᄇ',
['ᄵ']='ᄉᄋ',
['ᄶ']='ᄉᄌ',
['ᄷ']='ᄉᄎ',
['ᄸ']='ᄉᄏ',
['ᄹ']='ᄉᄐ',
['ᄺ']='ᄉᄑ',
['ᄻ']='ᄉᄒ',
['ᄽ']='ᄼᄼ',
['ᄿ']='ᄾᄾ',
['ᅁ']='ᄋᄀ',
['ᅂ']='ᄋᄃ',
['ᅃ']='ᄋᄆ',
['ᅄ']='ᄋᄇ',
['ᅅ']='ᄋᄉ',
['ᅆ']='ᄋᅀ',
['ᅇ']='ᄋᄋ',
['ᅈ']='ᄋᄌ',
['ᅉ']='ᄋᄎ',
['ᅊ']='ᄋᄐ',
['ᅋ']='ᄋᄑ',
['ᅍ']='ᄌᄋ',
['ᅏ']='ᅎᅎ',
['ᅑ']='ᅐᅐ',
['ᅒ']='ᄎᄏ',
['ᅓ']='ᄎᄒ',
['ᅖ']='ᄑᄇ',
['ᅘ']='ᄒᄒ',
['ᅚ']='ᄀᄃ',
['ᅛ']='ᄂᄉ',
['ᅜ']='ᄂᄌ',
['ᅝ']='ᄂᄒ',
['ᅞ']='ᄃᄅ',
['ꥠ']='ᄃᄆ',
['ꥡ']='ᄃᄇ',
['ꥢ']='ᄃᄉ',
['ꥣ']='ᄃᄌ',
['ꥤ']='ᄅᄀ',
['ꥦ']='ᄅᄃ',
['ꥨ']='ᄅᄆ',
['ꥩ']='ᄅᄇ',
['ꥫ']='ᄅᄫ',
['ꥬ']='ᄅᄉ',
['ꥭ']='ᄅᄌ',
['ꥮ']='ᄅᄏ',
['ꥯ']='ᄆᄀ',
['ꥰ']='ᄆᄃ',
['ꥱ']='ᄆᄉ',
['ꥳ']='ᄇᄏ',
['ꥴ']='ᄇᄒ',
['ꥶ']='ᄋᄅ',
['ꥷ']='ᄋᄒ',
['ꥹ']='ᄐᄐ',
['ꥺ']='ᄑᄒ',
['ꥻ']='ᄒᄉ',
['ꥼ']='ᅙᅙ',
['ᆅ']='@ᅩ@ᅡ@',
['ᆒ']='@ᅮ@ᅥ@',
['ᅹ']='@ᅡ@ᅩ',
['ᆄ']='@ᅩ@ᅡ',
['ᆆ']='@ᅩ@ᅥ',
['ᆑ']='@ᅮ@ᅥ',
['ᆥ']='@ᅥ@ᅡ',
['ᆐ']='@ᅮᅥ@',
['ힳ']='@ᅩᅡ@',
['ힷ']='@ᅮᅡ@',
['ᆁ']='ᅩ@ᅥ@',
['ᆌ']='ᅮ@ᅥ@',
['ᆧ']='ᅩ@ᅡ@',
['ힽ']='ᅵ@ᅡᅩ',
['ힾ']='ᅵ@ᅡ@',
['ퟀ']='ᅵ@ᅥ@',
['ᅤ']='@ᅡ@',
['ᅨ']='@ᅥ@',
['ᅸ']='@ᅡᅩ',
['ᅽ']='@ᅥᅩ',
['ᅾ']='@ᅥᅮ',
['ᆇ']='@ᅩᅩ',
['ᆈ']='@ᅩ@',
['ᆎ']='@ᅮᅡ',
['ᆏ']='@ᅮᅥ',
['ᆓ']='@ᅮᅮ',
['ᆔ']='@ᅮ@',
['ᆤ']='@ᅡᅮ',
['ힲ']='@ᅩᅡ',
['ힴ']='@ᅩᅥ',
['ힸ']='@ᅮᅩ',
['ᆙ']='ᅵ@ᅡ',
['ᆦ']='ᅩ@ᅡ',
['ힰ']='ᅩ@ᅥ',
['ힵ']='ᅮ@ᅥ',
['ힿ']='ᅵ@ᅥ',
['ퟂ']='ᅵ@ᅩ',
['ퟃ']='ᅵ@ᅮ',
['ᅫ']='ᅩᅡ@',
['ᅰ']='ᅮᅥ@',
['ᆀ']='ᅩᅥ@',
['ᆊ']='ᅮᅡ@',
['ᆋ']='ᅮᅥᅳ',
['ᆗ']='ᅳᅵᅮ',
['ힱ']='ᅩᅩᅵ',
['ힶ']='ᅮᅵ@',
['ힻ']='ᅳᅥ@',
['ퟁ']='ᅵᅩᅵ',
['ퟆ']='ᆞᅥ@',
['ᅣ']='@ᅡ',
['ᅧ']='@ᅥ',
['ᅭ']='@ᅩ',
['ᅲ']='@ᅮ',
['ᅢ']='ᅡ@',
['ᅦ']='ᅥ@',
['ᅪ']='ᅩᅡ',
['ᅬ']='ᅩ@',
['ᅯ']='ᅮᅥ',
['ᅱ']='ᅮ@',
['ᅴ']='ᅳ@',
['ᅶ']='ᅡᅩ',
['ᅷ']='ᅡᅮ',
['ᅺ']='ᅥᅩ',
['ᅻ']='ᅥᅮ',
['ᅼ']='ᅥᅳ',
['ᅿ']='ᅩᅥ',
['ᆂ']='ᅩᅩ',
['ᆃ']='ᅩᅮ',
['ᆉ']='ᅮᅡ',
['ᆍ']='ᅮᅮ',
['ᆕ']='ᅳᅮ',
['ᆖ']='ᅳᅳ',
['ᆘ']='ᅵᅡ',
['ᆚ']='ᅵᅩ',
['ᆛ']='ᅵᅮ',
['ᆜ']='ᅵᅳ',
['ᆝ']='ᅵᆞ',
['ᆟ']='ᆞᅥ',
['ᆠ']='ᆞᅮ',
['ᆡ']='ᆞ@',
['ᆢ']='ᆞᆞ',
['ᆣ']='ᅡᅳ',
['ힹ']='ᅳᅡ',
['ힺ']='ᅳᅥ',
['ힼ']='ᅳᅩ',
['ퟄ']='ᅵ@',
['ퟅ']='ᆞᅡ',
['ᇄ']='ᆨᆺᆨ',
['ᇌ']='ᆯᆨᆺ',
['ᇏ']='ᆯᆮᇂ',
['ᇑ']='ᆯᆷᆨ',
['ᇒ']='ᆯᆷᆺ',
['ᇓ']='ᆯᆸᆺ',
['ᇔ']='ᆯᆸᇂ',
['ᇖ']='ᆯᆺᆺ',
['ᇞ']='ᆷᆺᆺ',
['ᇭ']='ᇰᆨᆨ',
['ퟎ']='ᆮᆮᆸ',
['ퟑ']='ᆮᆺᆨ',
['ퟕ']='ᆯᆨᆨ',
['ퟖ']='ᆯᆨᇂ',
['ퟗ']='ᆯᆯᆿ',
['ퟘ']='ᆯᆷᇂ',
['ퟙ']='ᆯᆸᆮ',
['ퟚ']='ᆯᆸᇁ',
['ퟜ']='ᆯᇹᇂ',
['ퟟ']='ᆷᆫᆫ',
['ퟡ']='ᆷᆸᆺ',
['ퟤ']='ᆸᆯᇁ',
['ퟧ']='ᆸᆺᆮ',
['ퟬ']='ᆺᆺᆨ',
['ퟭ']='ᆺᆺᆮ',
['ퟸ']='ᆽᆸᆸ',
['ᆩ']='ᆨᆨ',
['ᆪ']='ᆨᆺ',
['ᆬ']='ᆫᆽ',
['ᆭ']='ᆫᇂ',
['ᆰ']='ᆯᆨ',
['ᆱ']='ᆯᆷ',
['ᆲ']='ᆯᆸ',
['ᆳ']='ᆯᆺ',
['ᆴ']='ᆯᇀ',
['ᆵ']='ᆯᇁ',
['ᆶ']='ᆯᇂ',
['ᆹ']='ᆸᆺ',
['ᆻ']='ᆺᆺ',
['ᇃ']='ᆨᆯ',
['ᇅ']='ᆫᆨ',
['ᇆ']='ᆫᆮ',
['ᇇ']='ᆫᆺ',
['ᇈ']='ᆫᇫ',
['ᇉ']='ᆫᇀ',
['ᇊ']='ᆮᆨ',
['ᇋ']='ᆮᆯ',
['ᇍ']='ᆯᆫ',
['ᇎ']='ᆯᆮ',
['ᇐ']='ᆯᆯ',
['ᇕ']='ᆯᇦ',
['ᇗ']='ᆯᇫ',
['ᇘ']='ᆯᆿ',
['ᇙ']='ᆯᇹ',
['ᇚ']='ᆷᆨ',
['ᇛ']='ᆷᆯ',
['ᇜ']='ᆷᆸ',
['ᇝ']='ᆷᆺ',
['ᇟ']='ᆷᇫ',
['ᇠ']='ᆷᆾ',
['ᇡ']='ᆷᇂ',
['ᇣ']='ᆸᆯ',
['ᇤ']='ᆸᇁ',
['ᇥ']='ᆸᇂ',
['ᇧ']='ᆺᆨ',
['ᇨ']='ᆺᆮ',
['ᇩ']='ᆺᆯ',
['ᇪ']='ᆺᆸ',
['ᇬ']='ᇰᆨ',
['ᇮ']='ᇰᇰ',
['ᇯ']='ᇰᆿ',
['ᇱ']='ᇰᆺ',
['ᇲ']='ᇰᇫ',
['ᇳ']='ᇁᆸ',
['ᇵ']='ᇂᆫ',
['ᇶ']='ᇂᆯ',
['ᇷ']='ᇂᆷ',
['ᇸ']='ᇂᆸ',
['ᇺ']='ᆨᆫ',
['ᇻ']='ᆨᆸ',
['ᇼ']='ᆨᆾ',
['ᇽ']='ᆨᆿ',
['ᇾ']='ᆨᇂ',
['ᇿ']='ᆫᆫ',
['ퟋ']='ᆫᆯ',
['ퟌ']='ᆫᆾ',
['ퟍ']='ᆮᆮ',
['ퟏ']='ᆮᆸ',
['ퟐ']='ᆮᆺ',
['ퟒ']='ᆮᆽ',
['ퟓ']='ᆮᆾ',
['ퟔ']='ᆮᇀ',
['ퟛ']='ᆯᇰ',
['ퟞ']='ᆷᆫ',
['ퟠ']='ᆷᆷ',
['ퟢ']='ᆷᆽ',
['ퟣ']='ᆸᆮ',
['ퟥ']='ᆸᆷ',
['ퟦ']='ᆸᆸ',
['ퟨ']='ᆸᆽ',
['ퟩ']='ᆸᆾ',
['ퟪ']='ᆺᆷ',
['ퟫ']='ᆺᇦ',
['ퟮ']='ᆺᇫ',
['ퟯ']='ᆺᆽ',
['ퟰ']='ᆺᆾ',
['ퟱ']='ᆺᇀ',
['ퟲ']='ᆺᇂ',
['ퟳ']='ᇫᆸ',
['ퟴ']='ᇫᇦ',
['ퟵ']='ᇰᆷ',
['ퟶ']='ᇰᇂ',
['ퟷ']='ᆽᆸ',
['ퟹ']='ᆽᆽ',
['ퟺ']='ᇁᆺ',
['ퟻ']='ᇁᇀ',
-- compatibility jamo
['ㅩ']='ᄅᄀᄉ',
['ㅫ']='ᄅᄇᄉ',
['ㅴ']='ᄇᄉᄀ',
['ㅵ']='ᄇᄉᄃ',
['ㄲ']='ᄀᄀ',
['ㄸ']='ᄃᄃ',
['ㅃ']='ᄇᄇ',
['ㄳ']='ᄀᄉ',
['ㄵ']='ᄂᄌ',
['ㄶ']='ᄂᄒ',
['ㄺ']='ᄅᄀ',
['ㄻ']='ᄅᄆ',
['ㄼ']='ᄅᄇ',
['ㄽ']='ᄅᄉ',
['ㄾ']='ᄅᄐ',
['ㄿ']='ᄅᄑ',
['ㅀ']='ᄅᄒ',
['ㅄ']='ᄇᄉ',
['ㅆ']='ᄉᄉ',
['ㅉ']='ᄌᄌ',
['ㅥ']='ᄂᄂ',
['ㅦ']='ᄂᄃ',
['ㅧ']='ᄂᄉ',
['ㅨ']='ᄂᅀ',
['ㅪ']='ᄅᄃ',
['ㅬ']='ᄅᅀ',
['ㅭ']='ᄅᅙ',
['ㅮ']='ᄆᄇ',
['ㅯ']='ᄆᄉ',
['ㅰ']='ᄆᅀ',
['ㅲ']='ᄇᄀ',
['ㅳ']='ᄇᄃ',
['ㅶ']='ᄇᄌ',
['ㅷ']='ᄇᄐ',
['ㅹ']='ᄫᄫ',
['ㅺ']='ᄉᄀ',
['ㅻ']='ᄉᄂ',
['ㅼ']='ᄉᄃ',
['ㅽ']='ᄉᄇ',
['ㅾ']='ᄉᄌ',
['ㆀ']='ᄋᄋ',
['ㆂ']='ᅌᄉ',
['ㆃ']='ᅌᅀ',
['ㆅ']='ᄒᄒ',
['ㄱ']='ᄀ',
['ㄴ']='ᄂ',
['ㄷ']='ᄃ',
['ㄹ']='ᄅ',
['ㅁ']='ᄆ',
['ㅂ']='ᄇ',
['ㅅ']='ᄉ',
['ㅇ']='ᄋ',
['ㅈ']='ᄌ',
['ㅊ']='ᄎ',
['ㅋ']='ᄏ',
['ㅌ']='ᄐ',
['ㅍ']='ᄑ',
['ㅎ']='ᄒ',
['ㅤ']='ᅟ', -- filler
['ㅱ']='ᄝ',
['ㅸ']='ᄫ',
['ㅿ']='ᅀ',
['ㆁ']='ᅌ',
['ㆄ']='ᅗ',
['ㆆ']='ᅙ',
['ㆈ']='@ᅩ@ᅡᅵ',
['ㆋ']='@ᅮ@ᅥᅵ',
['ㆇ']='@ᅩ@ᅡ',
['ㆊ']='@ᅮ@ᅥ',
['ㅒ']='@ᅡᅵ',
['ㅖ']='@ᅥᅵ',
['ㅙ']='ᅩᅡᅵ',
['ㅞ']='ᅮᅥᅵ',
['ㆉ']='@ᅩᅵ',
['ㆌ']='@ᅮᅵ',
['ㅐ']='ᅡᅵ',
['ㅑ']='@ᅡ',
['ㅔ']='ᅥᅵ',
['ㅕ']='@ᅥ',
['ㅘ']='ᅩᅡ',
['ㅚ']='ᅩᅵ',
['ㅛ']='@ᅩ',
['ㅝ']='ᅮᅥ',
['ㅟ']='ᅮᅵ',
['ㅠ']='@ᅮ',
['ㅢ']='ᅳᅵ',
['ㅏ']='ᅡ',
['ㅓ']='ᅥ',
['ㅗ']='ᅩ',
['ㅜ']='ᅮ',
['ㅡ']='ᅳ',
['ㅣ']='ᅵ',
['ㆍ']='ᆞ',
}
-- Ordered rewrite rules, consumed one line at a time by export.tr. Each line
-- is a mw.ustring pattern followed by its replacement; a replacement of '×'
-- means "replace with the empty string". Text from '#' to the end of a line
-- is stripped as a comment right after this literal. The "BREAK 1" and
-- "BREAK 2" lines are not rules: export.tr runs dedicated processing steps
-- when it reaches them, so their position in the list matters.
local tt = [==[
BREAK 1
# remove hanja from (ex.) 사뎐(辭典)
# caps prob. isn't necessary since the "base" text is actually hangeul?
# Hani regex is a reasonable subset of Hani from [[Module:scripts/data]],
# last checked on 20220221
%([一-鿿㐀-䶿𠀀-𰀀-]+%) ×
# to yale
# non-simple
gᄋ Ğ # voiced velar fricative /ɣ/
ᄋᄋ Ő
@ᅮ yu
@ᅩ yo
ᅩᅡ wa
ᅮᅥ we
ᅵᆞ yo
ᆞᆞ yo
# choseong
ᄀ K
ᄂ N
ᄃ T
ᄅ L
ᄆ M
ᄇ P
ᄉ S
ᄋ Ø
ᄌ C
ᄎ CH
ᄏ KH
ᄐ TH
ᄑ PH
ᄒ H
ᄝ ◆
ᄫ Ƃ
ᅗ ◆
ᄛ ◆
ᅌ Ŋ
ᅀ Z
ᅙ Q
ᄼ ◆
ᅎ ◆
ᅔ ◆
ᄾ ◆
ᅐ ◆
ᅕ ◆
ᅟ × # filler
# jungseong
@ y
ᅡ a
ᅥ e
ᅩ wo
ᅮ wu
ᅳ u
ᅵ i
ᆞ o
ᅠ × # filler
# jongseong
ᆨ k
ᆫ n
ᆮ t
ᆯ l
ᆷ m
ᆸ p
ᆺ s
ᆼ ø
ᆽ c
ᆾ ch
ᆿ kh
ᇀ th
ᇁ ph
ᇂ h
ᇢ ◆
ᇦ ƃ
ᇴ ◆
ퟝ ◆
ᇰ ŋ
ᇫ z
ᇹ q
# tone
〮 ↑
〯 →
# tone diacritic location
([aiueo]+)([y]?)([↑→↓]) %1%3%2
# hyphens within syllables
# CV-y
# CVC-C
# CV-C
# C-V
%-%-%-%-(.-[wyaiueo↑→↓]+)(y) %1-%2
%-%-%-(.-[wyaiueo↑→↓]+[^wyaiueo ])([^wyaiueo ]) %1-%2
%-%-%-(.-[wyaiueo↑→↓]+) %1-
%-%-(.-)([wyaiueo]) %1-%2
# 子(ᄌᆞ)ㅣ
(%))(%-?)i %1%2y
Ø ×
BREAK 2
↑ ́
→ ̌
↓ ̀
ğ G
ő OO
Ø NG # capitalized hanja readings
ø ng
ƃ W
Ŋ NG # capitalized hanja readings
ŋ ng
]==]
-- Normalize the rule table once at load time: trim the literal, drop '#'
-- comments together with the whitespace in front of them, then collapse the
-- blank lines that leaves behind.
tt = gsub(mw.text.trim(tt), '%s*#[^\n]+', '') -- remove comments
tt = gsub(tt, '\n+', '\n') -- remove empty lines
-- Character classes used to build patterns in export.tr. They list the same
-- characters as the "choseong", "jungseong", "jongseong" and "tone" sections
-- of the rule table, in that order.
local a = 'ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑᄒᄝᄫᅗᄛᅌᅀᅙᄼᅎᅔᄾᅐᅕᅟ'
local b = '@ᅡᅥᅩᅮᅳᅵᆞᅠ'
local c = 'ᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿᇀᇁᇂᇢᇦᇴퟝᇰᇫᇹ'
local d = '〮〯'
-- Parsed form of the `tt` rule table: a list of { pattern, replacement }
-- pairs. Built on the first call to export.tr and reused afterwards, since
-- `tt` never changes once the module has loaded.
local parsed_rules

-- Splits `tt` into { pattern, replacement } pairs.
-- The separator is the LAST run of whitespace on a line, so tab- and
-- space-separated tables both parse, and a pattern may itself contain a
-- space (e.g. "[^wyaiueo ]"). Replacements therefore cannot contain
-- whitespace; none of the rules needs that.
local function get_rules()
	if parsed_rules then
		return parsed_rules
	end
	local rules = {}
	for line in mw.text.gsplit(tt, '\n') do
		-- blank lines are harmless; anything else must be a complete rule
		if mw.ustring.find(line, '%S') then
			local pattern, repl = mw.ustring.match(line, '^%s*(.-)%s+(%S+)%s*$')
			if not pattern or pattern == '' then
				-- fail with a readable message instead of a nil concatenation
				error('okm-translit: malformed rule line: ' .. line)
			end
			rules[#rules + 1] = { pattern, repl }
		end
	end
	parsed_rules = rules
	return rules
end

-- "BREAK 1" step: runs on jamo text, before any romanization rule.
local function prepare_syllables(text, has_tone_marks)
	-- add period between hanja readings
	text = gsub(text, '([' .. chars_Hani .. '])%((.-)%)', function(hanja, reading)
		local dotted = gsub(reading, ('([%s]+)'):format(a), '.%1')
		return hanja .. '(' .. dotted .. ')'
	end)
	if has_tone_marks then
		-- move the location of tone marks for easier handling (tone goes right
		-- after the vowel, before any final) and mark low tone: a syllable
		-- without a tone mark gets '↓'
		local syllable = ('([%s]+)([%s]+)([%s]*)([%s]*)'):format(a, b, c, d)
		text = gsub(text, syllable, function(initial, vowel, final, tone)
			return initial .. vowel .. (tone == '' and '↓' or tone) .. final
		end)
	end
	return text
end

-- "BREAK 2" step: runs after the romanization rules of the first section.
local function finish_hanja_readings(text, has_tone_marks)
	text = mw.ustring.lower(text)
	-- hanja readings: replace "hanja(reading)" by the reading in capitals
	-- ref. [[Module:Ethi-translit]]
	text = gsub(text, '[' .. chars_Hani .. ']+%((.-)%)', function(reading)
		-- treat final ieung as null if tones are marked (is this a safe assumption?)
		if has_tone_marks then
			reading = gsub(reading, 'ø', '')
		end
		-- convert to uppercase
		return mw.ustring.upper(reading)
	end)
	-- remove hanja reading leading period
	text = gsub(text, '^%.', '')
	text = gsub(text, "'''%.", "'''")
	text = gsub(text, '(%s)%.', '%1')
	return text
end

--- Romanizes text containing Hangul using the rule table `tt`.
-- Ruby markup tags are stripped, the text is NFD-normalized, composite jamo
-- are broken up via `tt_complex`, and then every rule of `tt` is applied in
-- order, with the two BREAK steps run at their positions in the list.
-- @param text string to transliterate
-- @param lang unused; kept for the transliteration-module call signature
-- @param sc unused; kept for the transliteration-module call signature
-- @return the romanized string, or nil when `text` contains no character of
--         the Hangul script
function export.tr(text, lang, sc)
	text = gsub(text, "%<%/?r[pt]%>", "")
	text = gsub(text, "%<%/?ruby%>", "")
	if not mw.ustring.match(text, '[' .. chars_Hang .. ']') then
		return nil
	end
	-- must be checked on the input, before the tone marks are rewritten
	local has_tone_marks = mw.ustring.find(text, ('[%s]'):format(d)) ~= nil
	text = mw.ustring.toNFD(text)
	text = gsub(text, '.', tt_complex)
	for _, rule in ipairs(get_rules()) do
		local pattern, repl = rule[1], rule[2]
		if pattern == 'BREAK' and repl == '1' then
			text = prepare_syllables(text, has_tone_marks)
		elseif pattern == 'BREAK' and repl == '2' then
			text = finish_hanja_readings(text, has_tone_marks)
		else
			-- '×' in the rule table stands for "delete"
			text = gsub(text, pattern, repl == '×' and '' or repl)
		end
	end
	-- track failed romanizations
	-- (black diamond instead of U+FFFD to avoid warnings when saving this page)
	if mw.ustring.match(text, '◆') then
		require('Module:debug').track('okm-translit/failed romanization')
	end
	return text
end
return export