Skip to content

Commit

Permalink
Improve detection
Browse files Browse the repository at this point in the history
  • Loading branch information
ZwerOxotnik committed May 17, 2024
1 parent a0de3f8 commit d2e790a
Showing 1 changed file with 103 additions and 14 deletions.
117 changes: 103 additions & 14 deletions sitelen_pona.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,45 @@ M.__syntax = __syntax
local __characters_syntax = require("characters_to_sitelen_pona")
M.__characters_syntax = __characters_syntax

local __special_chars_length = {} -- messy workaround due to Lua bug
local __dots = {
[""] = true,
}
local __commas = {
[""] = true,
}
local __special_char_expr = "(["
local __spec_string_delimeters = {
[""] = "", -- for Chinese Simplified language
[""] = "", -- for Chinese Simplified language
[""] = "", -- for Chinese Simplified language
["«"] = "»", -- for German language
[""] = "", -- for Japanese language
[""] = "", -- for several language
[""] = "", -- for several language
}
for k, v in pairs(__spec_string_delimeters) do
local _, char
_, _, char = k:find("([" .. k .. "])")
__special_chars_length[char] = #k
_, _, char = v:find("([" .. v .. "])")
__special_chars_length[char] = #v
__special_char_expr = __special_char_expr .. k .. v
end
for k in pairs(__dots) do
local _, char
_, _, char = k:find("([" .. k .. "])")
__special_chars_length[char] = #k
__special_char_expr = __special_char_expr .. k
end
for k in pairs(__commas) do
local _, char
_, _, char = k:find("([" .. k .. "])")
__special_chars_length[char] = #k
__special_char_expr = __special_char_expr .. k
end
__special_char_expr = __special_char_expr .. "])"


---@class SitelenPonaPart : table
---@field sitelep_pona SitelenPona?
Expand All @@ -44,31 +83,80 @@ function M.toki_pona_mute_to_sitelen_pona(_text, new_line_pattern)
local is_sitelen_pona = false
local result = {}

---@param word string
---@return string
local function find_special_characters(word)
local last_part = word
local last_result_i = 1
while true do
local first_i, last_i, char = last_part:find(__special_char_expr, last_result_i)
if first_i == nil then
if last_result_i == 1 then
return word
else
return last_part:sub(last_result_i, #last_part)
end
end

if last_result_i < last_i then
local prev_part = last_part:sub(last_result_i, last_i-1)
local sitelen_pona_char = __syntax[prev_part]
if sitelen_pona_char then
result[#result+1] = {
sitelep_pona = sitelen_pona_char,
original = prev_part
}
else
result[#result+1] = {original = prev_part}
end
end

last_result_i = last_i + (__special_chars_length[char] or 1)
local original_char = last_part:sub(last_i, last_result_i-1)
local sitelen_pona_char = __characters_syntax[original_char]
if sitelen_pona_char then
result[#result+1] = {
sitelep_pona = sitelen_pona_char,
original = original_char
}
else
result[#result+1] = {original = original_char}
end
if last_i == #char then
return ""
end
end
end

local function parse(text)
local _, end_i, punc, word, punc2 = text:find("^([%p]*)([^%p]*)([%p]*)")
local sitelen_pona_char = __characters_syntax[punc]
word = find_special_characters(word)
local sitelen_pona_char
local is_end = #text == end_i
if sitelen_pona_char then
result[#result+1] = {
sitelep_pona = sitelen_pona_char,
original = word,
is_add_space = (is_end and word == nil)
}
else
result[#result+1] = {
original = punc,
is_add_space = (is_end and word == nil)
}
if punc ~= "" then
sitelen_pona_char = __characters_syntax[punc]
if sitelen_pona_char then
result[#result+1] = {
sitelep_pona = sitelen_pona_char,
original = punc,
is_add_space = (is_end and word == nil)
}
else
result[#result+1] = {
original = punc,
is_add_space = (is_end and word == nil)
}
end
end
if word then
if word and word ~= "" then
local sitelen_pona = __syntax[word]
result[#result+1] = {
sitelep_pona = sitelen_pona,
original = word,
is_add_space = (is_end and punc2 == nil)
}
end
if punc2 then
if punc2 and punc2 ~= "" then
sitelen_pona_char = __characters_syntax[punc2]
if sitelen_pona_char then
result[#result+1] = {
Expand All @@ -83,6 +171,7 @@ function M.toki_pona_mute_to_sitelen_pona(_text, new_line_pattern)
}
end
end

if not is_end then
parse(text:sub(end_i+1, #text))
end
Expand Down

0 comments on commit d2e790a

Please sign in to comment.