Mark
01/29/2021, 2:37 AMString
once for each pattern?
/**
 * Returns this string with a fixed set of lower-case accented Latin characters replaced by
 * ASCII spellings (for example `ü` becomes `ue`, `ß` becomes `ss`, `é` becomes `e`).
 *
 * Characters that are not listed in the mapping below (including upper-case accented
 * letters) are copied unchanged. A blank string is returned as-is.
 *
 * The mapping is applied in one pass over the characters, so no regular expression is
 * compiled and the string is not re-scanned once per replacement group.
 *
 * @return the romanized string, or this same string when it is blank
 */
fun String.romanizeAccentedChars(): String {
    if (isBlank()) {
        return this
    }
    val source = this
    return buildString(source.length) {
        for (ch in source) {
            when (ch) {
                'á', 'à', 'â' -> append('a')
                'ä', 'æ' -> append("ae")
                'é', 'è', 'ê', 'ë', 'ę' -> append('e')
                'í', 'î', 'ï' -> append('i')
                'ó', 'ō', 'ô' -> append('o')
                'ö', 'ø' -> append("oe")
                'ū', 'ú', 'ù', 'û' -> append('u')
                'ü' -> append("ue")
                'ç', 'ć' -> append('c')
                'ß' -> append("ss")
                // Anything outside the mapping is kept verbatim.
                else -> append(ch)
            }
        }
    }
}
/**
 * Replaces every match of the regular expression [regexStr] in this string with [replacement].
 *
 * The pattern is compiled on each call.
 *
 * @param regexStr regular-expression source text
 * @param replacement replacement passed to the regex replace call
 * @return the string after replacement
 */
fun String.replaceByRegex(regexStr: String, replacement: String): String {
    val pattern = Regex(regexStr)
    return pattern.replace(this, replacement)
}
/**
 * Maps a regex character-class pattern (as source text) to the ASCII text that replaces
 * any character matched by that pattern.
 */
private val ACCENTED_CHARS_REPLACEMENTS: Map<String, String> = listOf(
    Pair("[áàâ]", "a"),
    Pair("[äæ]", "ae"),
    Pair("[éèêëę]", "e"),
    Pair("[íîï]", "i"),
    Pair("[óōô]", "o"),
    Pair("[öø]", "oe"),
    Pair("[ūúùû]", "u"),
    Pair("[ü]", "ue"),
    Pair("[çć]", "c"),
    Pair("[ß]", "ss"),
).toMap()
/**
 * Compiled form of [ACCENTED_CHARS_REPLACEMENTS]: each pattern string is turned into a
 * [Regex] and paired with its replacement text.
 *
 * Built on first access (`by lazy`), so the patterns are compiled once rather than on
 * every use.
 */
private val ACCENTED_CHARS_REGEXES: Map<Regex, String> by lazy {
    ACCENTED_CHARS_REPLACEMENTS.entries.associate { entry ->
        Regex(entry.key) to entry.value
    }
}
/**
 * Returns this string with accented characters replaced according to
 * [ACCENTED_CHARS_REGEXES], applying each pattern in the map's iteration order.
 *
 * A blank string is returned as-is.
 */
fun String.romanizeAccentedChars(): String {
    if (isBlank()) return this

    var result = this
    for ((pattern, substitute) in ACCENTED_CHARS_REGEXES) {
        result = pattern.replace(result, substitute)
    }
    return result
}
Guillermo Alcantara
01/29/2021, 2:51 AMMark
// Chat timestamp: 01/29/2021, 2:53 AM
/**
 * Maps each ASCII replacement text to the compiled character-class [Regex] matching the
 * accented characters it replaces.
 *
 * Built on first access (`by lazy`), so the patterns are compiled once.
 */
private val ACCENTED_CHARS_REGEXES: Map<String, Regex> by lazy {
    mapOf(
        Pair("a", Regex("[áàâ]")),
        Pair("ae", Regex("[äæ]")),
        Pair("e", Regex("[éèêëę]")),
        Pair("i", Regex("[íîï]")),
        Pair("o", Regex("[óōô]")),
        Pair("oe", Regex("[öø]")),
        Pair("u", Regex("[ūúùû]")),
        Pair("ue", Regex("[ü]")),
        Pair("c", Regex("[çć]")),
        Pair("ss", Regex("[ß]")),
    )
}
/**
 * Returns this string with accented characters replaced according to
 * [ACCENTED_CHARS_REGEXES] (replacement text keyed to its pattern), applying each entry
 * in the map's iteration order.
 *
 * A blank string is returned as-is.
 */
fun String.romanizeAccentedChars(): String {
    if (isBlank()) return this

    var result = this
    for ((substitute, pattern) in ACCENTED_CHARS_REGEXES) {
        result = pattern.replace(result, substitute)
    }
    return result
}
Guillermo Alcantara
01/29/2021, 3:02 AMMark
// Chat timestamp: 01/29/2021, 3:10 AM
/**
 * A set of character replacements that can be applied to a string.
 *
 * @property allCharsRegex matches any character covered by the spec; used as a cheap
 *   pre-check so strings without such characters are returned untouched
 * @property replacementMap replacement text mapped to the regex whose matches it replaces
 */
private class ReplacementsSpec(val allCharsRegex: Regex, val replacementMap: Map<String, Regex>) {

    /**
     * Applies every replacement in [replacementMap] to [str].
     *
     * @return [str] itself when [allCharsRegex] finds no match, otherwise the string after
     *   all replacements have been applied in the map's iteration order
     */
    fun replace(str: String): String {
        if (!allCharsRegex.containsMatchIn(str)) {
            return str
        }
        return replacementMap.entries.fold(str) { acc, (replacement, regex) ->
            // The replacement is literal text: escape it so '$' and '\' are not parsed
            // as group references by the regex replace call.
            acc.replace(regex, Regex.escapeReplacement(replacement))
        }
    }

    companion object {
        /** Characters that carry special meaning inside a regex character class. */
        private const val CHAR_CLASS_META = "\\[]^-&"

        /**
         * Builds a spec from `replacement to sourceCharacters` pairs.
         *
         * Pairs sharing the same replacement are merged rather than overwriting one
         * another, and source characters are escaped so that characters such as `]`,
         * `^` or `-` are matched literally.
         *
         * @param replacementPairs replacement text paired with the characters it replaces
         * @throws IllegalArgumentException if no pairs are given or a replacement has no
         *   source characters
         */
        operator fun invoke(vararg replacementPairs: Pair<String, String>): ReplacementsSpec {
            require(replacementPairs.isNotEmpty()) { "At least one replacement pair is required" }

            val charsByReplacement = replacementPairs
                .groupBy({ it.first }, { it.second })
                .mapValues { (_, charGroups) -> charGroups.joinToString(separator = "") }
            charsByReplacement.forEach { (replacement, chars) ->
                require(chars.isNotEmpty()) { "No source characters given for replacement \"$replacement\"" }
            }

            val regexMap = charsByReplacement.mapValues { (_, chars) -> charClassRegex(chars) }
            val allCharsRegex = charClassRegex(charsByReplacement.values.joinToString(separator = ""))
            return ReplacementsSpec(allCharsRegex, regexMap)
        }

        /** Compiles a character class matching exactly the characters in [chars]. */
        private fun charClassRegex(chars: String): Regex {
            val pattern = buildString(chars.length + 2) {
                append('[')
                for (ch in chars) {
                    if (ch in CHAR_CLASS_META) append('\\')
                    append(ch)
                }
                append(']')
            }
            return Regex(pattern)
        }
    }
}
/**
 * Replacement spec for accented characters, created on first access (`by lazy`).
 */
private val ACCENTED_CHARS_REPLACEMENTS_SPEC: ReplacementsSpec by lazy {
    // Each pair reads: ASCII replacement to the source characters it stands in for.
    val replacementToSourceChars = arrayOf(
        Pair("a", "áàâ"),
        Pair("ae", "äæ"),
        Pair("c", "çć"),
        Pair("e", "éèêëę"),
        Pair("i", "íîï"),
        Pair("o", "óōô"),
        Pair("oe", "öø"),
        Pair("ss", "ß"),
        Pair("u", "ūúùû"),
        Pair("ue", "ü"),
    )
    ReplacementsSpec(*replacementToSourceChars)
}
/**
 * Returns this string with accented characters replaced as defined by
 * [ACCENTED_CHARS_REPLACEMENTS_SPEC].
 */
fun String.romanizeAccentedChars(): String =
    ACCENTED_CHARS_REPLACEMENTS_SPEC.replace(this)
James Richardson
01/29/2021, 9:30 AMMark
01/29/2021, 10:36 AM