Skip to content

Commit 00fab55

Browse files
committed
refactor: Create LocaleAffinityBiCalculatorBaseImpl
1 parent aa68db1 commit 00fab55

5 files changed

Lines changed: 517 additions & 100 deletions

File tree

locales-common/src/main/java/com/spotify/i18n/locales/common/LocaleAffinityHelpersFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import com.google.common.base.Preconditions;
2424
import com.ibm.icu.util.ULocale;
25+
import com.spotify.i18n.locales.common.impl.LocaleAffinityBiCalculatorBaseImpl;
2526
import com.spotify.i18n.locales.common.impl.LocaleAffinityCalculatorBaseImpl;
2627
import com.spotify.i18n.locales.common.impl.ReferenceLocalesCalculatorBaseImpl;
2728
import com.spotify.i18n.locales.common.model.LocaleAffinity;
@@ -125,7 +126,7 @@ public LocaleAffinityCalculator buildAffinityCalculatorForLanguageTags(
125126
* @see LocaleAffinityBiCalculator
126127
*/
127128
public LocaleAffinityBiCalculator buildAffinityBiCalculator() {
128-
return ReferenceLocalesCalculatorBaseImpl.builder().buildLocaleAffinityBiCalculator();
129+
return LocaleAffinityBiCalculatorBaseImpl.builder().build();
129130
}
130131

131132
/**
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
/*-
2+
* -\-\-
3+
* locales-common
4+
* --
5+
* Copyright (C) 2016 - 2025 Spotify AB
6+
* --
7+
* Licensed under the Apache License, Version 2.0 (the "License");
8+
* you may not use this file except in compliance with the License.
9+
* You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing, software
14+
* distributed under the License is distributed on an "AS IS" BASIS,
15+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
* See the License for the specific language governing permissions and
17+
* limitations under the License.
18+
* -/-/-
19+
*/
20+
21+
package com.spotify.i18n.locales.common.impl;
22+
23+
import static com.spotify.i18n.locales.utils.hierarchy.LocalesHierarchyUtils.isSameLocale;
24+
import static com.spotify.i18n.locales.utils.language.LanguageUtils.getSpokenLanguageLocale;
25+
import static com.spotify.i18n.locales.utils.languagetag.LanguageTagUtils.parse;
26+
27+
import com.google.auto.value.AutoValue;
28+
import com.ibm.icu.impl.locale.LSR;
29+
import com.ibm.icu.impl.locale.LikelySubtags;
30+
import com.ibm.icu.impl.locale.LocaleDistance;
31+
import com.ibm.icu.util.LocaleMatcher.Direction;
32+
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
33+
import com.ibm.icu.util.ULocale;
34+
import com.spotify.i18n.locales.common.LocaleAffinityBiCalculator;
35+
import com.spotify.i18n.locales.common.LocaleAffinityCalculator;
36+
import com.spotify.i18n.locales.common.model.LocaleAffinity;
37+
import com.spotify.i18n.locales.common.model.LocaleAffinityResult;
38+
import com.spotify.i18n.locales.utils.available.AvailableLocalesUtils;
39+
import edu.umd.cs.findbugs.annotations.Nullable;
40+
import java.util.Optional;
41+
import java.util.Set;
42+
import java.util.stream.Collectors;
43+
44+
/**
45+
* Base implementation of {@link LocaleAffinityBiCalculator} that calculates the locale affinity
46+
* between two given language tags.
47+
*
48+
* <p>This class is not intended for public subclassing. New object instances must be created using
49+
* the builder pattern, starting with the {@link #builder()} method.
50+
*
51+
* @author Eric Fjøsne
52+
*/
53+
@AutoValue
54+
public abstract class LocaleAffinityBiCalculatorBaseImpl implements LocaleAffinityBiCalculator {
55+
56+
// Set containing all available language codes in CLDR.
57+
private static final Set<String> AVAILABLE_LANGUAGE_CODES =
58+
AvailableLocalesUtils.getReferenceLocales().stream()
59+
.map(ULocale::getLanguage)
60+
.collect(Collectors.toSet());
61+
62+
// LocaleDistance.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
63+
// only way to make use of this class, which provides the features we need here.
64+
private static final LocaleDistance LOCALE_DISTANCE_INSTANCE = LocaleDistance.INSTANCE;
65+
66+
// LocaleDistance best distance method arguments, all assigned to their default as per icu
67+
// implementation.
68+
private static final int LOCALE_DISTANCE_SHIFTED =
69+
LocaleDistance.shiftDistance(LOCALE_DISTANCE_INSTANCE.getDefaultScriptDistance());
70+
private static final int LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH = 1;
71+
private static final FavorSubtag LOCALE_DISTANCE_FAVOR_SUBTAG = FavorSubtag.LANGUAGE;
72+
private static final Direction LOCALE_DISTANCE_DIRECTION = Direction.WITH_ONE_WAY;
73+
74+
// LikelySubtags.INSTANCE is commented as VisibleForTesting, so not ideal ... but this is the
75+
// only way to make use of this class, which provides the features we need here.
76+
private static final LikelySubtags LIKELY_SUBTAGS_INSTANCE = LikelySubtags.INSTANCE;
77+
78+
// LikelySubtags method arguments, all assigned to their default as per icu implementation.
79+
private static final boolean LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH = false;
80+
81+
// Distance threshold: any distance at or above this value is scored 0.
82+
private static final double DISTANCE_THRESHOLD = 224.0;
83+
84+
// Score to affinity thresholds
85+
private static final int SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE = 65;
86+
private static final int SCORE_THRESHOLD_HIGH = 30;
87+
private static final int SCORE_THRESHOLD_LOW = 0;
88+
89+
// Language codes for which we need some manual tweaks
90+
private static final String LANGUAGE_CODE_CROATIAN = "hr";
91+
private static final String LANGUAGE_CODE_BOSNIAN = "bs";
92+
93+
/**
94+
* Returns the calculated {@link LocaleAffinityResult} for the given language tag
95+
*
96+
* @return the locale affinity result
97+
*/
98+
@Override
99+
public LocaleAffinityResult calculate(
100+
@Nullable final String languageTag1, @Nullable final String languageTag2) {
101+
return LocaleAffinityResult.builder().affinity(getAffinity(languageTag1, languageTag2)).build();
102+
}
103+
104+
private LocaleAffinity getAffinity(
105+
@Nullable final String languageTag1, @Nullable final String languageTag2) {
106+
// We parse the language tags, and filter out locales with a language unavailable in CLDR.
107+
final Optional<ULocale> locale1 =
108+
parse(languageTag1).filter(locale -> isAvailableLanguage(locale));
109+
final Optional<ULocale> locale2 =
110+
parse(languageTag2).filter(locale -> isAvailableLanguage(locale));
111+
112+
if (locale1.isPresent() && locale2.isPresent()) {
113+
// We attempt to match based on corresponding spoken language first, and make use of the
114+
// score-based affinity calculation as fallback.
115+
if (hasSameSpokenLanguageAffinity(locale1.get(), locale2.get())) {
116+
return LocaleAffinity.SAME;
117+
} else {
118+
return calculateScoreBasedAffinity(locale1.get(), locale2.get());
119+
}
120+
} else {
121+
return LocaleAffinity.NONE;
122+
}
123+
}
124+
125+
private boolean hasSameSpokenLanguageAffinity(final ULocale locale1, final ULocale locale2) {
126+
final Optional<ULocale> spoken1 = getSpokenLanguageLocale(locale1.toLanguageTag());
127+
final Optional<ULocale> spoken2 = getSpokenLanguageLocale(locale2.toLanguageTag());
128+
return spoken1.isPresent() && spoken2.isPresent() && isSameLocale(spoken1.get(), spoken2.get());
129+
}
130+
131+
static LocaleAffinity calculateScoreBasedAffinity(final ULocale l1, final ULocale l2) {
132+
int bestDistance = getDistanceBetweenLocales(l1, l2);
133+
int correspondingScore = convertDistanceToAffinityScore(bestDistance);
134+
return convertScoreToLocaleAffinity(correspondingScore);
135+
}
136+
137+
static boolean isAvailableLanguage(final ULocale locale) {
138+
return AVAILABLE_LANGUAGE_CODES.contains(locale.getLanguage().toLowerCase());
139+
}
140+
141+
private static int getDistanceBetweenLocales(final ULocale locale1, final ULocale locale2) {
142+
final LSR lsr1 = getMaximizedLanguageScriptRegion(locale1);
143+
final LSR lsr2 = getMaximizedLanguageScriptRegion(locale2);
144+
return getDistanceBetweenLSR(lsr1, lsr2);
145+
}
146+
147+
static int getDistanceBetweenLSR(LSR lsr1, LSR lsr2) {
148+
// Croatian should be matched with Bosnian. This is the case for Bosnian written in Latin
149+
// script, but not Cyrillic, because the ICU implementation enforces script matching. We
150+
// created a workaround to ensure that we return a MUTUALLY_INTELLIGIBLE affinity when
151+
// encountering this locale.
152+
if (calculatingDistanceBetweenCroatianAndBosnian(lsr1, lsr2)) {
153+
return 0;
154+
} else {
155+
// We calculate distances both ways, and return the minimum value.
156+
return Math.min(calculateDistance(lsr1, lsr2), calculateDistance(lsr2, lsr1));
157+
}
158+
}
159+
160+
private static int calculateDistance(LSR lsr1, LSR lsr2) {
161+
return Math.abs(
162+
LOCALE_DISTANCE_INSTANCE.getBestIndexAndDistance(
163+
lsr1,
164+
new LSR[] {lsr2},
165+
LOCALE_DISTANCE_SUPPORTED_LSRS_LENGTH,
166+
LOCALE_DISTANCE_SHIFTED,
167+
LOCALE_DISTANCE_FAVOR_SUBTAG,
168+
LOCALE_DISTANCE_DIRECTION));
169+
}
170+
171+
static int convertDistanceToAffinityScore(final int distance) {
172+
if (distance > DISTANCE_THRESHOLD) {
173+
return 0;
174+
} else {
175+
return (int) ((DISTANCE_THRESHOLD - distance) / DISTANCE_THRESHOLD * 100.0);
176+
}
177+
}
178+
179+
static LocaleAffinity convertScoreToLocaleAffinity(final int score) {
180+
if (score > SCORE_THRESHOLD_MUTUALLY_INTELLIGIBLE) {
181+
return LocaleAffinity.MUTUALLY_INTELLIGIBLE;
182+
} else if (score > SCORE_THRESHOLD_HIGH) {
183+
return LocaleAffinity.HIGH;
184+
} else if (score > SCORE_THRESHOLD_LOW) {
185+
return LocaleAffinity.LOW;
186+
} else {
187+
return LocaleAffinity.NONE;
188+
}
189+
}
190+
191+
private static boolean calculatingDistanceBetweenCroatianAndBosnian(
192+
final LSR lsr1, final LSR lsr2) {
193+
return (lsr1.language.equals(LANGUAGE_CODE_CROATIAN)
194+
&& lsr2.language.equals(LANGUAGE_CODE_BOSNIAN))
195+
|| (lsr1.language.equals(LANGUAGE_CODE_BOSNIAN)
196+
&& lsr2.language.equals(LANGUAGE_CODE_CROATIAN));
197+
}
198+
199+
static LSR getMaximizedLanguageScriptRegion(final ULocale locale) {
200+
return LIKELY_SUBTAGS_INSTANCE.makeMaximizedLsrFrom(
201+
locale, LIKELY_SUBTAGS_RETURNS_INPUT_IF_UNMATCH);
202+
}
203+
204+
/**
205+
* Returns a {@link Builder} instance that will allow you to manually create a {@link
206+
* LocaleAffinityBiCalculatorBaseImpl} instance.
207+
*
208+
* @return The builder
209+
*/
210+
public static Builder builder() {
211+
return new AutoValue_LocaleAffinityBiCalculatorBaseImpl.Builder();
212+
}
213+
214+
/** A builder for a {@link LocaleAffinityBiCalculatorBaseImpl}. */
215+
@AutoValue.Builder
216+
public abstract static class Builder {
217+
Builder() {} // package private constructor
218+
219+
abstract LocaleAffinityBiCalculatorBaseImpl autoBuild();
220+
221+
/** Builds a {@link LocaleAffinityCalculator} out of this builder. */
222+
public final LocaleAffinityBiCalculator build() {
223+
return autoBuild();
224+
}
225+
}
226+
}

0 commit comments

Comments
 (0)