I am answering my own question, since this is a rather complex topic that isn't explained well elsewhere.
In short, as per my comment, you are probably better off using icu4j, since it offers a more complete solution for international number/date conversion.
The difficulty is working out all the standards required: icu4j does seem to support all locales, languages, etc. properly, so it is just a case of knowing how to use it properly.
I will provide a snippet - this code is in need of a cleanup, but it offers a solution for both number and date conversion for your Java application:
import groovy.transform.CompileStatic
/**
 * Country-keyed BCP 47 language tags that carry a Unicode "ca" extension.
 *
 * Tag layout, using ar-SA-u-ca-arab as the example:
 *   ar-SA = language and region
 *   u     = Unicode locale extension
 *   ca    = extension key used by every tag in this enum
 *   arab  = subtag value
 *
 * Subtag values can be looked up in
 * https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
 *
 * The rest is explained here:
 * http://stackoverflow.com/questions/43456068/java-locale-builder-setextensionlocale-unicode-locale-extension
 *
 * @author Vahid Hedayati
 */
@CompileStatic
enum LocaleCalendarExtensions {

    SA('ar-SA-u-ca-arab'),
    AM('hy-AM-u-ca-arevmda'),
    CN('zh-TW-u-ca-hant'),
    CZ('cs-CZ-u-ca-latn'),
    DK('da-DK-u-ca-latn'),
    NL('nl-NL-u-ca-latn'),
    // NOTE(review): 'ie' is the ISO 639 code for Interlingue; confirm this is the intended language for Ireland.
    IE('ie-IE-u-ca-latn'),
    FR('fr-FR-u-ca-latn'),
    DE('de-DE-u-ca-latn'),
    GR('el-GR-u-ca-grek'),
    IL('iw-IL-u-ca-hebr'),
    IN('hi-IN-u-ca-hindu'),
    IT('it-IT-u-ca-latn'),
    JP('ja-JP-u-ca-jpan'),
    NO('nb-NO-u-ca-latn'),
    IR(''), //fa-IR-u-ca-fars'),
    PL('pl-PL-u-ca-latn'),
    PT('pt-PT-u-ca-latn'),
    RU('ru-RU-u-ca-cyrl'),
    ES('es-ES-u-ca-latn'),
    SE('sv-SE-u-ca-latn'),
    TH('th-TH-u-ca-thai'),
    TR('tr-TR-u-ca-latn'),
    PK(''),//ur-PK-u-ca-arab'),
    VN('vi-VN-u-ca-latn')

    /** Language tag associated with the country; empty for IR and PK. */
    String value

    LocaleCalendarExtensions(String val) {
        this.value = val
    }

    /** @return the language tag held by this constant */
    public String getValue() {
        return value
    }

    /**
     * Looks a constant up by its language tag.
     *
     * @param val tag to search for
     * @return the first constant (in declaration order) whose value equals val, or null when none matches
     */
    static LocaleCalendarExtensions byValue(String val) {
        for (LocaleCalendarExtensions candidate in values()) {
            if (candidate.value == val) {
                return candidate
            }
        }
        return null
    }

    // Each getter below hands back a fresh, mutable EnumSet of the countries in that group.

    public static EnumSet<LocaleCalendarExtensions> getArabicSupport() {
        return EnumSet.of(SA)
    }

    public static EnumSet<LocaleCalendarExtensions> getJapanSupport() {
        return EnumSet.of(JP)
    }

    public static EnumSet<LocaleCalendarExtensions> getChinaSupport() {
        return EnumSet.of(CN)
    }

    public static EnumSet<LocaleCalendarExtensions> getFarsiSupport() {
        return EnumSet.of(IR)
    }

    public static EnumSet<LocaleCalendarExtensions> getUrduSupport() {
        return EnumSet.of(PK)
    }

    public static EnumSet<LocaleCalendarExtensions> getAsianSupport() {
        return EnumSet.of(JP, CN)
    }

    public static EnumSet<LocaleCalendarExtensions> getHebrewSupport() {
        return EnumSet.of(IL)
    }

    public static EnumSet<LocaleCalendarExtensions> getHinduSupport() {
        return EnumSet.of(IN)
    }

    public static EnumSet<LocaleCalendarExtensions> getThaiSupport() {
        return EnumSet.of(TH)
    }

    public static EnumSet<LocaleCalendarExtensions> getGreekSupport() {
        return EnumSet.of(GR)
    }
}
Now, here is how you convert a date properly for international locales:
/**
 * Formats a date with ICU4J for the given country.
 *
 * @param lang country key that must match a constant name of LocaleICUCalendarExtensions
 *             (and LocaleExtensions), e.g. 'SA', 'JP', 'FR'
 * @param date date to format
 * @param format requested layout: 'HH:mm' uses the hour/minute skeleton, 'dd MMM' the
 *               abbreviated month/day skeleton, 'dd MMM yyyy HH:mm:ss' the FULL date style
 *               and anything else the LONG date style
 * @return the localized date, or an empty string when lang or date is missing or lang is
 *         not a known country key
 */
public static String convertDate(String lang, java.util.Date date, String format) {
    if (lang == null || !date) {
        return ''
    }
    // Only countries declared in the ICU enum are handled.
    def found = LocaleICUCalendarExtensions.values().find { it.toString() == lang }
    if (!found) {
        return ''
    }
    // NOTE(review): the locale id comes from LocaleExtensions (the "nu" tags), not from the
    // LocaleICUCalendarExtensions entry found above - confirm which one is intended.
    def localeEntry = LocaleExtensions.valueOf(lang)
    com.ibm.icu.util.ULocale locale = new com.ibm.icu.util.ULocale(localeEntry.value)
    com.ibm.icu.util.Calendar calendar = com.ibm.icu.util.Calendar.getInstance(locale)
    calendar.setTime(date)

    com.ibm.icu.text.DateFormat df
    if (format == 'HH:mm') {
        df = com.ibm.icu.text.DateFormat.getPatternInstance(com.ibm.icu.text.DateFormat.HOUR_MINUTE, locale)
    } else if (format == 'dd MMM yyyy HH:mm:ss') {
        // NOTE(review): this is a date-only style, so the time part of the request is not rendered.
        df = com.ibm.icu.text.DateFormat.getDateInstance(com.ibm.icu.text.DateFormat.FULL, locale)
    } else if (format == 'dd MMM') {
        df = com.ibm.icu.text.DateFormat.getPatternInstance(com.ibm.icu.text.DateFormat.ABBR_MONTH_DAY, locale)
    } else {
        df = com.ibm.icu.text.DateFormat.getDateInstance(com.ibm.icu.text.DateFormat.LONG, locale)
    }
    // Format once for every branch; previously the 'HH:mm' branch built a formatter
    // but never used it, so that format always produced an empty string.
    return df.format(calendar)
}
To convert a number to another country's numbering system:
/**
 * Formats a number according to the conventions of the given country.
 *
 * Countries in the Arabic, Hindu, Thai, Farsi and Urdu support sets are formatted with
 * NumberFormat.getNumberInstance using the language tag from LocaleExtensions. Japan,
 * China, Hebrew and Greek are formatted with ICU4J's NumberFormat and a fixed ULocale.
 *
 * @param lang country key matching a LocaleCalendarExtensions constant name (e.g. 'SA'); may be null
 * @param number value to format; it is read as a Double when its text contains a '.',
 *               otherwise as a Long
 * @return the localized number, or number.toString() when lang is null, not in any
 *         support set, or formatting produced nothing
 */
public static String convertNumber(String lang, number) {
    String output = ''
    if (lang != null) {
        // Support sets are matched on the enum constant name.
        Closure<Boolean> supports = { Collection countries -> countries.any { it.toString() == lang } }
        boolean arabic = supports(LocaleCalendarExtensions.arabicSupport)
        boolean china = supports(LocaleCalendarExtensions.chinaSupport)
        boolean japan = supports(LocaleCalendarExtensions.japanSupport)
        boolean farsi = supports(LocaleCalendarExtensions.farsiSupport)
        boolean urdu = supports(LocaleCalendarExtensions.urduSupport)
        boolean hebrew = supports(LocaleCalendarExtensions.hebrewSupport)
        boolean greek = supports(LocaleCalendarExtensions.greekSupport)
        boolean hindu = supports(LocaleCalendarExtensions.hinduSupport)
        boolean thai = supports(LocaleCalendarExtensions.thaiSupport)

        if (arabic || hindu || thai || farsi || urdu) {
            def found = LocaleExtensions.valueOf(lang)
            if (found) {
                Locale locale = new Locale.Builder().setLanguageTag(found.value).build()
                NumberFormat numberFormat = NumberFormat.getNumberInstance(locale)
                output = numberFormat.format(toNumeric(number)) ?: ''
            }
        }
        if (japan || china || hebrew || greek) {
            // to extend look up types here
            //http://www.atetric.com/atetric/javadoc/com.ibm.icu/icu4j/49.1/src-html/com/ibm/icu/util/ULocale.html
            //http://icu-project.org/~yoshito/jacoco_57.1/com.ibm.icu.util/ULocale.java.html
            com.ibm.icu.util.ULocale locale
            if (japan) {
                locale = new com.ibm.icu.util.ULocale("ja_JP_JP")//ja_JP_JP //
            }
            if (china) {
                locale = new com.ibm.icu.util.ULocale("zh_Hans")//zh_CN_TRADITIONAL@collation=pinyin;
            }
            if (hebrew) {
                locale = new com.ibm.icu.util.ULocale("he_IL")
            }
            if (greek) {
                locale = new com.ibm.icu.util.ULocale("el_GR")
            }
            com.ibm.icu.text.NumberFormat nf = com.ibm.icu.text.NumberFormat.getInstance(locale)
            output = nf.format(toNumeric(number))
        }
    }
    return output ?: number.toString()
}

/** Reads the value as a Double when its text contains a '.', otherwise as a Long. */
private static Number toNumeric(number) {
    return number.toString().indexOf('.') > -1 ? (number as Double) : (number as Long)
}
Now, if you are using Grails, you can create a taglib and override the formatDate and formatNumber definitions to work with the above code:
/**
 * Overrides the default date formatter: when a locale is supplied the date is passed to
 * NumberHelper.convertDate, and the stock g.formatDate is used whenever that yields
 * nothing usable.
 */
def formatDate = { attrs ->
    String converted = null
    if (attrs.locale) {
        // The helper is keyed by the locale's country code.
        String country = attrs.locale.country
        converted = NumberHelper.convertDate(country, attrs.date, attrs.format)
    }
    // An empty/missing result, or the literal text 'null', means "not converted".
    if (converted && converted != 'null') {
        out << converted
    } else {
        out << g.formatDate(attrs)
    }
}
/**
 * Overrides the default formatNumber: when both a locale and a number are supplied the
 * number is passed to NumberHelper.convertNumber; otherwise, or when that yields nothing,
 * the stock g.formatNumber is used.
 */
def formatNumber = { attrs ->
    def converted = null
    if (attrs.locale && attrs.number) {
        // The helper is keyed by the locale's country code.
        String country = attrs.locale.country
        converted = NumberHelper.convertNumber(country, attrs.number)
    }
    if (converted) {
        out << "${converted}"
    } else {
        out << g.formatNumber(attrs)
    }
}
The ICU4J enum (LocaleICUCalendarExtensions) referenced above:
import groovy.transform.CompileStatic
/**
 * Country-keyed locale identifiers written in the ICU style
 * (language_REGION[_VARIANT]@calendar=type).
 *
 * NOTE(review): several calendar keywords used here (e.g. latin, cyrillic, greek,
 * pakistan) do not look like calendar systems - confirm against the ICU documentation.
 */
@CompileStatic
enum LocaleICUCalendarExtensions {

    SA('ar_SA@calendar=islamic'),
    AM('hy_AM@calendar=armenian'),
    CN('zh_Hans@calendar=chinese'),
    CZ('cs_CZ@calendar=latin'),
    DK('da_DK@calendar=latin'),
    NL('nl_NL@calendar=latin'),
    IE('ie_IE@calendar=latin'),
    FR('fr_FR@calendar=latin'),
    DE('de_DE@calendar=latin'),
    GR('el_GR@calendar=greek'),
    IL('iw_IL@calendar=hebrew'),
    IN('hi_IN@calendar=hindu'),
    IT('it_IT@calendar=Latin'),
    JP('ja_JP_TRADITIONAL@calendar=japanese'),
    NO('nb_NO@calendar=latin'),
    IR('fa_IR@calendar=persian'),
    PL('pl_PL@calendar=latin'),
    PT('pt_PT@calendar=latin'),
    RU('ru_RU@calendar=cyrillic'),
    ES('es_ES@calendar=latin'),
    SE('sv_SE@calendar=latin'),
    TH('th_TH_TRADITIONAL@calendar=buddhist'),
    TR('tr_TR@calendar=latin'),
    PK('ur_PK@calendar=pakistan'),
    VN('vi_VN@calendar=latin')

    /** ICU-style locale identifier associated with the country. */
    String value

    LocaleICUCalendarExtensions(String val) {
        this.value = val
    }

    /** @return the locale identifier held by this constant */
    public String getValue() {
        return value
    }

    /**
     * Looks a constant up by its locale identifier.
     *
     * @param val identifier to search for
     * @return the first constant (in declaration order) whose value equals val, or null when none matches
     */
    static LocaleICUCalendarExtensions byValue(String val) {
        for (LocaleICUCalendarExtensions candidate in values()) {
            if (candidate.value == val) {
                return candidate
            }
        }
        return null
    }

    // Each getter below hands back a fresh, mutable EnumSet of the countries in that group.

    public static EnumSet<LocaleICUCalendarExtensions> getArabicSupport() {
        return EnumSet.of(SA)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getJapanSupport() {
        return EnumSet.of(JP)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getChinaSupport() {
        return EnumSet.of(CN)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getFarsiSupport() {
        return EnumSet.of(IR)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getUrduSupport() {
        return EnumSet.of(PK)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getHebrewSupport() {
        return EnumSet.of(IL)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getHinduSupport() {
        return EnumSet.of(IN)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getThaiSupport() {
        return EnumSet.of(TH)
    }

    public static EnumSet<LocaleICUCalendarExtensions> getGreekSupport() {
        return EnumSet.of(GR)
    }
}
The LocaleExtensions enum is a bit like LocaleCalendarExtensions; it just uses nu rather than ca:
import groovy.transform.CompileStatic
/**
 * Country-keyed BCP 47 language tags that carry a Unicode "nu" (number) extension.
 *
 * Tag layout, using ar-SA-u-nu-arab as the example:
 *   ar-SA = language and region
 *   u     = Unicode locale extension
 *   nu    = number
 *   arab  = Arabic
 *
 * Subtag values can be looked up in
 * https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
 *
 * The rest is explained here:
 * http://stackoverflow.com/questions/43456068/java-locale-builder-setextensionlocale-unicode-locale-extension
 */
@CompileStatic
enum LocaleExtensions {

    SA('ar-SA-u-nu-arab'),
    AM('hy-AM-u-nu-arevmda'),
    CN('zh-TW-u-nu-arab'), //'zh-TW-u-nu-hant'
    CZ('cs-CZ-u-nu-latn'),
    DK('da-DK-u-nu-latn'),
    NL('nl-NL-u-nu-latn'),
    IE('ie-IE-u-nu-latn'),
    FR('fr-FR-u-nu-latn'),
    DE('de-DE-u-nu-latn'),
    GR('el-GR-u-nu-grek'),
    IL('iw-IL-u-nu-hebr'),
    IN('hi-IN-u-nu-hindu'),
    IT('it-IT-u-nu-latn'),
    JP('ja-JP-u-nu-arab'),
    NO('nb-NO-u-nu-latn'),
    IR('fa-IR-u-nu-arab'),
    PL('pl-PL-u-nu-latn'),
    PT('pt-PT-u-nu-latn'),
    RU('ru-RU-u-nu-cyrl'),
    ES('es-ES-u-nu-latn'),
    SE('sv-SE-u-nu-latn'),
    TH('th-TH-u-nu-thai'),
    TR('tr-TR-u-nu-latn'),
    PK('ur-PK-u-nu-arab'),
    VN('vi-VN-u-nu-latn')

    /** Language tag associated with the country. */
    String value

    LocaleExtensions(String val) {
        this.value = val
    }

    /** @return the language tag held by this constant */
    public String getValue() {
        return value
    }

    /**
     * Looks a constant up by its language tag.
     *
     * @param val tag to search for
     * @return the first constant (in declaration order) whose value equals val, or null when none matches
     */
    static LocaleExtensions byValue(String val) {
        for (LocaleExtensions candidate in values()) {
            if (candidate.value == val) {
                return candidate
            }
        }
        return null
    }

    // Each getter below hands back a fresh, mutable EnumSet of the countries in that group.

    public static EnumSet<LocaleExtensions> getArabicSupport() {
        //TODO JP and CN are listed here alongside SA
        return EnumSet.of(SA, JP, CN)
    }

    public static EnumSet<LocaleExtensions> getFarsiSupport() {
        return EnumSet.of(PK, IR)
    }

    public static EnumSet<LocaleExtensions> getAsianSupport() {
        return EnumSet.of(JP, CN)
    }

    public static EnumSet<LocaleExtensions> getHebrewSupport() {
        return EnumSet.of(IL)
    }

    public static EnumSet<LocaleExtensions> getHinduSupport() {
        return EnumSet.of(IN)
    }

    public static EnumSet<LocaleExtensions> getThaiSupport() {
        return EnumSet.of(TH)
    }

    public static EnumSet<LocaleExtensions> getGreekSupport() {
        return EnumSet.of(GR)
    }
}
This obviously covers a range of locales, and it is working correctly for me - in the words of Bernard Manning, "bootifully".
The date handling had originally been split between what Java supports by default and icu4j; once I understood it better, it was all moved over to icu4j. I think the numbering system is still half and half, and it could probably be switched over to icu4j too.
Anyhow, the answer is a derailed train, far off the track from where it started, which was converting number characters. Converting, for example, from Latin to Arabic is tricky to understand and work with: given a Latin (Gregorian) year of 2016, the equivalent and correct year in, let's say, Saudi Arabia would be around 1437 in the Hijri calendar, and in Thailand it would be 2559 in the Buddhist calendar.