001/* 002 * Units of Measurement Systems 003 * Copyright (c) 2005-2017, Jean-Marie Dautelle, Werner Keil and others. 004 * 005 * All rights reserved. 006 * 007 * Redistribution and use in source and binary forms, with or without modification, 008 * are permitted provided that the following conditions are met: 009 * 010 * 1. Redistributions of source code must retain the above copyright notice, 011 * this list of conditions and the following disclaimer. 012 * 013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions 014 * and the following disclaimer in the documentation and/or other materials provided with the distribution. 015 * 016 * 3. Neither the name of JSR-363, Units of Measurement nor the names of their contributors may be used to 017 * endorse or promote products derived from this software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package systems.uom.ucum.format; 031 032import static tech.units.indriya.AbstractUnit.ONE; 033import static systems.uom.ucum.format.UCUMConverterFormatter.*; 034import si.uom.SI; 035import systems.uom.ucum.internal.format.UCUMFormatParser; 036import tech.units.indriya.AbstractUnit; 037import tech.units.indriya.format.AbstractUnitFormat; 038import tech.units.indriya.format.SymbolMap; 039import tech.units.indriya.function.*; 040import tech.units.indriya.internal.format.TokenException; 041import tech.units.indriya.internal.format.TokenMgrError; 042import tech.units.indriya.unit.AnnotatedUnit; 043import tech.units.indriya.unit.MetricPrefix; 044import tech.units.indriya.unit.TransformedUnit; 045 046import javax.measure.Quantity; 047import javax.measure.Unit; 048import javax.measure.UnitConverter; 049import javax.measure.format.ParserException; 050 051import java.io.ByteArrayInputStream; 052import java.io.IOException; 053import java.text.ParsePosition; 054import java.util.*; 055import java.util.Map.Entry; 056 057/** 058 * <p> 059 * This class provides the interface for formatting and parsing {@link AbstractUnit units} according to the 060 * <a href="http://unitsofmeasure.org/">Uniform Code for CommonUnits of Measure</a> (UCUM). 061 * </p> 062 * 063 * <p> 064 * For a technical/historical overview of this format please read <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354"> 065 * CommonUnits of Measure in Clinical Information Systems</a>. 066 * </p> 067 * 068 * <p> 069 * As of revision 1.16, the BNF in the UCUM standard contains an <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work around 070 * the problem by modifying the BNF productions for <Term>. Once the error in the standard is corrected, it may be necessary to modify the 071 * productions in the UCUMFormatParser.jj file to conform to the standard. 072 * </p> 073 * 074 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a> 075 * @author <a href="mailto:units@catmedia.us">Werner Keil</a> 076 * @version 0.8, 6 October 2018 077 */ 078public abstract class UCUMFormat extends AbstractUnitFormat { 079 /** 080 * 081 */ 082 // private static final long serialVersionUID = 8586656823290135155L; 083 084 // A helper to declare bundle names for all instances 085 private static final String BUNDLE_BASE = UCUMFormat.class.getName(); 086 087 // ///////////////// 088 // Class methods // 089 // ///////////////// 090 091 /** 092 * Returns the instance for formatting/parsing using the given variant 093 * 094 * @param variant 095 * the <strong>UCUM</strong> variant to use 096 * @return a {@link UCUMFormat} instance 097 */ 098 public static UCUMFormat getInstance(Variant variant) { 099 switch (variant) { 100 case CASE_INSENSITIVE: 101 return Parsing.DEFAULT_CI; 102 case CASE_SENSITIVE: 103 return Parsing.DEFAULT_CS; 104 case PRINT: 105 return Print.DEFAULT; 106 default: 107 throw new IllegalArgumentException("Unknown variant: " + variant); 108 } 109 } 110 111 /** 112 * Returns an instance for formatting and parsing using user defined symbols 113 * 114 * @param variant 115 * the <strong>UCUM</strong> variant to use 116 * @param symbolMap 117 * the map of user defined symbols to use 118 * @return a {@link UCUMFormat} instance 119 */ 120 public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) { 121 switch (variant) { 122 case CASE_INSENSITIVE: 123 return new Parsing(symbolMap, false); 124 case CASE_SENSITIVE: 125 return new Parsing(symbolMap, true); 126 case PRINT: 127 return new Print(symbolMap); 128 default: 129 throw new IllegalArgumentException("Unknown variant: " + variant); 130 } 131 } 132 133 /** 134 * The symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s. 135 */ 136 final SymbolMap symbolMap; 137 138 /** 139 * Get the symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s, etc... 140 * 141 * @return SymbolMap the current symbol map 142 */ 143 @Override 144 protected SymbolMap getSymbols() { 145 return symbolMap; 146 } 147 148 ////////////////// 149 // Constructors // 150 ////////////////// 151 /** 152 * Base constructor. 153 */ 154 UCUMFormat(SymbolMap symbolMap) { 155 this.symbolMap = symbolMap; 156 } 157 158 // /////////// 159 // Parsing // 160 // /////////// 161 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException; 162 163 protected Unit<?> parse(CharSequence csq, int index) throws ParserException { 164 return parse(csq, new ParsePosition(index)); 165 } 166 167 @Override 168 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException; 169 170 //////////////// 171 // Formatting // 172 //////////////// 173 @SuppressWarnings({ "rawtypes", "unchecked" }) 174 public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException { 175 if (!(unknownUnit instanceof AbstractUnit)) { 176 throw new UnsupportedOperationException("The UCUM format supports only known units (AbstractUnit instances)"); 177 } 178 AbstractUnit unit = (AbstractUnit) unknownUnit; 179 CharSequence symbol; 180 CharSequence annotation = null; 181 if (unit instanceof AnnotatedUnit) { 182 AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit; 183 unit = annotatedUnit.getActualUnit(); 184 annotation = annotatedUnit.getAnnotation(); 185 } 186 String mapSymbol = symbolMap.getSymbol(unit); 187 if (mapSymbol != null) { 188 symbol = mapSymbol; 189 } else if (unit instanceof TransformedUnit) { 190 final StringBuilder temp = new StringBuilder(); 191 final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit(); 192 final UnitConverter converter = unit.getConverterTo(parentUnit); 193 final boolean printSeparator = !parentUnit.equals(ONE); 194 195 format(parentUnit, temp); 196 formatConverter(converter, printSeparator, temp, symbolMap); 197 198 symbol = temp; 199 } else if (unit.getBaseUnits() != null) { 200 Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits(); 201 StringBuffer app = new StringBuffer(); 202 203 Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>(); 204 Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>(); 205 206 // divide units into numerators and denominators 207 for (Entry<? extends AbstractUnit<?>, Integer> u : productUnits.entrySet()) { 208 if (u.getValue() > 0) { 209 numeratorUnits.put(u.getKey(), u.getValue()); 210 }else { 211 denominatorUnits.put(u.getKey(), u.getValue()); 212 } 213 } 214 215 int numeratorCount = 1; 216 for (Entry<? extends AbstractUnit<?>, Integer> u : numeratorUnits.entrySet()) { 217 // add multiplication separators after first unit 218 if (numeratorCount > 1){ 219 app.append("."); 220 } 221 // add individual unit string 222 format(u.getKey(),app); 223 // add power number if greater than 1 224 if (u.getValue() > 1){ 225 app.append(u.getValue()); 226 } 227 numeratorCount++; 228 } 229 // special case if there is no numerator append one for inverse 230 if (numeratorCount == 1) { 231 app.append("1"); 232 } 233 if (denominatorUnits.size() >0){ 234 // append division symbol 235 app.append("/"); 236 int denominatorCount = 1; 237 for (Entry<? extends AbstractUnit<?>, Integer> u : denominatorUnits.entrySet()) { 238 // if there is more than one denominator unit and this is the first, add open parenthesis 239 if (denominatorCount == 1 && denominatorUnits.size() > 1 ) { 240 app.append("("); 241 } 242 // add multiplication separators after first unit 243 if (denominatorCount > 1){ 244 app.append("."); 245 } 246 // add individual unit string 247 format(u.getKey(),app); 248 // add power number if abs greater than 1 249 if (Math.abs(u.getValue()) < -1){ 250 app.append(-u.getValue()); 251 } 252 // if there is more than one denominator unit and this is the last, add close parenthesis 253 if (denominatorCount == denominatorUnits.size() && denominatorUnits.size() > 1 ) { 254 app.append(")"); 255 } 256 denominatorCount++; 257 } 258 } 259 symbol = app; 260 } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) { 261 final StringBuilder temp = new StringBuilder(); 262 UnitConverter converter; 263 boolean printSeparator; 264 if (unit.equals(SI.KILOGRAM)) { 265 // A special case because KILOGRAM is a BaseUnit instead of 266 // a transformed unit, for compatibility with existing SI 267 // unit system. 268 format(SI.GRAM, temp); 269 converter = PowersOfIntConverter.of(MetricPrefix.KILO); 270 printSeparator = true; 271 } else { 272 Unit<?> parentUnit = unit.getSystemUnit(); 273 converter = unit.getConverterTo(parentUnit); 274 if (parentUnit.equals(SI.KILOGRAM)) { 275 // More special-case hackery to work around gram/kilogram 276 // inconsistency 277 parentUnit = SI.GRAM; 278 converter = converter.concatenate(PowersOfIntConverter.of(MetricPrefix.KILO)); 279 } 280 format(parentUnit, temp); 281 printSeparator = !parentUnit.equals(ONE); 282 } 283 formatConverter(converter, printSeparator, temp, symbolMap); 284 symbol = temp; 285 } else if (unit.getSymbol() != null) { 286 symbol = unit.getSymbol(); 287 } else { 288 throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit " + unit.getClass().getName() + "). " 289 + "Custom units types should override the toString() method as the default implementation uses the UCUM format."); 290 } 291 292 appendable.append(symbol); 293 if (annotation != null && annotation.length() > 0) { 294 appendAnnotation(symbol, annotation, appendable); 295 } 296 297 return appendable; 298 } 299 300 public void label(Unit<?> unit, String label) { 301 throw new UnsupportedOperationException("label() not supported by this implementation"); 302 } 303 304 public boolean isLocaleSensitive() { 305 return false; 306 } 307 308 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 309 appendable.append('{'); 310 appendable.append(annotation); 311 appendable.append('}'); 312 } 313 314 // static final ResourceBundle.Control getControl(final String key) { 315 // return new ResourceBundle.Control() { 316 // @Override 317 // public List<Locale> getCandidateLocales(String baseName, Locale locale) { 318 // if (baseName == null) 319 // throw new NullPointerException(); 320 // if (locale.equals(new Locale(key))) { 321 // return Arrays.asList( 322 // locale, 323 // Locale.GERMANY, 324 // // no Locale.GERMAN here 325 // Locale.ROOT); 326 // } else if (locale.equals(Locale.GERMANY)) { 327 // return Arrays.asList( 328 // locale, 329 // // no Locale.GERMAN here 330 // Locale.ROOT); 331 // } 332 // return super.getCandidateLocales(baseName, locale); 333 // } 334 // }; 335 // } 336 337 // ///////////////// 338 // Inner classes // 339 // ///////////////// 340 341 /** 342 * Variant of unit representation in the UCUM standard 343 * 344 * @see <a href= "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> UCUM - Character Set and Lexical Rules</a> 345 */ 346 public static enum Variant { 347 CASE_SENSITIVE, CASE_INSENSITIVE, PRINT 348 } 349 350 /** 351 * The Print format is used to output units according to the "print" column in the UCUM standard. Because "print" symbols in UCUM are not unique, 352 * this class of UCUMFormat may not be used for parsing, only for formatting. 353 */ 354 private static final class Print extends UCUMFormat { 355 356 /** 357 * 358 */ 359 // private static final long serialVersionUID = 2990875526976721414L; 360 private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print")); 361 private static final Print DEFAULT = new Print(PRINT_SYMBOLS); 362 363 public Print(SymbolMap symbols) { 364 super(symbols); 365 } 366 367 @Override 368 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException { 369 throw new UnsupportedOperationException("The print format is for pretty-printing of units only. Parsing is not supported."); 370 } 371 372 @Override 373 void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException { 374 if (symbol != null && symbol.length() > 0) { 375 appendable.append('('); 376 appendable.append(annotation); 377 appendable.append(')'); 378 } else { 379 appendable.append(annotation); 380 } 381 } 382 383 @Override 384 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException { 385 return parse(csq, new ParsePosition(0)); 386 387 } 388 } 389 390 /** 391 * The Parsing format outputs formats and parses units according to the "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap 392 * is passed to its constructor. 393 */ 394 private static final class Parsing extends UCUMFormat { 395 // private static final long serialVersionUID = -922531801940132715L; 396 private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap 397 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() { 398 @Override 399 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 400 if (baseName == null) 401 throw new NullPointerException(); 402 if (locale.equals(new Locale("", "CS"))) { 403 return Arrays.asList(locale, Locale.ROOT); 404 } 405 return super.getCandidateLocales(baseName, locale); 406 } 407 })); 408 private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap 409 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() { 410 @Override 411 public List<Locale> getCandidateLocales(String baseName, Locale locale) { 412 if (baseName == null) 413 throw new NullPointerException(); 414 if (locale.equals(new Locale("", "CI"))) { 415 return Arrays.asList(locale, Locale.ROOT); 416 } else if (locale.equals(Locale.GERMANY)) { // TODO 417 // why 418 // GERMANY? 419 return Arrays.asList(locale, 420 // no Locale.GERMAN here 421 Locale.ROOT); 422 } 423 return super.getCandidateLocales(baseName, locale); 424 } 425 })); 426 private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true); 427 private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false); 428 private final boolean caseSensitive; 429 430 public Parsing(SymbolMap symbols, boolean caseSensitive) { 431 super(symbols); 432 this.caseSensitive = caseSensitive; 433 } 434 435 @Override 436 public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException { 437 // Parsing reads the whole character sequence from the parse 438 // position. 439 int start = cursor.getIndex(); 440 int end = csq.length(); 441 if (end <= start) { 442 return ONE; 443 } 444 String source = csq.subSequence(start, end).toString().trim(); 445 if (source.length() == 0) { 446 return ONE; 447 } 448 if (!caseSensitive) { 449 source = source.toUpperCase(); 450 } 451 UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes())); 452 try { 453 Unit<?> result = parser.parseUnit(); 454 cursor.setIndex(end); 455 return result; 456 } catch (TokenException e) { 457 if (e.currentToken != null) { 458 cursor.setErrorIndex(start + e.currentToken.endColumn); 459 } else { 460 cursor.setErrorIndex(start); 461 } 462 throw new ParserException(e); 463 } catch (TokenMgrError e) { 464 cursor.setErrorIndex(start); 465 throw new IllegalArgumentException(e.getMessage()); 466 } 467 } 468 469 @Override 470 public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException { 471 return parse(csq, new ParsePosition(0)); 472 } 473 } 474}