001/* 002 * Unit-API - Units of Measurement API for Java 003 * Copyright (c) 2005-2016, Jean-Marie Dautelle, Werner Keil, V2COM. 004 * 005 * All rights reserved. 006 * 007 * Redistribution and use in source and binary forms, with or without modification, 008 * are permitted provided that the following conditions are met: 009 * 010 * 1. Redistributions of source code must retain the above copyright notice, 011 * this list of conditions and the following disclaimer. 012 * 013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions 014 * and the following disclaimer in the documentation and/or other materials provided with the distribution. 015 * 016 * 3. Neither the name of JSR-363 nor the names of its contributors may be used to endorse or promote products 017 * derived from this software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package systems.uom.ucum.format; 031 032import static tec.uom.se.AbstractUnit.ONE; 033import systems.uom.ucum.internal.SI; 034import systems.uom.ucum.internal.format.UCUMFormatParser; 035import tec.uom.se.AbstractConverter; 036import tec.uom.se.AbstractUnit; 037import tec.uom.se.format.AbstractUnitFormat; 038import tec.uom.se.format.SymbolMap; 039import tec.uom.se.function.MultiplyConverter; 040import tec.uom.se.function.RationalConverter; 041import tec.uom.se.internal.format.TokenException; 042import tec.uom.se.internal.format.TokenMgrError; 043import tec.uom.se.unit.AnnotatedUnit; 044import tec.uom.se.unit.MetricPrefix; 045 046import javax.measure.Quantity; 047import javax.measure.Unit; 048import javax.measure.UnitConverter; 049import javax.measure.format.ParserException; 050 051import java.io.ByteArrayInputStream; 052import java.io.IOException; 053import java.math.BigInteger; 054import java.text.ParsePosition; 055import java.util.*; 056 057/** 058 * <p> 059 * This class provides the interface for formatting and parsing 060 * {@link AbstractUnit units} according to the <a 061 * href="http://unitsofmeasure.org/">Uniform Code for CommonUnits of Measure</a> 062 * (UCUM). 063 * </p> 064 * 065 * <p> 066 * For a technical/historical overview of this format please read <a 067 * href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354"> 068 * CommonUnits of Measure in Clinical Information Systems</a>. 069 * </p> 070 * 071 * <p> 072 * As of revision 1.16, the BNF in the UCUM standard contains an <a 073 * href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work 074 * around the problem by modifying the BNF productions for <Term>. Once 075 * the error in the standard is corrected, it may be necessary to modify the 076 * productions in the UCUMFormatParser.jj file to conform to the standard. 077 * </p> 078 * 079 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a> 080 * @author <a href="mailto:units@catmedia.us">Werner Keil</a> 081 * @version 0.7, 6 April 2016 082 */ 083public abstract class UCUMFormat extends AbstractUnitFormat { 084 /** 085 * 086 */ 087 // private static final long serialVersionUID = 8586656823290135155L; 088 089 // A helper to declare bundle names for all instances 090 private static final String BUNDLE_BASE = UCUMFormat.class.getName(); 091 092 // ///////////////// 093 // Class methods // 094 // ///////////////// 095 096 /** 097 * Returns the instance for formatting/parsing using the given variant 098 * 099 * @param variant 100 * the <strong>UCUM</strong> variant to use 101 */ 102 public static UCUMFormat getInstance(Variant variant) { 103 switch (variant) { 104 case CASE_INSENSITIVE: 105 return Parsing.DEFAULT_CI; 106 case CASE_SENSITIVE: 107 return Parsing.DEFAULT_CS; 108 case PRINT: 109 return Print.DEFAULT; 110 default: 111 throw new IllegalArgumentException("Unknown variant: " + variant); 112 } 113 } 114 115 /** 116 * Returns an instance for formatting and parsing using user defined symbols 117 * 118 * @param variant 119 * the <strong>UCUM</strong> variant to use 120 * @param symbolMap 121 * the map of user defined symbols to use 122 */ 123 public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) { 124 switch (variant) { 125 case CASE_INSENSITIVE: 126 return new Parsing(symbolMap, false); 127 case CASE_SENSITIVE: 128 return new Parsing(symbolMap, true); 129 case PRINT: 130 return new Print(symbolMap); 131 default: 132 throw new IllegalArgumentException("Unknown variant: " + variant); 133 } 134 } 135 136 /** 137 * The symbol map used by this instance to map between {@link AbstractUnit 138 * Unit}s and <code>String</code>s. 139 */ 140 final SymbolMap symbolMap; 141 142 /** 143 * Get the symbol map used by this instance to map between 144 * {@link AbstractUnit Unit}s and <code>String</code>s, etc... 145 * 146 * @return SymbolMap the current symbol map 147 */ 148 @Override 149 protected SymbolMap getSymbols() { 150 return symbolMap; 151 } 152 153 // //////////////// 154 // Constructors // 155 // //////////////// 156 /** 157 * Base constructor. 158 */ 159 UCUMFormat(SymbolMap symbolMap) { 160 this.symbolMap = symbolMap; 161 } 162 163 // /////////// 164 // Parsing // 165 // /////////// 166 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, 167 ParsePosition cursor) throws ParserException; 168 169 protected Unit<?> parse(CharSequence csq, int index) throws ParserException { 170 return parse(csq, new ParsePosition(index)); 171 } 172 173 @Override 174 public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) 175 throws ParserException; 176 177 // ////////////// 178 // Formatting // 179 // ////////////// 180 @SuppressWarnings({ "rawtypes", "unchecked" }) 181 public Appendable format(Unit<?> unknownUnit, Appendable appendable) 182 throws IOException { 183 if (!(unknownUnit instanceof AbstractUnit)) { 184 throw new UnsupportedOperationException( 185 "The UCUM format supports only known units (AbstractUnit instances)"); 186 } 187 AbstractUnit unit = (AbstractUnit) unknownUnit; 188 CharSequence symbol; 189 CharSequence annotation = null; 190 if (unit instanceof AnnotatedUnit) { 191 AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit; 192 unit = annotatedUnit.getActualUnit(); 193 annotation = annotatedUnit.getAnnotation(); 194 } 195 String mapSymbol = symbolMap.getSymbol(unit); 196 if (mapSymbol != null) { 197 symbol = mapSymbol; 198 } else if (unit.getBaseUnits() != null) { 199 Map<? extends AbstractUnit<?>, Integer> productUnits = unit 200 .getBaseUnits(); 201 StringBuffer app = new StringBuffer(); 202 for (AbstractUnit<?> u : productUnits.keySet()) { 203 StringBuffer temp = new StringBuffer(); 204 temp = (StringBuffer) format(u, temp); 205 if ((temp.indexOf(".") >= 0) || (temp.indexOf("/") >= 0)) { 206 temp.insert(0, '('); 207 temp.append(')'); 208 } 209 int pow = productUnits.get(u); 210 if (app.length() > 0) { // Not the first unit. 211 if (pow >= 0) { 212 app.append('.'); 213 } else { 214 app.append('/'); 215 pow = -pow; 216 } 217 } else { // First unit. 218 if (pow < 0) { 219 app.append("1/"); 220 pow = -pow; 221 } 222 } 223 app.append(temp); 224 if (pow != 1) { 225 app.append(Integer.toString(pow)); 226 } 227 } 228 symbol = app; 229 } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) { 230 final StringBuilder temp = new StringBuilder(); 231 UnitConverter converter; 232 boolean printSeparator; 233 if (unit.equals(SI.KILOGRAM)) { 234 // A special case because KILOGRAM is a BaseUnit instead of 235 // a transformed unit, for compatability with existing SI 236 // unit system. 237 format(SI.GRAM, temp); 238 converter = MetricPrefix.KILO.getConverter(); 239 printSeparator = true; 240 } else { 241 Unit<?> parentUnit = unit.getSystemUnit(); 242 converter = unit.getConverterTo(parentUnit); 243 if (parentUnit.equals(SI.KILOGRAM)) { 244 // More special-case hackery to work around gram/kilogram 245 // incosistency 246 parentUnit = SI.GRAM; 247 converter = converter.concatenate(MetricPrefix.KILO 248 .getConverter()); 249 } 250 format(parentUnit, temp); 251 printSeparator = !parentUnit.equals(ONE); 252 } 253 formatConverter(converter, printSeparator, temp); 254 symbol = temp; 255 } else if (unit.getSymbol() != null) { 256 symbol = unit.getSymbol(); 257 } else { 258 throw new IllegalArgumentException( 259 "Cannot format the given Object as UCUM units (unsupported unit " 260 + unit.getClass().getName() 261 + "). " 262 + "Custom units types should override the toString() method as the default implementation uses the UCUM format."); 263 } 264 265 appendable.append(symbol); 266 if (annotation != null && annotation.length() > 0) { 267 appendAnnotation(symbol, annotation, appendable); 268 } 269 270 return appendable; 271 } 272 273 public void label(Unit<?> unit, String label) { 274 } 275 276 public boolean isLocaleSensitive() { 277 return false; 278 } 279 280 void appendAnnotation(CharSequence symbol, CharSequence annotation, 281 Appendable appendable) throws IOException { 282 appendable.append('{'); 283 appendable.append(annotation); 284 appendable.append('}'); 285 } 286 287 /** 288 * Formats the given converter to the given StringBuffer. This is similar to 289 * what {@link ConverterFormat} does, but there's no need to worry about 290 * operator precedence here, since UCUM only supports multiplication, 291 * division, and exponentiation and expressions are always evaluated left- 292 * to-right. 293 * 294 * @param converter 295 * the converter to be formatted 296 * @param continued 297 * <code>true</code> if the converter expression should begin 298 * with an operator, otherwise <code>false</code>. This will 299 * always be true unless the unit being modified is equal to 300 * Unit.ONE. 301 * @param buffer 302 * the <code>StringBuffer</code> to append to. Contains the 303 * already-formatted unit being modified by the given converter. 304 */ 305 void formatConverter(UnitConverter converter, boolean continued, 306 StringBuilder buffer) { 307 boolean unitIsExpression = ((buffer.indexOf(".") >= 0) || (buffer 308 .indexOf("/") >= 0)); 309 MetricPrefix prefix = symbolMap.getPrefix(converter); 310 if ((prefix != null) && (!unitIsExpression)) { 311 buffer.insert(0, symbolMap.getSymbol(prefix)); 312 } else if (converter == AbstractConverter.IDENTITY) { 313 // do nothing 314 } else if (converter instanceof MultiplyConverter) { 315 if (unitIsExpression) { 316 buffer.insert(0, '('); 317 buffer.append(')'); 318 } 319 MultiplyConverter multiplyConverter = (MultiplyConverter) converter; 320 double factor = multiplyConverter.getFactor(); 321 long lFactor = (long) factor; 322 if ((lFactor != factor) || (lFactor < -9007199254740992L) 323 || (lFactor > 9007199254740992L)) { 324 throw new IllegalArgumentException( 325 "Only integer factors are supported in UCUM"); 326 } 327 if (continued) { 328 buffer.append('.'); 329 } 330 buffer.append(lFactor); 331 } else if (converter instanceof RationalConverter) { 332 if (unitIsExpression) { 333 buffer.insert(0, '('); 334 buffer.append(')'); 335 } 336 RationalConverter rationalConverter = (RationalConverter) converter; 337 if (!rationalConverter.getDividend().equals(BigInteger.ONE)) { 338 if (continued) { 339 buffer.append('.'); 340 } 341 buffer.append(rationalConverter.getDividend()); 342 } 343 if (!rationalConverter.getDivisor().equals(BigInteger.ONE)) { 344 buffer.append('/'); 345 buffer.append(rationalConverter.getDivisor()); 346 } 347 } else { // All other converter type (e.g. exponential) we use the 348 // string representation. 349 buffer.insert(0, converter.toString() + "("); 350 buffer.append(")"); 351 } 352 } 353 354 // static final ResourceBundle.Control getControl(final String key) { 355 // return new ResourceBundle.Control() { 356 // @Override 357 // public List<Locale> getCandidateLocales(String baseName, Locale locale) { 358 // if (baseName == null) 359 // throw new NullPointerException(); 360 // if (locale.equals(new Locale(key))) { 361 // return Arrays.asList( 362 // locale, 363 // Locale.GERMANY, 364 // // no Locale.GERMAN here 365 // Locale.ROOT); 366 // } else if (locale.equals(Locale.GERMANY)) { 367 // return Arrays.asList( 368 // locale, 369 // // no Locale.GERMAN here 370 // Locale.ROOT); 371 // } 372 // return super.getCandidateLocales(baseName, locale); 373 // } 374 // }; 375 // } 376 377 // ///////////////// 378 // Inner classes // 379 // ///////////////// 380 381 /** 382 * Variant of unit representation in the UCUM standard 383 * 384 * @see <a 385 * href="http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> 386 * UCUM - Character Set and Lexical Rules</a> 387 */ 388 public static enum Variant { 389 CASE_SENSITIVE, CASE_INSENSITIVE, PRINT 390 } 391 392 /** 393 * The Print format is used to output units according to the "print" column 394 * in the UCUM standard. Because "print" symbols in UCUM are not unique, 395 * this class of UCUMFormat may not be used for parsing, only for 396 * formatting. 397 */ 398 private static final class Print extends UCUMFormat { 399 400 /** 401 * 402 */ 403 // private static final long serialVersionUID = 2990875526976721414L; 404 private static final SymbolMap PRINT_SYMBOLS = SymbolMap 405 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print")); 406 private static final Print DEFAULT = new Print(PRINT_SYMBOLS); 407 408 public Print(SymbolMap symbols) { 409 super(symbols); 410 } 411 412 @Override 413 public Unit<? extends Quantity<?>> parse(CharSequence csq, 414 ParsePosition pos) throws IllegalArgumentException { 415 throw new UnsupportedOperationException( 416 "The print format is for pretty-printing of units only. Parsing is not supported."); 417 } 418 419 @Override 420 void appendAnnotation(CharSequence symbol, CharSequence annotation, 421 Appendable appendable) throws IOException { 422 if (symbol != null && symbol.length() > 0) { 423 appendable.append('('); 424 appendable.append(annotation); 425 appendable.append(')'); 426 } else { 427 appendable.append(annotation); 428 } 429 } 430 431 @Override 432 public Unit<? extends Quantity<?>> parse(CharSequence csq) 433 throws IllegalArgumentException { 434 return parse(csq, new ParsePosition(0)); 435 436 } 437 } 438 439 /** 440 * The Parsing format outputs formats and parses units according to the 441 * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap 442 * is passed to its constructor. 443 */ 444 private static final class Parsing extends UCUMFormat { 445 // private static final long serialVersionUID = -922531801940132715L; 446 private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap 447 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", 448 new ResourceBundle.Control() { 449 @Override 450 public List<Locale> getCandidateLocales( 451 String baseName, Locale locale) { 452 if (baseName == null) 453 throw new NullPointerException(); 454 if (locale.equals(new Locale("", "CS"))) { 455 return Arrays.asList(locale, Locale.ROOT); 456 } 457 return super.getCandidateLocales(baseName, 458 locale); 459 } 460 })); 461 private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap 462 .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", 463 new ResourceBundle.Control() { 464 @Override 465 public List<Locale> getCandidateLocales( 466 String baseName, Locale locale) { 467 if (baseName == null) 468 throw new NullPointerException(); 469 if (locale.equals(new Locale("", "CI"))) { 470 return Arrays.asList(locale, Locale.ROOT); 471 } else if (locale.equals(Locale.GERMANY)) { // TODO 472 // why 473 // GERMANY? 474 return Arrays.asList(locale, 475 // no Locale.GERMAN here 476 Locale.ROOT); 477 } 478 return super.getCandidateLocales(baseName, 479 locale); 480 } 481 })); 482 private static final Parsing DEFAULT_CS = new Parsing( 483 CASE_SENSITIVE_SYMBOLS, true); 484 private static final Parsing DEFAULT_CI = new Parsing( 485 CASE_INSENSITIVE_SYMBOLS, false); 486 private final boolean caseSensitive; 487 488 public Parsing(SymbolMap symbols, boolean caseSensitive) { 489 super(symbols); 490 this.caseSensitive = caseSensitive; 491 } 492 493 @Override 494 public Unit<? extends Quantity<?>> parse(CharSequence csq, 495 ParsePosition cursor) throws ParserException { 496 // Parsing reads the whole character sequence from the parse 497 // position. 498 int start = cursor.getIndex(); 499 int end = csq.length(); 500 if (end <= start) { 501 return ONE; 502 } 503 String source = csq.subSequence(start, end).toString().trim(); 504 if (source.length() == 0) { 505 return ONE; 506 } 507 if (!caseSensitive) { 508 source = source.toUpperCase(); 509 } 510 UCUMFormatParser parser = new UCUMFormatParser(symbolMap, 511 new ByteArrayInputStream(source.getBytes())); 512 try { 513 Unit<?> result = parser.parseUnit(); 514 cursor.setIndex(end); 515 return result; 516 } catch (TokenException e) { 517 if (e.currentToken != null) { 518 cursor.setErrorIndex(start + e.currentToken.endColumn); 519 } else { 520 cursor.setErrorIndex(start); 521 } 522 throw new ParserException(e); 523 } catch (TokenMgrError e) { 524 cursor.setErrorIndex(start); 525 throw new IllegalArgumentException(e.getMessage()); 526 } 527 } 528 529 @Override 530 public Unit<? extends Quantity<?>> parse(CharSequence csq) 531 throws ParserException { 532 return parse(csq, new ParsePosition(0)); 533 } 534 } 535}