/*
 * Units of Measurement Systems
 * Copyright (c) 2005-2017, Jean-Marie Dautelle, Werner Keil and others.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of JSR-363, Units of Measurement nor the names of their contributors may be used to
 *    endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package systems.uom.ucum.format;

import static systems.uom.ucum.format.UCUMConverterFormatter.formatConverter;
import static tec.uom.se.AbstractUnit.ONE;
import si.uom.SI;
import systems.uom.ucum.internal.format.UCUMFormatParser;
import tec.uom.se.AbstractUnit;
import tec.uom.se.format.AbstractUnitFormat;
import tec.uom.se.format.SymbolMap;
import tec.uom.se.internal.format.TokenException;
import tec.uom.se.internal.format.TokenMgrError;
import tec.uom.se.unit.AnnotatedUnit;
import tec.uom.se.unit.TransformedUnit;

import javax.measure.Quantity;
import javax.measure.Unit;
import javax.measure.format.ParserException;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.text.ParsePosition;
import java.util.*;
import java.util.Map.Entry;

/**
 * <p>
 * This class provides the interface for formatting and parsing
 * {@link AbstractUnit units} according to the
 * <a href="http://unitsofmeasure.org/">Unified Code for Units of
 * Measure</a> (UCUM).
 * </p>
 *
 * <p>
 * For a technical/historical overview of this format please read
 * <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
 * Units of Measure in Clinical Information Systems</a>.
 * </p>
 *
 * <p>
 * As of revision 1.16, the BNF in the UCUM standard contains an
 * <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to
 * work around the problem by modifying the BNF productions for &lt;Term&gt;.
 * Once the error in the standard is corrected, it may be necessary to modify
 * the productions in the UCUMFormatParser.jj file to conform to the standard.
 * </p>
 *
 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
 * @author <a href="mailto:units@catmedia.us">Werner Keil</a>
 * @version 0.7.5, 30 April 2017
 */
public abstract class UCUMFormat extends AbstractUnitFormat {

    // Base name shared by the resource bundles of all variants ("_Print", "_CS", "_CI" are appended).
    private static final String BUNDLE_BASE = UCUMFormat.class.getName();

    // /////////////////
    // Class methods //
    // /////////////////

    /**
     * Returns the shared instance for formatting/parsing using the given variant.
     *
     * @param variant
     *            the <strong>UCUM</strong> variant to use
     * @return a {@link UCUMFormat} instance
     * @throws IllegalArgumentException
     *             if the variant is not one of the known constants
     */
    public static UCUMFormat getInstance(Variant variant) {
        switch (variant) {
        case CASE_INSENSITIVE:
            return Parsing.DEFAULT_CI;
        case CASE_SENSITIVE:
            return Parsing.DEFAULT_CS;
        case PRINT:
            return Print.DEFAULT;
        default:
            throw new IllegalArgumentException("Unknown variant: " + variant);
        }
    }

    /**
     * Returns a new instance for formatting and parsing using user defined symbols.
     *
     * @param variant
     *            the <strong>UCUM</strong> variant to use
     * @param symbolMap
     *            the map of user defined symbols to use
     * @return a {@link UCUMFormat} instance
     * @throws IllegalArgumentException
     *             if the variant is not one of the known constants
     */
    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
        switch (variant) {
        case CASE_INSENSITIVE:
            return new Parsing(symbolMap, false);
        case CASE_SENSITIVE:
            return new Parsing(symbolMap, true);
        case PRINT:
            return new Print(symbolMap);
        default:
            throw new IllegalArgumentException("Unknown variant: " + variant);
        }
    }

    /**
     * The symbol map used by this instance to map between {@link AbstractUnit
     * Unit}s and <code>String</code>s.
     */
    final SymbolMap symbolMap;

    /**
     * Get the symbol map used by this instance to map between
     * {@link AbstractUnit Unit}s and <code>String</code>s, etc...
     *
     * @return SymbolMap the current symbol map
     */
    @Override
    protected SymbolMap getSymbols() {
        return symbolMap;
    }

    // ////////////////
    // Constructors //
    // ////////////////
    /**
     * Base constructor.
     *
     * @param symbolMap
     *            the symbol map this format uses for unit lookups
     */
    UCUMFormat(SymbolMap symbolMap) {
        this.symbolMap = symbolMap;
    }

    // ///////////
    // Parsing //
    // ///////////
    /**
     * Parses a unit from the given character sequence, starting at the cursor position.
     *
     * @param csq
     *            the text to parse
     * @param cursor
     *            the position at which parsing starts
     * @return the parsed unit
     * @throws ParserException
     *             if the text cannot be parsed
     */
    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException;

    /**
     * Parses a unit from the given character sequence, starting at the given index.
     *
     * @param csq
     *            the text to parse
     * @param index
     *            the index at which parsing starts
     * @return the parsed unit
     * @throws ParserException
     *             if the text cannot be parsed
     */
    protected Unit<?> parse(CharSequence csq, int index) throws ParserException {
        return parse(csq, new ParsePosition(index));
    }

    @Override
    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException;

    // //////////////
    // Formatting //
    // //////////////
    /**
     * Formats the given unit as UCUM text and appends it to the given appendable.
     * <p>
     * The symbol is resolved in this order: an entry in the symbol map, a
     * transformed unit (parent symbol plus converter), a product of base units,
     * a non-system unit (or the kilogram) expressed relative to its system
     * unit, and finally the unit's own symbol. If the unit is an
     * {@link AnnotatedUnit}, the annotation is appended after the symbol via
     * {@link #appendAnnotation(CharSequence, CharSequence, Appendable)}.
     * </p>
     *
     * @param unknownUnit
     *            the unit to format; must be an {@link AbstractUnit}
     * @param appendable
     *            the target the formatted text is appended to
     * @return the given appendable
     * @throws IOException
     *             if appending to the target fails
     * @throws UnsupportedOperationException
     *             if the unit is not an {@link AbstractUnit}
     * @throws IllegalArgumentException
     *             if no UCUM representation can be derived for the unit
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException {
        if (!(unknownUnit instanceof AbstractUnit)) {
            throw new UnsupportedOperationException(
                    "The UCUM format supports only known units (AbstractUnit instances)");
        }
        AbstractUnit unit = (AbstractUnit) unknownUnit;
        CharSequence symbol;
        CharSequence annotation = null;
        if (unit instanceof AnnotatedUnit) {
            // Format the wrapped unit and remember the annotation for the end.
            AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit;
            unit = annotatedUnit.getActualUnit();
            annotation = annotatedUnit.getAnnotation();
        }
        String mapSymbol = symbolMap.getSymbol(unit);
        if (mapSymbol != null) {
            symbol = mapSymbol;
        } else if (unit instanceof TransformedUnit) {
            final StringBuilder temp = new StringBuilder();
            final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit();
            final boolean printSeparator = !parentUnit.equals(ONE);

            format(parentUnit, temp);
            formatConverter(unit, parentUnit, printSeparator, temp, symbolMap);

            symbol = temp;
        } else if (unit.getBaseUnits() != null) {
            Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits();
            symbol = formatProduct(productUnits);
        } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) {
            final StringBuilder temp = new StringBuilder();
            Unit<?> unitParent = unit.getSystemUnit();
            if (unitParent.equals(SI.KILOGRAM)) {
                unitParent = SI.GRAM; // Work around gram/kilogram inconsistency
            }
            format(unitParent, temp);
            boolean printSeparator = !unitParent.equals(ONE);
            formatConverter(unit, unitParent, printSeparator, temp, symbolMap);
            symbol = temp;
        } else if (unit.getSymbol() != null) {
            symbol = unit.getSymbol();
        } else {
            throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit "
                    + unit.getClass().getName() + "). "
                    + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
        }

        appendable.append(symbol);
        if (annotation != null && annotation.length() > 0) {
            appendAnnotation(symbol, annotation, appendable);
        }

        return appendable;
    }

    /**
     * Formats a product of units as {@code numerator/denominator}, e.g.
     * {@code m/s2} or {@code kg.m/(s2.K)}.
     *
     * @param productUnits
     *            the factors of the product mapped to their exponents
     * @return the formatted product
     * @throws IOException
     *             if formatting one of the factors fails
     */
    private CharSequence formatProduct(Map<? extends AbstractUnit<?>, Integer> productUnits) throws IOException {
        final Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>();
        final Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>();

        // Divide units into numerators (positive exponent) and denominators (everything else).
        for (Entry<? extends AbstractUnit<?>, Integer> factor : productUnits.entrySet()) {
            if (factor.getValue() > 0) {
                numeratorUnits.put(factor.getKey(), factor.getValue());
            } else {
                denominatorUnits.put(factor.getKey(), factor.getValue());
            }
        }

        final StringBuilder product = new StringBuilder();
        if (numeratorUnits.isEmpty()) {
            // Special case: without a numerator a literal one is needed for the inverse, e.g. "1/s".
            product.append("1");
        } else {
            appendFactors(numeratorUnits, product);
        }
        if (!denominatorUnits.isEmpty()) {
            product.append("/");
            // More than one denominator factor must be grouped by parentheses.
            final boolean grouped = denominatorUnits.size() > 1;
            if (grouped) {
                product.append("(");
            }
            appendFactors(denominatorUnits, product);
            if (grouped) {
                product.append(")");
            }
        }
        return product;
    }

    /**
     * Appends the given factors separated by {@code .}; each factor is
     * followed by the magnitude of its exponent when that magnitude is greater
     * than one.
     *
     * @param factors
     *            the units to append mapped to their exponents
     * @param target
     *            the builder receiving the text
     * @throws IOException
     *             if formatting one of the factors fails
     */
    private void appendFactors(Map<AbstractUnit<?>, Integer> factors, StringBuilder target) throws IOException {
        boolean first = true;
        for (Entry<AbstractUnit<?>, Integer> factor : factors.entrySet()) {
            if (!first) {
                target.append(".");
            }
            format(factor.getKey(), target);
            // The sign is already expressed by the side of the fraction, so only the magnitude is printed.
            // The previous denominator check (Math.abs(exponent) < -1) could never be true, which
            // silently dropped every denominator exponent.
            final int magnitude = Math.abs(factor.getValue());
            if (magnitude > 1) {
                target.append(magnitude);
            }
            first = false;
        }
    }

    /**
     * Not supported by this implementation.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    public void label(Unit<?> unit, String label) {
        throw new UnsupportedOperationException("label() not supported by this implementation");
    }

    /**
     * @return always {@code false}
     */
    public boolean isLocaleSensitive() {
        return false;
    }

    /**
     * Appends the annotation of an annotated unit, wrapped in curly braces.
     *
     * @param symbol
     *            the symbol that was already appended (unused here; subclasses may use it)
     * @param annotation
     *            the annotation text
     * @param appendable
     *            the target the annotation is appended to
     * @throws IOException
     *             if appending to the target fails
     */
    void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
        appendable.append('{');
        appendable.append(annotation);
        appendable.append('}');
    }

    // /////////////////
    // Inner classes //
    // /////////////////

    /**
     * Variant of unit representation in the UCUM standard
     *
     * @see <a href=
     *      "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules">
     *      UCUM - Character Set and Lexical Rules</a>
     */
    public enum Variant {
        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
    }

    /**
     * The Print format is used to output units according to the "print" column
     * in the UCUM standard. Because "print" symbols in UCUM are not unique,
     * this class of UCUMFormat may not be used for parsing, only for
     * formatting.
     */
    private static final class Print extends UCUMFormat {

        // Declaration order matters: the symbols must be initialized before DEFAULT.
        private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);

        public Print(SymbolMap symbols) {
            super(symbols);
        }

        /**
         * Parsing is not supported by the print variant.
         *
         * @throws UnsupportedOperationException
         *             always
         */
        @Override
        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException {
            throw new UnsupportedOperationException(
                    "The print format is for pretty-printing of units only. Parsing is not supported.");
        }

        /**
         * Appends the annotation in parentheses after a non-empty symbol, or
         * bare when there is no symbol.
         */
        @Override
        void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
            if (symbol != null && symbol.length() > 0) {
                appendable.append('(');
                appendable.append(annotation);
                appendable.append(')');
            } else {
                appendable.append(annotation);
            }
        }

        /**
         * Parsing is not supported by the print variant; delegates to
         * {@link #parse(CharSequence, ParsePosition)}, which always throws.
         */
        @Override
        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException {
            return parse(csq, new ParsePosition(0));
        }
    }

    /**
     * The Parsing format formats and parses units according to the
     * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
     * is passed to its constructor.
     */
    private static final class Parsing extends UCUMFormat {
        // Declaration order matters: the symbol maps must be initialized before the DEFAULT_* instances.
        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() {
                    @Override
                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
                        if (baseName == null)
                            throw new NullPointerException();
                        if (locale.equals(new Locale("", "CS"))) {
                            return Arrays.asList(locale, Locale.ROOT);
                        }
                        return super.getCandidateLocales(baseName, locale);
                    }
                }));
        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() {
                    @Override
                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
                        if (baseName == null)
                            throw new NullPointerException();
                        if (locale.equals(new Locale("", "CI"))) {
                            return Arrays.asList(locale, Locale.ROOT);
                        } else if (locale.equals(Locale.GERMANY)) { // TODO why GERMANY?
                            return Arrays.asList(locale,
                                    // no Locale.GERMAN here
                                    Locale.ROOT);
                        }
                        return super.getCandidateLocales(baseName, locale);
                    }
                }));
        private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true);
        private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false);
        private final boolean caseSensitive;

        public Parsing(SymbolMap symbols, boolean caseSensitive) {
            super(symbols);
            this.caseSensitive = caseSensitive;
        }

        /**
         * Parses everything from the cursor position to the end of the
         * sequence as a single unit. Empty or blank input yields
         * {@link AbstractUnit#ONE}. On success the cursor index is moved to
         * the end of the sequence; on failure its error index is set.
         *
         * @throws ParserException
         *             if the parser rejects the token sequence
         * @throws IllegalArgumentException
         *             if the tokenizer fails on the input
         */
        @Override
        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException {
            // Parsing reads the whole character sequence from the parse
            // position.
            int start = cursor.getIndex();
            int end = csq.length();
            if (end <= start) {
                return ONE;
            }
            String source = csq.subSequence(start, end).toString().trim();
            if (source.length() == 0) {
                return ONE;
            }
            if (!caseSensitive) {
                // Locale.ROOT keeps the mapping independent of the default locale
                // (e.g. Turkish would turn "i" into a dotted capital I).
                source = source.toUpperCase(Locale.ROOT);
            }
            // NOTE(review): getBytes() uses the platform default charset; presumably the generated
            // parser decodes the stream with the same default - confirm before pinning a charset here.
            UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes()));
            try {
                Unit<?> result = parser.parseUnit();
                cursor.setIndex(end);
                return result;
            } catch (TokenException e) {
                if (e.currentToken != null) {
                    cursor.setErrorIndex(start + e.currentToken.endColumn);
                } else {
                    cursor.setErrorIndex(start);
                }
                throw new ParserException(e);
            } catch (TokenMgrError e) {
                cursor.setErrorIndex(start);
                // Keep the tokenizer error as cause so the original stack trace is not lost.
                throw new IllegalArgumentException(e.getMessage(), e);
            }
        }

        @Override
        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException {
            return parse(csq, new ParsePosition(0));
        }
    }
}