001/*
002 * Units of Measurement Systems
003 * Copyright (c) 2005-2017, Jean-Marie Dautelle, Werner Keil and others.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-363, Units of Measurement nor the names of their contributors may be used to
017 *    endorse or promote products derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package systems.uom.ucum.format;
031
032import static systems.uom.ucum.format.UCUMConverterFormatter.formatConverter;
033import static tec.uom.se.AbstractUnit.ONE;
034import si.uom.SI;
035import systems.uom.ucum.internal.format.UCUMFormatParser;
036import tec.uom.se.AbstractUnit;
037import tec.uom.se.format.AbstractUnitFormat;
038import tec.uom.se.format.SymbolMap;
039import tec.uom.se.internal.format.TokenException;
040import tec.uom.se.internal.format.TokenMgrError;
041import tec.uom.se.unit.AnnotatedUnit;
042import tec.uom.se.unit.TransformedUnit;
043
044import javax.measure.Quantity;
045import javax.measure.Unit;
046import javax.measure.format.ParserException;
047
048import java.io.ByteArrayInputStream;
049import java.io.IOException;
050import java.text.ParsePosition;
051import java.util.*;
052import java.util.Map.Entry;
053
054/**
055 * <p>
056 * This class provides the interface for formatting and parsing
057 * {@link AbstractUnit units} according to the
058 * <a href="http://unitsofmeasure.org/">Uniform Code for CommonUnits of
059 * Measure</a> (UCUM).
060 * </p>
061 *
062 * <p>
063 * For a technical/historical overview of this format please read
064 * <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
065 * CommonUnits of Measure in Clinical Information Systems</a>.
066 * </p>
067 *
068 * <p>
069 * As of revision 1.16, the BNF in the UCUM standard contains an
070 * <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to
071 * work around the problem by modifying the BNF productions for &lt;Term&gt;.
072 * Once the error in the standard is corrected, it may be necessary to modify
073 * the productions in the UCUMFormatParser.jj file to conform to the standard.
074 * </p>
075 *
076 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
077 * @author <a href="mailto:units@catmedia.us">Werner Keil</a>
078 * @version 0.7.5, 30 April 2017
079 */
080public abstract class UCUMFormat extends AbstractUnitFormat {
081    /**
082     * 
083     */
084    // private static final long serialVersionUID = 8586656823290135155L;
085
086    // A helper to declare bundle names for all instances
087    private static final String BUNDLE_BASE = UCUMFormat.class.getName();
088
089    // /////////////////
090    // Class methods //
091    // /////////////////
092
093    /**
094     * Returns the instance for formatting/parsing using the given variant
095     * 
096     * @param variant
097     *            the <strong>UCUM</strong> variant to use
098     * @return a {@link UCUMFormat} instance
099     */
100    public static UCUMFormat getInstance(Variant variant) {
101        switch (variant) {
102        case CASE_INSENSITIVE:
103            return Parsing.DEFAULT_CI;
104        case CASE_SENSITIVE:
105            return Parsing.DEFAULT_CS;
106        case PRINT:
107            return Print.DEFAULT;
108        default:
109            throw new IllegalArgumentException("Unknown variant: " + variant);
110        }
111    }
112
113    /**
114     * Returns an instance for formatting and parsing using user defined symbols
115     * 
116     * @param variant
117     *            the <strong>UCUM</strong> variant to use
118     * @param symbolMap
119     *            the map of user defined symbols to use
120     * @return a {@link UCUMFormat} instance
121     */
122    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
123        switch (variant) {
124        case CASE_INSENSITIVE:
125            return new Parsing(symbolMap, false);
126        case CASE_SENSITIVE:
127            return new Parsing(symbolMap, true);
128        case PRINT:
129            return new Print(symbolMap);
130        default:
131            throw new IllegalArgumentException("Unknown variant: " + variant);
132        }
133    }
134
135    /**
136     * The symbol map used by this instance to map between {@link AbstractUnit
137     * Unit}s and <code>String</code>s.
138     */
139    final SymbolMap symbolMap;
140
141    /**
142     * Get the symbol map used by this instance to map between
143     * {@link AbstractUnit Unit}s and <code>String</code>s, etc...
144     * 
145     * @return SymbolMap the current symbol map
146     */
147    @Override
148    protected SymbolMap getSymbols() {
149        return symbolMap;
150    }
151
152    //////////////////
153    // Constructors //
154    //////////////////
155    /**
156     * Base constructor.
157     */
158    UCUMFormat(SymbolMap symbolMap) {
159        this.symbolMap = symbolMap;
160    }
161
162    // ///////////
163    // Parsing //
164    // ///////////
165    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException;
166
167    protected Unit<?> parse(CharSequence csq, int index) throws ParserException {
168        return parse(csq, new ParsePosition(index));
169    }
170
171    @Override
172    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException;
173
174    ////////////////
175    // Formatting //
176    ////////////////
177    @SuppressWarnings({ "rawtypes", "unchecked" })
178    public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException {
179        if (!(unknownUnit instanceof AbstractUnit)) {
180            throw new UnsupportedOperationException(
181                    "The UCUM format supports only known units (AbstractUnit instances)");
182        }
183        AbstractUnit unit = (AbstractUnit) unknownUnit;
184        CharSequence symbol;
185        CharSequence annotation = null;
186        if (unit instanceof AnnotatedUnit) {
187            AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit;
188            unit = annotatedUnit.getActualUnit();
189            annotation = annotatedUnit.getAnnotation();
190        }
191        String mapSymbol = symbolMap.getSymbol(unit);
192        if (mapSymbol != null) {
193            symbol = mapSymbol;
194        } else if (unit instanceof TransformedUnit) {
195            final StringBuilder temp = new StringBuilder();
196            final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit();
197            final boolean printSeparator = !parentUnit.equals(ONE);
198
199            format(parentUnit, temp);
200            formatConverter(unit, parentUnit, printSeparator, temp, symbolMap);
201
202            symbol = temp;
203        } else if (unit.getBaseUnits() != null) {
204                Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits();
205        StringBuffer app = new StringBuffer();
206        
207        Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>();            
208        Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>();
209
210        // divide units into numerators and denominators
211        for (Entry<? extends AbstractUnit<?>, Integer> u : productUnits.entrySet()) {
212                if (u.getValue() > 0) {
213                        numeratorUnits.put(u.getKey(), u.getValue());
214                }else {
215                        denominatorUnits.put(u.getKey(), u.getValue());
216                }
217        }
218        
219        int numeratorCount = 1;
220        for (Entry<? extends AbstractUnit<?>, Integer> u : numeratorUnits.entrySet()) {
221                // add multiplication separators after first unit
222                if (numeratorCount > 1){
223                        app.append(".");
224                }
225                // add individual unit string
226                format(u.getKey(),app);
227                // add power number if greater than 1
228                if (Math.abs(u.getValue()) > 1){
229                        app.append(u.getValue());
230                }
231                numeratorCount++;
232        }
233        // special case if there is no numerator append one for inverse
234        if (numeratorCount == 1) {
235                app.append("1");
236        }
237        if (denominatorUnits.size() >0){
238                // append division symbol
239                app.append("/");
240                int denominatorCount = 1;
241                for (Entry<? extends AbstractUnit<?>, Integer> u : denominatorUnits.entrySet()) {
242                        // if there is more than one denominator unit and this is the first, add open parenthesis 
243                        if (denominatorCount == 1 && denominatorUnits.size() > 1 ) {
244                                app.append("(");
245                        }
246                        // add multiplication separators after first unit
247                        if (denominatorCount > 1){
248                                app.append(".");
249                        }
250                        // add individual unit string
251                        format(u.getKey(),app);
252                        // add power number if less than -1
253                if (Math.abs(u.getValue()) < -1){
254                        app.append(-u.getValue());
255                }
256                // if there is more than one denominator unit and this is the last, add close parenthesis
257                if (denominatorCount == denominatorUnits.size() && denominatorUnits.size() > 1 ) {
258                                app.append(")");
259                        }
260                denominatorCount++;
261            }
262        }            
263        symbol = app;
264        } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) {
265            final StringBuilder temp = new StringBuilder();
266            Unit<?> unitParent = unit.getSystemUnit();
267            if (unitParent.equals(SI.KILOGRAM)) unitParent = SI.GRAM;  // Work around gram/kilogram inconsistency
268            format(unitParent, temp);
269            boolean printSeparator = !unitParent.equals(ONE);
270            formatConverter(unit, unitParent, printSeparator, temp, symbolMap);
271            symbol = temp;
272        } else if (unit.getSymbol() != null) {
273            symbol = unit.getSymbol();
274        } else {
275            throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit "
276                    + unit.getClass().getName() + "). "
277                    + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
278        }
279        
280        appendable.append(symbol);
281        if (annotation != null && annotation.length() > 0) {
282            appendAnnotation(symbol, annotation, appendable);
283        }
284
285        return appendable;
286    }
287
288    public void label(Unit<?> unit, String label) {
289        throw new UnsupportedOperationException("label() not supported by this implementation");
290    }
291
292    public boolean isLocaleSensitive() {
293        return false;
294    }
295
296    void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
297        appendable.append('{');
298        appendable.append(annotation);
299        appendable.append('}');
300    }
301
302    // static final ResourceBundle.Control getControl(final String key) {
303    // return new ResourceBundle.Control() {
304    // @Override
305    // public List<Locale> getCandidateLocales(String baseName, Locale locale) {
306    // if (baseName == null)
307    // throw new NullPointerException();
308    // if (locale.equals(new Locale(key))) {
309    // return Arrays.asList(
310    // locale,
311    // Locale.GERMANY,
312    // // no Locale.GERMAN here
313    // Locale.ROOT);
314    // } else if (locale.equals(Locale.GERMANY)) {
315    // return Arrays.asList(
316    // locale,
317    // // no Locale.GERMAN here
318    // Locale.ROOT);
319    // }
320    // return super.getCandidateLocales(baseName, locale);
321    // }
322    // };
323    // }
324
325    // /////////////////
326    // Inner classes //
327    // /////////////////
328
329    /**
330     * Variant of unit representation in the UCUM standard
331     * 
332     * @see <a href=
333     *      "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules">
334     *      UCUM - Character Set and Lexical Rules</a>
335     */
336    public static enum Variant {
337        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
338    }
339
340    /**
341     * The Print format is used to output units according to the "print" column
342     * in the UCUM standard. Because "print" symbols in UCUM are not unique,
343     * this class of UCUMFormat may not be used for parsing, only for
344     * formatting.
345     */
346    private static final class Print extends UCUMFormat {
347
348        /**
349         *
350         */
351        // private static final long serialVersionUID = 2990875526976721414L;
352        private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
353        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);
354
355        public Print(SymbolMap symbols) {
356            super(symbols);
357        }
358
359        @Override
360        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException {
361            throw new UnsupportedOperationException(
362                    "The print format is for pretty-printing of units only. Parsing is not supported.");
363        }
364
365        @Override
366        void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
367            if (symbol != null && symbol.length() > 0) {
368                appendable.append('(');
369                appendable.append(annotation);
370                appendable.append(')');
371            } else {
372                appendable.append(annotation);
373            }
374        }
375
376        @Override
377        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException {
378            return parse(csq, new ParsePosition(0));
379
380        }
381    }
382
383    /**
384     * The Parsing format outputs formats and parses units according to the
385     * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
386     * is passed to its constructor.
387     */
388    private static final class Parsing extends UCUMFormat {
389        // private static final long serialVersionUID = -922531801940132715L;
390        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
391                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() {
392                    @Override
393                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
394                        if (baseName == null)
395                            throw new NullPointerException();
396                        if (locale.equals(new Locale("", "CS"))) {
397                            return Arrays.asList(locale, Locale.ROOT);
398                        }
399                        return super.getCandidateLocales(baseName, locale);
400                    }
401                }));
402        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
403                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() {
404                    @Override
405                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
406                        if (baseName == null)
407                            throw new NullPointerException();
408                        if (locale.equals(new Locale("", "CI"))) {
409                            return Arrays.asList(locale, Locale.ROOT);
410                        } else if (locale.equals(Locale.GERMANY)) { // TODO
411                                                                    // why
412                                                                    // GERMANY?
413                            return Arrays.asList(locale,
414                                    // no Locale.GERMAN here
415                                    Locale.ROOT);
416                        }
417                        return super.getCandidateLocales(baseName, locale);
418                    }
419                }));
420        private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true);
421        private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false);
422        private final boolean caseSensitive;
423
424        public Parsing(SymbolMap symbols, boolean caseSensitive) {
425            super(symbols);
426            this.caseSensitive = caseSensitive;
427        }
428
429        @Override
430        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException {
431            // Parsing reads the whole character sequence from the parse
432            // position.
433            int start = cursor.getIndex();
434            int end = csq.length();
435            if (end <= start) {
436                return ONE;
437            }
438            String source = csq.subSequence(start, end).toString().trim();
439            if (source.length() == 0) {
440                return ONE;
441            }
442            if (!caseSensitive) {
443                source = source.toUpperCase();
444            }
445            UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes()));
446            try {
447                Unit<?> result = parser.parseUnit();
448                cursor.setIndex(end);
449                return result;
450            } catch (TokenException e) {
451                if (e.currentToken != null) {
452                    cursor.setErrorIndex(start + e.currentToken.endColumn);
453                } else {
454                    cursor.setErrorIndex(start);
455                }
456                throw new ParserException(e);
457            } catch (TokenMgrError e) {
458                cursor.setErrorIndex(start);
459                throw new IllegalArgumentException(e.getMessage());
460            }
461        }
462
463        @Override
464        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException {
465            return parse(csq, new ParsePosition(0));
466        }
467    }
468}