001/*
002 *  Unit-API - Units of Measurement API for Java
003 *  Copyright (c) 2005-2016, Jean-Marie Dautelle, Werner Keil, V2COM.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-363 nor the names of its contributors may be used to endorse or promote products
017 *    derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package systems.uom.ucum.format;
031
032import static tec.uom.se.AbstractUnit.ONE;
033import systems.uom.ucum.internal.SI;
034import systems.uom.ucum.internal.format.UCUMFormatParser;
035import tec.uom.se.AbstractConverter;
036import tec.uom.se.AbstractUnit;
037import tec.uom.se.format.AbstractUnitFormat;
038import tec.uom.se.format.SymbolMap;
039import tec.uom.se.function.MultiplyConverter;
040import tec.uom.se.function.RationalConverter;
041import tec.uom.se.internal.format.TokenException;
042import tec.uom.se.internal.format.TokenMgrError;
043import tec.uom.se.unit.AnnotatedUnit;
044import tec.uom.se.unit.MetricPrefix;
045
046import javax.measure.Quantity;
047import javax.measure.Unit;
048import javax.measure.UnitConverter;
049import javax.measure.format.ParserException;
050
051import java.io.ByteArrayInputStream;
052import java.io.IOException;
053import java.math.BigInteger;
054import java.text.ParsePosition;
055import java.util.*;
056
057/**
058 * <p>
 * This class provides the interface for formatting and parsing
 * {@link AbstractUnit units} according to the <a
 * href="http://unitsofmeasure.org/">Unified Code for Units of Measure</a>
 * (UCUM).
063 * </p>
064 *
065 * <p>
 * For a technical/historical overview of this format please read <a
 * href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
 * Units of Measure in Clinical Information Systems</a>.
069 * </p>
070 *
071 * <p>
072 * As of revision 1.16, the BNF in the UCUM standard contains an <a
073 * href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work
074 * around the problem by modifying the BNF productions for &lt;Term&gt;. Once
075 * the error in the standard is corrected, it may be necessary to modify the
076 * productions in the UCUMFormatParser.jj file to conform to the standard.
077 * </p>
078 *
079 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
080 * @author <a href="mailto:units@catmedia.us">Werner Keil</a>
081 * @version 0.7, 6 April 2016
082 */
083public abstract class UCUMFormat extends AbstractUnitFormat {
084    /**
085         * 
086         */
087    // private static final long serialVersionUID = 8586656823290135155L;
088
    // Base name (this class's fully qualified name) from which the names of
    // the resource bundles holding the symbols of each variant are derived.
090    private static final String BUNDLE_BASE = UCUMFormat.class.getName();
091
092    // /////////////////
093    // Class methods //
094    // /////////////////
095
096    /**
097     * Returns the instance for formatting/parsing using the given variant
098     * 
099     * @param variant
100     *            the <strong>UCUM</strong> variant to use
101     */
102    public static UCUMFormat getInstance(Variant variant) {
103        switch (variant) {
104        case CASE_INSENSITIVE:
105            return Parsing.DEFAULT_CI;
106        case CASE_SENSITIVE:
107            return Parsing.DEFAULT_CS;
108        case PRINT:
109            return Print.DEFAULT;
110        default:
111            throw new IllegalArgumentException("Unknown variant: " + variant);
112        }
113    }
114
115    /**
116     * Returns an instance for formatting and parsing using user defined symbols
117     * 
118     * @param variant
119     *            the <strong>UCUM</strong> variant to use
120     * @param symbolMap
121     *            the map of user defined symbols to use
122     */
123    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
124        switch (variant) {
125        case CASE_INSENSITIVE:
126            return new Parsing(symbolMap, false);
127        case CASE_SENSITIVE:
128            return new Parsing(symbolMap, true);
129        case PRINT:
130            return new Print(symbolMap);
131        default:
132            throw new IllegalArgumentException("Unknown variant: " + variant);
133        }
134    }
135
136    /**
137     * The symbol map used by this instance to map between {@link AbstractUnit
138     * Unit}s and <code>String</code>s.
139     */
140    final SymbolMap symbolMap;
141
142    /**
143     * Get the symbol map used by this instance to map between
144     * {@link AbstractUnit Unit}s and <code>String</code>s, etc...
145     * 
146     * @return SymbolMap the current symbol map
147     */
148    @Override
149    protected SymbolMap getSymbols() {
150        return symbolMap;
151    }
152
153    // ////////////////
154    // Constructors //
155    // ////////////////
156    /**
157     * Base constructor.
158     */
159    UCUMFormat(SymbolMap symbolMap) {
160        this.symbolMap = symbolMap;
161    }
162
163    // ///////////
164    // Parsing //
165    // ///////////
166    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq,
167            ParsePosition cursor) throws ParserException;
168
169    protected Unit<?> parse(CharSequence csq, int index) throws ParserException {
170        return parse(csq, new ParsePosition(index));
171    }
172
173    @Override
174    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq)
175            throws ParserException;
176
177    // //////////////
178    // Formatting //
179    // //////////////
180    @SuppressWarnings({ "rawtypes", "unchecked" })
181    public Appendable format(Unit<?> unknownUnit, Appendable appendable)
182            throws IOException {
183        if (!(unknownUnit instanceof AbstractUnit)) {
184            throw new UnsupportedOperationException(
185                    "The UCUM format supports only known units (AbstractUnit instances)");
186        }
187        AbstractUnit unit = (AbstractUnit) unknownUnit;
188        CharSequence symbol;
189        CharSequence annotation = null;
190        if (unit instanceof AnnotatedUnit) {
191            AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit;
192            unit = annotatedUnit.getActualUnit();
193            annotation = annotatedUnit.getAnnotation();
194        }
195        String mapSymbol = symbolMap.getSymbol(unit);
196        if (mapSymbol != null) {
197            symbol = mapSymbol;
198        } else if (unit.getBaseUnits() != null) {
199            Map<? extends AbstractUnit<?>, Integer> productUnits = unit
200                    .getBaseUnits();
201            StringBuffer app = new StringBuffer();
202            for (AbstractUnit<?> u : productUnits.keySet()) {
203                StringBuffer temp = new StringBuffer();
204                temp = (StringBuffer) format(u, temp);
205                if ((temp.indexOf(".") >= 0) || (temp.indexOf("/") >= 0)) {
206                    temp.insert(0, '(');
207                    temp.append(')');
208                }
209                int pow = productUnits.get(u);
210                if (app.length() > 0) { // Not the first unit.
211                    if (pow >= 0) {
212                        app.append('.');
213                    } else {
214                        app.append('/');
215                        pow = -pow;
216                    }
217                } else { // First unit.
218                    if (pow < 0) {
219                        app.append("1/");
220                        pow = -pow;
221                    }
222                }
223                app.append(temp);
224                if (pow != 1) {
225                    app.append(Integer.toString(pow));
226                }
227            }
228            symbol = app;
229        } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) {
230            final StringBuilder temp = new StringBuilder();
231            UnitConverter converter;
232            boolean printSeparator;
233            if (unit.equals(SI.KILOGRAM)) {
234                // A special case because KILOGRAM is a BaseUnit instead of
235                // a transformed unit, for compatability with existing SI
236                // unit system.
237                format(SI.GRAM, temp);
238                converter = MetricPrefix.KILO.getConverter();
239                printSeparator = true;
240            } else {
241                Unit<?> parentUnit = unit.getSystemUnit();
242                converter = unit.getConverterTo(parentUnit);
243                if (parentUnit.equals(SI.KILOGRAM)) {
244                    // More special-case hackery to work around gram/kilogram
245                    // incosistency
246                    parentUnit = SI.GRAM;
247                    converter = converter.concatenate(MetricPrefix.KILO
248                            .getConverter());
249                }
250                format(parentUnit, temp);
251                printSeparator = !parentUnit.equals(ONE);
252            }
253            formatConverter(converter, printSeparator, temp);
254            symbol = temp;
255        } else if (unit.getSymbol() != null) {
256            symbol = unit.getSymbol();
257        } else {
258            throw new IllegalArgumentException(
259                    "Cannot format the given Object as UCUM units (unsupported unit "
260                            + unit.getClass().getName()
261                            + "). "
262                            + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
263        }
264
265        appendable.append(symbol);
266        if (annotation != null && annotation.length() > 0) {
267            appendAnnotation(symbol, annotation, appendable);
268        }
269
270        return appendable;
271    }
272
273    public void label(Unit<?> unit, String label) {
274    }
275
276    public boolean isLocaleSensitive() {
277        return false;
278    }
279
280    void appendAnnotation(CharSequence symbol, CharSequence annotation,
281            Appendable appendable) throws IOException {
282        appendable.append('{');
283        appendable.append(annotation);
284        appendable.append('}');
285    }
286
287    /**
288     * Formats the given converter to the given StringBuffer. This is similar to
289     * what {@link ConverterFormat} does, but there's no need to worry about
290     * operator precedence here, since UCUM only supports multiplication,
291     * division, and exponentiation and expressions are always evaluated left-
292     * to-right.
293     * 
294     * @param converter
295     *            the converter to be formatted
296     * @param continued
297     *            <code>true</code> if the converter expression should begin
298     *            with an operator, otherwise <code>false</code>. This will
299     *            always be true unless the unit being modified is equal to
300     *            Unit.ONE.
301     * @param buffer
302     *            the <code>StringBuffer</code> to append to. Contains the
303     *            already-formatted unit being modified by the given converter.
304     */
305    void formatConverter(UnitConverter converter, boolean continued,
306            StringBuilder buffer) {
307        boolean unitIsExpression = ((buffer.indexOf(".") >= 0) || (buffer
308                .indexOf("/") >= 0));
309        MetricPrefix prefix = symbolMap.getPrefix(converter);
310        if ((prefix != null) && (!unitIsExpression)) {
311            buffer.insert(0, symbolMap.getSymbol(prefix));
312        } else if (converter == AbstractConverter.IDENTITY) {
313            // do nothing
314        } else if (converter instanceof MultiplyConverter) {
315            if (unitIsExpression) {
316                buffer.insert(0, '(');
317                buffer.append(')');
318            }
319            MultiplyConverter multiplyConverter = (MultiplyConverter) converter;
320            double factor = multiplyConverter.getFactor();
321            long lFactor = (long) factor;
322            if ((lFactor != factor) || (lFactor < -9007199254740992L)
323                    || (lFactor > 9007199254740992L)) {
324                throw new IllegalArgumentException(
325                        "Only integer factors are supported in UCUM");
326            }
327            if (continued) {
328                buffer.append('.');
329            }
330            buffer.append(lFactor);
331        } else if (converter instanceof RationalConverter) {
332            if (unitIsExpression) {
333                buffer.insert(0, '(');
334                buffer.append(')');
335            }
336            RationalConverter rationalConverter = (RationalConverter) converter;
337            if (!rationalConverter.getDividend().equals(BigInteger.ONE)) {
338                if (continued) {
339                    buffer.append('.');
340                }
341                buffer.append(rationalConverter.getDividend());
342            }
343            if (!rationalConverter.getDivisor().equals(BigInteger.ONE)) {
344                buffer.append('/');
345                buffer.append(rationalConverter.getDivisor());
346            }
347        } else { // All other converter type (e.g. exponential) we use the
348                 // string representation.
349            buffer.insert(0, converter.toString() + "(");
350            buffer.append(")");
351        }
352    }
353
354    // static final ResourceBundle.Control getControl(final String key) {
355    // return new ResourceBundle.Control() {
356    // @Override
357    // public List<Locale> getCandidateLocales(String baseName, Locale locale) {
358    // if (baseName == null)
359    // throw new NullPointerException();
360    // if (locale.equals(new Locale(key))) {
361    // return Arrays.asList(
362    // locale,
363    // Locale.GERMANY,
364    // // no Locale.GERMAN here
365    // Locale.ROOT);
366    // } else if (locale.equals(Locale.GERMANY)) {
367    // return Arrays.asList(
368    // locale,
369    // // no Locale.GERMAN here
370    // Locale.ROOT);
371    // }
372    // return super.getCandidateLocales(baseName, locale);
373    // }
374    // };
375    // }
376
377    // /////////////////
378    // Inner classes //
379    // /////////////////
380
381    /**
382     * Variant of unit representation in the UCUM standard
383     * 
384     * @see <a
385     *      href="http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules">
386     *      UCUM - Character Set and Lexical Rules</a>
387     */
388    public static enum Variant {
389        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
390    }
391
392    /**
393     * The Print format is used to output units according to the "print" column
394     * in the UCUM standard. Because "print" symbols in UCUM are not unique,
395     * this class of UCUMFormat may not be used for parsing, only for
396     * formatting.
397     */
398    private static final class Print extends UCUMFormat {
399
400        /**
401         *
402         */
403        // private static final long serialVersionUID = 2990875526976721414L;
404        private static final SymbolMap PRINT_SYMBOLS = SymbolMap
405                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
406        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);
407
408        public Print(SymbolMap symbols) {
409            super(symbols);
410        }
411
412        @Override
413        public Unit<? extends Quantity<?>> parse(CharSequence csq,
414                ParsePosition pos) throws IllegalArgumentException {
415            throw new UnsupportedOperationException(
416                    "The print format is for pretty-printing of units only. Parsing is not supported.");
417        }
418
419        @Override
420        void appendAnnotation(CharSequence symbol, CharSequence annotation,
421                Appendable appendable) throws IOException {
422            if (symbol != null && symbol.length() > 0) {
423                appendable.append('(');
424                appendable.append(annotation);
425                appendable.append(')');
426            } else {
427                appendable.append(annotation);
428            }
429        }
430
431        @Override
432        public Unit<? extends Quantity<?>> parse(CharSequence csq)
433                throws IllegalArgumentException {
434            return parse(csq, new ParsePosition(0));
435
436        }
437    }
438
439    /**
440     * The Parsing format outputs formats and parses units according to the
441     * "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
442     * is passed to its constructor.
443     */
444    private static final class Parsing extends UCUMFormat {
445        // private static final long serialVersionUID = -922531801940132715L;
446        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
447                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS",
448                        new ResourceBundle.Control() {
449                            @Override
450                            public List<Locale> getCandidateLocales(
451                                    String baseName, Locale locale) {
452                                if (baseName == null)
453                                    throw new NullPointerException();
454                                if (locale.equals(new Locale("", "CS"))) {
455                                    return Arrays.asList(locale, Locale.ROOT);
456                                }
457                                return super.getCandidateLocales(baseName,
458                                        locale);
459                            }
460                        }));
461        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
462                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI",
463                        new ResourceBundle.Control() {
464                            @Override
465                            public List<Locale> getCandidateLocales(
466                                    String baseName, Locale locale) {
467                                if (baseName == null)
468                                    throw new NullPointerException();
469                                if (locale.equals(new Locale("", "CI"))) {
470                                    return Arrays.asList(locale, Locale.ROOT);
471                                } else if (locale.equals(Locale.GERMANY)) { // TODO
472                                                                            // why
473                                                                            // GERMANY?
474                                    return Arrays.asList(locale,
475                                    // no Locale.GERMAN here
476                                            Locale.ROOT);
477                                }
478                                return super.getCandidateLocales(baseName,
479                                        locale);
480                            }
481                        }));
482        private static final Parsing DEFAULT_CS = new Parsing(
483                CASE_SENSITIVE_SYMBOLS, true);
484        private static final Parsing DEFAULT_CI = new Parsing(
485                CASE_INSENSITIVE_SYMBOLS, false);
486        private final boolean caseSensitive;
487
488        public Parsing(SymbolMap symbols, boolean caseSensitive) {
489            super(symbols);
490            this.caseSensitive = caseSensitive;
491        }
492
493        @Override
494        public Unit<? extends Quantity<?>> parse(CharSequence csq,
495                ParsePosition cursor) throws ParserException {
496            // Parsing reads the whole character sequence from the parse
497            // position.
498            int start = cursor.getIndex();
499            int end = csq.length();
500            if (end <= start) {
501                return ONE;
502            }
503            String source = csq.subSequence(start, end).toString().trim();
504            if (source.length() == 0) {
505                return ONE;
506            }
507            if (!caseSensitive) {
508                source = source.toUpperCase();
509            }
510            UCUMFormatParser parser = new UCUMFormatParser(symbolMap,
511                    new ByteArrayInputStream(source.getBytes()));
512            try {
513                Unit<?> result = parser.parseUnit();
514                cursor.setIndex(end);
515                return result;
516            } catch (TokenException e) {
517                if (e.currentToken != null) {
518                    cursor.setErrorIndex(start + e.currentToken.endColumn);
519                } else {
520                    cursor.setErrorIndex(start);
521                }
522                throw new ParserException(e);
523            } catch (TokenMgrError e) {
524                cursor.setErrorIndex(start);
525                throw new IllegalArgumentException(e.getMessage());
526            }
527        }
528
529        @Override
530        public Unit<? extends Quantity<?>> parse(CharSequence csq)
531                throws ParserException {
532            return parse(csq, new ParsePosition(0));
533        }
534    }
535}