001/*
002 * Units of Measurement Systems
003 * Copyright (c) 2005-2017, Jean-Marie Dautelle, Werner Keil and others.
004 *
005 * All rights reserved.
006 *
007 * Redistribution and use in source and binary forms, with or without modification,
008 * are permitted provided that the following conditions are met:
009 *
010 * 1. Redistributions of source code must retain the above copyright notice,
011 *    this list of conditions and the following disclaimer.
012 *
013 * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
014 *    and the following disclaimer in the documentation and/or other materials provided with the distribution.
015 *
016 * 3. Neither the name of JSR-363, Units of Measurement nor the names of their contributors may be used to
017 *    endorse or promote products derived from this software without specific prior written permission.
018 *
019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
020 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
022 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
023 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
026 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
028 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029 */
030package systems.uom.ucum.format;
031
032import static tech.units.indriya.AbstractUnit.ONE;
033import static systems.uom.ucum.format.UCUMConverterFormatter.*;
034import si.uom.SI;
035import systems.uom.ucum.internal.format.UCUMFormatParser;
036import tech.units.indriya.AbstractUnit;
037import tech.units.indriya.format.AbstractUnitFormat;
038import tech.units.indriya.format.SymbolMap;
039import tech.units.indriya.function.*;
040import tech.units.indriya.internal.format.TokenException;
041import tech.units.indriya.internal.format.TokenMgrError;
042import tech.units.indriya.unit.AnnotatedUnit;
043import tech.units.indriya.unit.MetricPrefix;
044import tech.units.indriya.unit.TransformedUnit;
045
046import javax.measure.Quantity;
047import javax.measure.Unit;
048import javax.measure.UnitConverter;
049import javax.measure.format.ParserException;
050
051import java.io.ByteArrayInputStream;
052import java.io.IOException;
053import java.text.ParsePosition;
054import java.util.*;
055import java.util.Map.Entry;
056
057/**
058 * <p>
059 * This class provides the interface for formatting and parsing {@link AbstractUnit units} according to the
 * <a href="http://unitsofmeasure.org/">Unified Code for Units of Measure</a> (UCUM).
061 * </p>
062 *
063 * <p>
064 * For a technical/historical overview of this format please read <a href="http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=61354">
 * Units of Measure in Clinical Information Systems</a>.
066 * </p>
067 *
068 * <p>
069 * As of revision 1.16, the BNF in the UCUM standard contains an <a href="http://unitsofmeasure.org/ticket/4">error</a>. I've attempted to work around
070 * the problem by modifying the BNF productions for &lt;Term&gt;. Once the error in the standard is corrected, it may be necessary to modify the
071 * productions in the UCUMFormatParser.jj file to conform to the standard.
072 * </p>
073 *
074 * @author <a href="mailto:eric-r@northwestern.edu">Eric Russell</a>
075 * @author <a href="mailto:units@catmedia.us">Werner Keil</a>
076 * @version 0.8, 6 October 2018
077 */
078public abstract class UCUMFormat extends AbstractUnitFormat {
079    /**
080     * 
081     */
082    // private static final long serialVersionUID = 8586656823290135155L;
083
084    // A helper to declare bundle names for all instances
085    private static final String BUNDLE_BASE = UCUMFormat.class.getName();
086
087    // /////////////////
088    // Class methods //
089    // /////////////////
090
091    /**
092     * Returns the instance for formatting/parsing using the given variant
093     * 
094     * @param variant
095     *            the <strong>UCUM</strong> variant to use
096     * @return a {@link UCUMFormat} instance
097     */
098    public static UCUMFormat getInstance(Variant variant) {
099        switch (variant) {
100            case CASE_INSENSITIVE:
101                return Parsing.DEFAULT_CI;
102            case CASE_SENSITIVE:
103                return Parsing.DEFAULT_CS;
104            case PRINT:
105                return Print.DEFAULT;
106            default:
107                throw new IllegalArgumentException("Unknown variant: " + variant);
108        }
109    }
110
111    /**
112     * Returns an instance for formatting and parsing using user defined symbols
113     * 
114     * @param variant
115     *            the <strong>UCUM</strong> variant to use
116     * @param symbolMap
117     *            the map of user defined symbols to use
118     * @return a {@link UCUMFormat} instance
119     */
120    public static UCUMFormat getInstance(Variant variant, SymbolMap symbolMap) {
121        switch (variant) {
122            case CASE_INSENSITIVE:
123                return new Parsing(symbolMap, false);
124            case CASE_SENSITIVE:
125                return new Parsing(symbolMap, true);
126            case PRINT:
127                return new Print(symbolMap);
128            default:
129                throw new IllegalArgumentException("Unknown variant: " + variant);
130        }
131    }
132
133    /**
134     * The symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s.
135     */
136    final SymbolMap symbolMap;
137
138    /**
139     * Get the symbol map used by this instance to map between {@link AbstractUnit Unit}s and <code>String</code>s, etc...
140     * 
141     * @return SymbolMap the current symbol map
142     */
143    @Override
144    protected SymbolMap getSymbols() {
145        return symbolMap;
146    }
147
148    //////////////////
149    // Constructors //
150    //////////////////
151    /**
152     * Base constructor.
153     */
154    UCUMFormat(SymbolMap symbolMap) {
155        this.symbolMap = symbolMap;
156    }
157
158    // ///////////
159    // Parsing //
160    // ///////////
161    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException;
162
163    protected Unit<?> parse(CharSequence csq, int index) throws ParserException {
164        return parse(csq, new ParsePosition(index));
165    }
166
167    @Override
168    public abstract Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException;
169
170    ////////////////
171    // Formatting //
172    ////////////////
173    @SuppressWarnings({ "rawtypes", "unchecked" })
174    public Appendable format(Unit<?> unknownUnit, Appendable appendable) throws IOException {
175        if (!(unknownUnit instanceof AbstractUnit)) {
176            throw new UnsupportedOperationException("The UCUM format supports only known units (AbstractUnit instances)");
177        }
178        AbstractUnit unit = (AbstractUnit) unknownUnit;
179        CharSequence symbol;
180        CharSequence annotation = null;
181        if (unit instanceof AnnotatedUnit) {
182            AnnotatedUnit annotatedUnit = (AnnotatedUnit) unit;
183            unit = annotatedUnit.getActualUnit();
184            annotation = annotatedUnit.getAnnotation();
185        }
186        String mapSymbol = symbolMap.getSymbol(unit);
187        if (mapSymbol != null) {
188            symbol = mapSymbol;
189        } else if (unit instanceof TransformedUnit) {
190            final StringBuilder temp = new StringBuilder();
191            final Unit<?> parentUnit = ((TransformedUnit) unit).getParentUnit();
192            final UnitConverter converter = unit.getConverterTo(parentUnit);
193            final boolean printSeparator = !parentUnit.equals(ONE);
194
195            format(parentUnit, temp);
196            formatConverter(converter, printSeparator, temp, symbolMap);
197
198            symbol = temp;
199        } else if (unit.getBaseUnits() != null) {
200            Map<? extends AbstractUnit<?>, Integer> productUnits = unit.getBaseUnits();
201            StringBuffer app = new StringBuffer();
202            
203            Map<AbstractUnit<?>, Integer> numeratorUnits = new LinkedHashMap<>();            
204            Map<AbstractUnit<?>, Integer> denominatorUnits = new LinkedHashMap<>();
205
206            // divide units into numerators and denominators
207            for (Entry<? extends AbstractUnit<?>, Integer> u : productUnits.entrySet()) {
208                if (u.getValue() > 0) {
209                        numeratorUnits.put(u.getKey(), u.getValue());
210                }else {
211                        denominatorUnits.put(u.getKey(), u.getValue());
212                }
213            }
214            
215            int numeratorCount = 1;
216            for (Entry<? extends AbstractUnit<?>, Integer> u : numeratorUnits.entrySet()) {
217                // add multiplication separators after first unit
218                        if (numeratorCount > 1){
219                                app.append(".");
220                        }
221                        // add individual unit string
222                        format(u.getKey(),app);
223                        // add power number if greater than 1
224                if (u.getValue() > 1){
225                        app.append(u.getValue());
226                }
227                numeratorCount++;
228            }
229            // special case if there is no numerator append one for inverse
230            if (numeratorCount == 1) {
231                app.append("1");
232            }
233            if (denominatorUnits.size() >0){
234                // append division symbol
235                app.append("/");
236                int denominatorCount = 1;
237                for (Entry<? extends AbstractUnit<?>, Integer> u : denominatorUnits.entrySet()) {
238                        // if there is more than one denominator unit and this is the first, add open parenthesis 
239                        if (denominatorCount == 1 && denominatorUnits.size() > 1 ) {
240                                app.append("(");
241                        }
242                        // add multiplication separators after first unit
243                        if (denominatorCount > 1){
244                                app.append(".");
245                        }
246                        // add individual unit string
247                        format(u.getKey(),app);
248                        // add power number if abs greater than 1
249                        if (Math.abs(u.getValue()) < -1){
250                                app.append(-u.getValue());
251                        }
252                        // if there is more than one denominator unit and this is the last, add close parenthesis
253                        if (denominatorCount == denominatorUnits.size() && denominatorUnits.size() > 1 ) {
254                                app.append(")");
255                        }
256                        denominatorCount++;
257                }
258            }            
259            symbol = app;
260        } else if (!unit.isSystemUnit() || unit.equals(SI.KILOGRAM)) {
261            final StringBuilder temp = new StringBuilder();
262            UnitConverter converter;
263            boolean printSeparator;
264            if (unit.equals(SI.KILOGRAM)) {
265                // A special case because KILOGRAM is a BaseUnit instead of
266                // a transformed unit, for compatibility with existing SI
267                // unit system.
268                format(SI.GRAM, temp);
269                converter = PowersOfIntConverter.of(MetricPrefix.KILO);
270                printSeparator = true;
271            } else {
272                Unit<?> parentUnit = unit.getSystemUnit();
273                converter = unit.getConverterTo(parentUnit);
274                if (parentUnit.equals(SI.KILOGRAM)) {
275                    // More special-case hackery to work around gram/kilogram
276                    // inconsistency
277                    parentUnit = SI.GRAM;
278                    converter = converter.concatenate(PowersOfIntConverter.of(MetricPrefix.KILO));
279                }
280                format(parentUnit, temp);
281                printSeparator = !parentUnit.equals(ONE);
282            }
283            formatConverter(converter, printSeparator, temp, symbolMap);
284            symbol = temp;
285        } else if (unit.getSymbol() != null) {
286            symbol = unit.getSymbol();
287        } else {
288            throw new IllegalArgumentException("Cannot format the given Object as UCUM units (unsupported unit " + unit.getClass().getName() + "). "
289                    + "Custom units types should override the toString() method as the default implementation uses the UCUM format.");
290        }
291
292        appendable.append(symbol);
293        if (annotation != null && annotation.length() > 0) {
294            appendAnnotation(symbol, annotation, appendable);
295        }
296
297        return appendable;
298    }
299
300    public void label(Unit<?> unit, String label) {
301        throw new UnsupportedOperationException("label() not supported by this implementation");
302    }
303
304    public boolean isLocaleSensitive() {
305        return false;
306    }
307
308    void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
309        appendable.append('{');
310        appendable.append(annotation);
311        appendable.append('}');
312    }
313
314    // static final ResourceBundle.Control getControl(final String key) {
315    // return new ResourceBundle.Control() {
316    // @Override
317    // public List<Locale> getCandidateLocales(String baseName, Locale locale) {
318    // if (baseName == null)
319    // throw new NullPointerException();
320    // if (locale.equals(new Locale(key))) {
321    // return Arrays.asList(
322    // locale,
323    // Locale.GERMANY,
324    // // no Locale.GERMAN here
325    // Locale.ROOT);
326    // } else if (locale.equals(Locale.GERMANY)) {
327    // return Arrays.asList(
328    // locale,
329    // // no Locale.GERMAN here
330    // Locale.ROOT);
331    // }
332    // return super.getCandidateLocales(baseName, locale);
333    // }
334    // };
335    // }
336
337    // /////////////////
338    // Inner classes //
339    // /////////////////
340
341    /**
342     * Variant of unit representation in the UCUM standard
343     * 
344     * @see <a href= "http://unitsofmeasure.org/ucum.html#section-Character-Set-and-Lexical-Rules"> UCUM - Character Set and Lexical Rules</a>
345     */
346    public static enum Variant {
347        CASE_SENSITIVE, CASE_INSENSITIVE, PRINT
348    }
349
350    /**
351     * The Print format is used to output units according to the "print" column in the UCUM standard. Because "print" symbols in UCUM are not unique,
352     * this class of UCUMFormat may not be used for parsing, only for formatting.
353     */
354    private static final class Print extends UCUMFormat {
355
356        /**
357         *
358         */
359        // private static final long serialVersionUID = 2990875526976721414L;
360        private static final SymbolMap PRINT_SYMBOLS = SymbolMap.of(ResourceBundle.getBundle(BUNDLE_BASE + "_Print"));
361        private static final Print DEFAULT = new Print(PRINT_SYMBOLS);
362
363        public Print(SymbolMap symbols) {
364            super(symbols);
365        }
366
367        @Override
368        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition pos) throws IllegalArgumentException {
369            throw new UnsupportedOperationException("The print format is for pretty-printing of units only. Parsing is not supported.");
370        }
371
372        @Override
373        void appendAnnotation(CharSequence symbol, CharSequence annotation, Appendable appendable) throws IOException {
374            if (symbol != null && symbol.length() > 0) {
375                appendable.append('(');
376                appendable.append(annotation);
377                appendable.append(')');
378            } else {
379                appendable.append(annotation);
380            }
381        }
382
383        @Override
384        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws IllegalArgumentException {
385            return parse(csq, new ParsePosition(0));
386
387        }
388    }
389
390    /**
391     * The Parsing format outputs formats and parses units according to the "c/s" or "c/i" column in the UCUM standard, depending on which SymbolMap
392     * is passed to its constructor.
393     */
394    private static final class Parsing extends UCUMFormat {
395        // private static final long serialVersionUID = -922531801940132715L;
396        private static final SymbolMap CASE_SENSITIVE_SYMBOLS = SymbolMap
397                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CS", new ResourceBundle.Control() {
398                    @Override
399                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
400                        if (baseName == null)
401                            throw new NullPointerException();
402                        if (locale.equals(new Locale("", "CS"))) {
403                            return Arrays.asList(locale, Locale.ROOT);
404                        }
405                        return super.getCandidateLocales(baseName, locale);
406                    }
407                }));
408        private static final SymbolMap CASE_INSENSITIVE_SYMBOLS = SymbolMap
409                .of(ResourceBundle.getBundle(BUNDLE_BASE + "_CI", new ResourceBundle.Control() {
410                    @Override
411                    public List<Locale> getCandidateLocales(String baseName, Locale locale) {
412                        if (baseName == null)
413                            throw new NullPointerException();
414                        if (locale.equals(new Locale("", "CI"))) {
415                            return Arrays.asList(locale, Locale.ROOT);
416                        } else if (locale.equals(Locale.GERMANY)) { // TODO
417                            // why
418                            // GERMANY?
419                            return Arrays.asList(locale,
420                                    // no Locale.GERMAN here
421                                    Locale.ROOT);
422                        }
423                        return super.getCandidateLocales(baseName, locale);
424                    }
425                }));
426        private static final Parsing DEFAULT_CS = new Parsing(CASE_SENSITIVE_SYMBOLS, true);
427        private static final Parsing DEFAULT_CI = new Parsing(CASE_INSENSITIVE_SYMBOLS, false);
428        private final boolean caseSensitive;
429
430        public Parsing(SymbolMap symbols, boolean caseSensitive) {
431            super(symbols);
432            this.caseSensitive = caseSensitive;
433        }
434
435        @Override
436        public Unit<? extends Quantity<?>> parse(CharSequence csq, ParsePosition cursor) throws ParserException {
437            // Parsing reads the whole character sequence from the parse
438            // position.
439            int start = cursor.getIndex();
440            int end = csq.length();
441            if (end <= start) {
442                return ONE;
443            }
444            String source = csq.subSequence(start, end).toString().trim();
445            if (source.length() == 0) {
446                return ONE;
447            }
448            if (!caseSensitive) {
449                source = source.toUpperCase();
450            }
451            UCUMFormatParser parser = new UCUMFormatParser(symbolMap, new ByteArrayInputStream(source.getBytes()));
452            try {
453                Unit<?> result = parser.parseUnit();
454                cursor.setIndex(end);
455                return result;
456            } catch (TokenException e) {
457                if (e.currentToken != null) {
458                    cursor.setErrorIndex(start + e.currentToken.endColumn);
459                } else {
460                    cursor.setErrorIndex(start);
461                }
462                throw new ParserException(e);
463            } catch (TokenMgrError e) {
464                cursor.setErrorIndex(start);
465                throw new IllegalArgumentException(e.getMessage());
466            }
467        }
468
469        @Override
470        public Unit<? extends Quantity<?>> parse(CharSequence csq) throws ParserException {
471            return parse(csq, new ParsePosition(0));
472        }
473    }
474}