001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.validation.tests;
003
004import static org.openstreetmap.josm.tools.I18n.marktr;
005import static org.openstreetmap.josm.tools.I18n.tr;
006
007import java.awt.GridBagConstraints;
008import java.awt.event.ActionListener;
009import java.io.BufferedReader;
010import java.io.IOException;
011import java.util.ArrayList;
012import java.util.Arrays;
013import java.util.Collection;
014import java.util.Collections;
015import java.util.HashMap;
016import java.util.HashSet;
017import java.util.List;
018import java.util.Locale;
019import java.util.Map;
020import java.util.Map.Entry;
021import java.util.Set;
022import java.util.regex.Pattern;
023
024import javax.swing.JCheckBox;
025import javax.swing.JLabel;
026import javax.swing.JPanel;
027
028import org.openstreetmap.josm.command.ChangePropertyCommand;
029import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
030import org.openstreetmap.josm.command.Command;
031import org.openstreetmap.josm.command.SequenceCommand;
032import org.openstreetmap.josm.data.osm.AbstractPrimitive;
033import org.openstreetmap.josm.data.osm.OsmPrimitive;
034import org.openstreetmap.josm.data.osm.Tag;
035import org.openstreetmap.josm.data.osm.Tagged;
036import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
037import org.openstreetmap.josm.data.validation.Severity;
038import org.openstreetmap.josm.data.validation.Test.TagTest;
039import org.openstreetmap.josm.data.validation.TestError;
040import org.openstreetmap.josm.data.validation.util.Entities;
041import org.openstreetmap.josm.gui.progress.ProgressMonitor;
042import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
043import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
045import org.openstreetmap.josm.gui.tagging.presets.items.Check;
046import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
047import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
048import org.openstreetmap.josm.gui.widgets.EditableList;
049import org.openstreetmap.josm.io.CachedFile;
050import org.openstreetmap.josm.spi.preferences.Config;
051import org.openstreetmap.josm.tools.GBC;
052import org.openstreetmap.josm.tools.Logging;
053import org.openstreetmap.josm.tools.MultiMap;
054import org.openstreetmap.josm.tools.Utils;
055
056/**
057 * Check for misspelled or wrong tags
058 *
059 * @author frsantos
060 * @since 3669
061 */
062public class TagChecker extends TagTest {
063
064    /** The config file of ignored tags */
065    public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
066    /** The config file of dictionary words */
067    public static final String SPELL_FILE = "resource://data/validator/words.cfg";
068
    /**
     * Normalized keys: maps a harmonized (or misspelled) key to the key it should be replaced with
     * if the key was not found in presets. Filled from the spell-check file, the ignore file and the presets.
     */
    private static final Map<String, String> harmonizedKeys = new HashMap<>();
    /**
     * Additional known keys which are not stored in TaggingPresets (uninteresting keys and the
     * "knownkeys" preference). Stays {@code null} until the presets have been initialized.
     */
    private static volatile HashSet<String> additionalPresetsValueData;
    /** Often used tags which are not in presets; filled from the "K:" lines of the ignore file */
    private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();

    /**
     * Matches the characters removed by the automatic fix: ASCII control characters except
     * line feed (0x0A) and carriage return (0x0D), DEL (0x7F), and U+200C-U+200F / U+202A-U+202E.
     */
    private static final Pattern NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
            "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200c-\\u200f\\u202a-\\u202e]");

    /** Ignored key prefixes ("S:" lines of the ignore file) */
    private static final List<String> ignoreDataStartsWith = new ArrayList<>();
    /** Ignored keys, compared for equality ("E:" lines of the ignore file) */
    private static final Set<String> ignoreDataEquals = new HashSet<>();
    /** Ignored key suffixes ("F:" lines of the ignore file) */
    private static final List<String> ignoreDataEndsWith = new ArrayList<>();
    /** Ignored key/value pairs ("K:" lines of the ignore file) */
    private static final List<Tag> ignoreDataTag = new ArrayList<>();
    /** tag keys that have only numerical values in the presets */
    private static final Set<String> ignoreForLevenshtein = new HashSet<>();
087    /** The preferences prefix */
088    protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();
089
090    /**
091     * The preference key to check values
092     */
093    public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
094    /**
095     * The preference key to check keys
096     */
097    public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
098    /**
099     * The preference key to enable complex checks
100     */
101    public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
102    /**
103     * The preference key to search for fixme tags
104     */
105    public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";
106
107    /**
108     * The preference key for source files
109     * @see #DEFAULT_SOURCES
110     */
111    public static final String PREF_SOURCES = PREFIX + ".source";
112
113    private static final String BEFORE_UPLOAD = "BeforeUpload";
114    /**
115     * The preference key to check keys - used before upload
116     */
117    public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
118    /**
119     * The preference key to check values - used before upload
120     */
121    public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
122    /**
123     * The preference key to run complex tests - used before upload
124     */
125    public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
126    /**
127     * The preference key to search for fixmes - used before upload
128     */
129    public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
130
    /** Largest Levenshtein distance at which a known value is still proposed for an unknown value */
    private static final int MAX_LEVENSHTEIN_DISTANCE = 2;

    /** If {@code true}, tag keys are checked */
    protected boolean checkKeys;
    /** If {@code true}, tag values are checked (simple checks and preset/spell checks) */
    protected boolean checkValues;
    /** Was used for special configuration file, might be used to disable value spell checker. */
    protected boolean checkComplex;
    /** If {@code true}, tags that look like a fixme note are reported */
    protected boolean checkFixmes;

    // Check boxes for the individual checks.
    // NOTE(review): presumably created and read by the preferences panel code, which is outside this excerpt.
    protected JCheckBox prefCheckKeys;
    protected JCheckBox prefCheckValues;
    protected JCheckBox prefCheckComplex;
    protected JCheckBox prefCheckFixmes;
    protected JCheckBox prefCheckPaint;

    // Counterparts of the check boxes above for the "before upload" preference keys.
    protected JCheckBox prefCheckKeysBeforeUpload;
    protected JCheckBox prefCheckValuesBeforeUpload;
    protected JCheckBox prefCheckComplexBeforeUpload;
    protected JCheckBox prefCheckFixmesBeforeUpload;
    protected JCheckBox prefCheckPaintBeforeUpload;
150
    // Error codes of the problems reported by this test (passed to TestError.builder).
    // CHECKSTYLE.OFF: SingleSpaceSeparator
    protected static final int EMPTY_VALUES             = 1200;
    protected static final int INVALID_KEY              = 1201;
    protected static final int INVALID_VALUE            = 1202;
    protected static final int FIXME                    = 1203;
    protected static final int INVALID_SPACE            = 1204;
    protected static final int INVALID_KEY_SPACE        = 1205;
    protected static final int INVALID_HTML             = 1206; /* 1207 was PAINT */
    protected static final int LONG_VALUE               = 1208;
    protected static final int LONG_KEY                 = 1209;
    protected static final int LOW_CHAR_VALUE           = 1210;
    protected static final int LOW_CHAR_KEY             = 1211;
    protected static final int MISSPELLED_VALUE         = 1212;
    protected static final int MISSPELLED_KEY           = 1213;
    protected static final int MULTIPLE_SPACES          = 1214;
    protected static final int MISSPELLED_VALUE_NO_FIX  = 1215;
    // CHECKSTYLE.ON: SingleSpaceSeparator

    // NOTE(review): presumably the editable list of source files shown in the preferences panel; not used in this excerpt.
    protected EditableList sourcesList;

    /** Source files that are read when the preference {@link #PREF_SOURCES} is not set */
    private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
172
    /**
     * Constructs a new {@code TagChecker}, passing the translated test name and
     * description to the {@link TagTest} constructor.
     */
    public TagChecker() {
        super(tr("Tag checker"), tr("This test checks for errors in tag keys and values."));
    }
179
    /**
     * Loads the data this test works with: the configured source files first, then the
     * preset keys, and finally the list of keys with purely numerical preset values.
     *
     * @throws IOException if a source file could not be read
     */
    @Override
    public void initialize() throws IOException {
        // must run first: it clears all static tables, including the ones filled by the two calls below
        initializeData();
        initializePresets();
        // relies on the ignore lists loaded by initializeData()
        analysePresets();
    }
186
187    /**
188     * Add presets that contain only numerical values to the ignore list
189     */
190    private static void analysePresets() {
191        for (String key : TaggingPresets.getPresetKeys()) {
192            if (isKeyIgnored(key))
193                continue;
194            boolean allNumerical = true;
195            Set<String> values = TaggingPresets.getPresetValues(key);
196            if (values.isEmpty())
197                allNumerical = false;
198            for (String val : values) {
199                if (!isNum(val)) {
200                    allNumerical = false;
201                    break;
202                }
203            }
204            if (allNumerical) {
205                ignoreForLevenshtein.add(key);
206            }
207        }
208    }
209
    /**
     * Clears the static lookup tables and refills them from all configured source files
     * (preference {@link #PREF_SOURCES}, by default the ignore-tags file and the spell-check file).
     * <p>
     * The kind of a file is detected from its comment lines:
     * <ul>
     * <li>after a line starting with {@code # JOSM IgnoreTags}, every non-empty, non-comment line is
     *     passed to {@code parseIgnoreFileLine};</li>
     * <li>after a line starting with {@code # JOSM TagChecker}, the lines are skipped as this format
     *     is no longer supported;</li>
     * <li>otherwise the file is a spell-check file: a list of words, beginning with +/-. If a line starts with +,
     *     the word is valid, but if it starts with -, the word should be replaced
     *     by the nearest + word before this.</li>
     * </ul>
     *
     * @throws IOException if at least one source could not be read; the other sources are still processed
     */
    private static void initializeData() throws IOException {
        ignoreDataStartsWith.clear();
        ignoreDataEquals.clear();
        ignoreDataEndsWith.clear();
        ignoreDataTag.clear();
        harmonizedKeys.clear();
        ignoreForLevenshtein.clear();
        oftenUsedTags.clear();

        // collects the names of all sources that failed, reported in one exception at the end
        StringBuilder errorSources = new StringBuilder();
        for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
            try (
                CachedFile cf = new CachedFile(source);
                BufferedReader reader = cf.getContentReader()
            ) {
                // the last valid word ("+" line) seen in a spell-check file
                String okValue = null;
                boolean tagcheckerfile = false;
                boolean ignorefile = false;
                boolean isFirstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.isEmpty()) {
                        // ignore
                    } else if (line.startsWith("#")) {
                        // comment line, it may carry the marker that tells the file type
                        if (line.startsWith("# JOSM TagChecker")) {
                            tagcheckerfile = true;
                            Logging.error(tr("Ignoring {0}. Support was dropped", source));
                        } else
                        if (line.startsWith("# JOSM IgnoreTags")) {
                            ignorefile = true;
                            if (!DEFAULT_SOURCES.contains(source)) {
                                Logging.info(tr("Adding {0} to ignore tags", source));
                            }
                        }
                    } else if (ignorefile) {
                        parseIgnoreFileLine(source, line);
                    } else if (tagcheckerfile) {
                        // ignore
                    } else if (line.charAt(0) == '+') {
                        okValue = line.substring(1);
                    } else if (line.charAt(0) == '-' && okValue != null) {
                        String hk = harmonizeKey(line.substring(1));
                        // put() returns the previous mapping, so this logs when an existing entry was replaced
                        if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) {
                            Logging.debug(tr("Line was ignored: {0}", line));
                        }
                    } else {
                        Logging.error(tr("Invalid spellcheck line: {0}", line));
                    }
                    // the file type is announced once, based on what is known after the first line
                    if (isFirstLine) {
                        isFirstLine = false;
                        if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
                            Logging.info(tr("Adding {0} to spellchecker", source));
                        }
                    }
                }
            } catch (IOException e) {
                // remember the failing source and continue with the next one
                Logging.error(e);
                errorSources.append(source).append('\n');
            }
        }

        if (errorSources.length() > 0)
            throw new IOException(tr("Could not access data file(s):\n{0}", errorSources));
    }
282
283    /**
284     * Parse a line found in a configuration file
285     * @param source name of configuration file
286     * @param line the line to parse
287     */
288    private static void parseIgnoreFileLine(String source, String line) {
289        line = line.trim();
290        if (line.length() < 4) {
291            return;
292        }
293        try {
294            String key = line.substring(0, 2);
295            line = line.substring(2);
296
297            switch (key) {
298            case "S:":
299                ignoreDataStartsWith.add(line);
300                break;
301            case "E:":
302                ignoreDataEquals.add(line);
303                addToKeyDictionary(line);
304                break;
305            case "F:":
306                ignoreDataEndsWith.add(line);
307                break;
308            case "K:":
309                Tag tag = Tag.ofString(line);
310                ignoreDataTag.add(tag);
311                oftenUsedTags.put(tag.getKey(), tag.getValue());
312                addToKeyDictionary(tag.getKey());
313                break;
314            default:
315                if (!key.startsWith(";")) {
316                    Logging.warn("Unsupported TagChecker key: " + key);
317                }
318            }
319        } catch (IllegalArgumentException e) {
320            Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
321            Logging.trace(e);
322        }
323    }
324
325    private static void addToKeyDictionary(String key) {
326        if (key != null) {
327            String hk = harmonizeKey(key);
328            if (!key.equals(hk)) {
329                harmonizedKeys.put(hk, key);
330            }
331        }
332    }
333
334    /**
335     * Reads the presets data.
336     *
337     */
338    public static void initializePresets() {
339
340        if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
341            return;
342
343        Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
344        if (!presets.isEmpty()) {
345            initAdditionalPresetsValueData();
346            for (TaggingPreset p : presets) {
347                for (TaggingPresetItem i : p.data) {
348                    if (i instanceof KeyedItem) {
349                        addPresetValue((KeyedItem) i);
350                    } else if (i instanceof CheckGroup) {
351                        for (Check c : ((CheckGroup) i).checks) {
352                            addPresetValue(c);
353                        }
354                    }
355                }
356            }
357        }
358    }
359
360    private static void initAdditionalPresetsValueData() {
361        additionalPresetsValueData = new HashSet<>();
362        for (String a : AbstractPrimitive.getUninterestingKeys()) {
363            additionalPresetsValueData.add(a);
364        }
365        for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys",
366                Arrays.asList("is_in", "int_ref", "fixme", "population"))) {
367            additionalPresetsValueData.add(a);
368        }
369    }
370
371    private static void addPresetValue(KeyedItem ky) {
372        if (ky.key != null && ky.getValues() != null) {
373            addToKeyDictionary(ky.key);
374        }
375    }
376
377    /**
378     * Checks given string (key or value) if it contains non-printing control characters (either ASCII or Unicode bidi characters)
379     * @param s string to check
380     * @return {@code true} if {@code s} contains non-printing control characters
381     */
382    private static boolean containsNonPrintingControlCharacter(String s) {
383        if (s == null)
384            return false;
385        for (int i = 0; i < s.length(); i++) {
386            char c = s.charAt(i);
387            if ((isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
388                return true;
389        }
390        return false;
391    }
392
393    private static boolean isAsciiControlChar(char c) {
394        return c < 0x20 || c == 0x7F;
395    }
396
397    private static boolean isNewLineChar(char c) {
398        return c == 0x0a || c == 0x0d;
399    }
400
401    private static boolean isBidiControlChar(char c) {
402        /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
403                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
404        return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
405    }
406
    /**
     * Removes every character matched by {@code NON_PRINTING_CONTROL_CHARACTERS} from the given string.
     * @param s string to clean; NOTE(review): a {@code null} argument is not handled here
     * @return {@code s} without the matched characters
     */
    static String removeNonPrintingControlCharacters(String s) {
        return NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
    }
410
411    /**
412     * Get set of preset values for the given key.
413     * @param key the key
414     * @return null if key is not in presets or in additionalPresetsValueData,
415     *  else a set which might be empty.
416     */
417    private static Set<String> getPresetValues(String key) {
418        Set<String> res = TaggingPresets.getPresetValues(key);
419        if (res != null)
420            return res;
421        if (additionalPresetsValueData.contains(key))
422            return Collections.emptySet();
423        // null means key is not known
424        return null;
425    }
426
427    /**
428     * Determines if the given key is in internal presets.
429     * @param key key
430     * @return {@code true} if the given key is in internal presets
431     * @since 9023
432     */
433    public static boolean isKeyInPresets(String key) {
434        return TaggingPresets.getPresetValues(key) != null;
435    }
436
437    /**
438     * Determines if the given tag is in internal presets.
439     * @param key key
440     * @param value value
441     * @return {@code true} if the given tag is in internal presets
442     * @since 9023
443     */
444    public static boolean isTagInPresets(String key, String value) {
445        final Set<String> values = getPresetValues(key);
446        return values != null && values.contains(value);
447    }
448
449    /**
450     * Returns the list of ignored tags.
451     * @return the list of ignored tags
452     * @since 9023
453     */
454    public static List<Tag> getIgnoredTags() {
455        return new ArrayList<>(ignoreDataTag);
456    }
457
458    /**
459     * Determines if the given tag key is ignored for checks "key/tag not in presets".
460     * @param key key
461     * @return true if the given key is ignored
462     */
463    private static boolean isKeyIgnored(String key) {
464        if (ignoreDataEquals.contains(key)) {
465            return true;
466        }
467        for (String a : ignoreDataStartsWith) {
468            if (key.startsWith(a)) {
469                return true;
470            }
471        }
472        for (String a : ignoreDataEndsWith) {
473            if (key.endsWith(a)) {
474                return true;
475            }
476        }
477        return false;
478    }
479
480    /**
481     * Determines if the given tag is ignored for checks "key/tag not in presets".
482     * @param key key
483     * @param value value
484     * @return {@code true} if the given tag is ignored
485     * @since 9023
486     */
487    public static boolean isTagIgnored(String key, String value) {
488        if (isKeyIgnored(key))
489            return true;
490        final Set<String> values = getPresetValues(key);
491        if (values != null && values.isEmpty())
492            return true;
493        if (!isTagInPresets(key, value)) {
494            for (Tag a : ignoreDataTag) {
495                if (key.equals(a.getKey()) && value.equals(a.getValue())) {
496                    return true;
497                }
498            }
499        }
500        return false;
501    }
502
503    /**
504     * Checks the primitive tags
505     * @param p The primitive to check
506     */
507    @Override
508    public void check(OsmPrimitive p) {
509        if (!p.isTagged())
510            return;
511
512        // Just a collection to know if a primitive has been already marked with error
513        MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
514
515        for (Entry<String, String> prop : p.getKeys().entrySet()) {
516            String s = marktr("Tag ''{0}'' invalid.");
517            String key = prop.getKey();
518            String value = prop.getValue();
519
520            if (checkKeys) {
521                checkSingleTagKeySimple(withErrors, p, s, key);
522            }
523            if (checkValues) {
524                checkSingleTagValueSimple(withErrors, p, s, key, value);
525                checkSingleTagComplex(withErrors, p, key, value);
526            }
527            if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
528                errors.add(TestError.builder(this, Severity.OTHER, FIXME)
529                        .message(tr("FIXMES"))
530                        .primitives(p)
531                        .build());
532                withErrors.put(p, "FIXME");
533            }
534        }
535    }
536
537    private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
538        if (!checkValues || value == null)
539            return;
540        if ((containsNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
541            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
542                    .message(tr("Tag value contains non-printing character"), s, key)
543                    .primitives(p)
544                    .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(value)))
545                    .build());
546            withErrors.put(p, "ICV");
547        }
548        if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
549            errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
550                    .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
551                    .primitives(p)
552                    .build());
553            withErrors.put(p, "LV");
554        }
555        if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) {
556            errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
557                    .message(tr("Tags with empty values"), s, key)
558                    .primitives(p)
559                    .build());
560            withErrors.put(p, "EV");
561        }
562        final String errTypeSpace = "SPACE";
563        if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
564            errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
565                    .message(tr("Property values start or end with white space"), s, key)
566                    .primitives(p)
567                    .build());
568            withErrors.put(p, errTypeSpace);
569        }
570        if (value.contains("  ") && !withErrors.contains(p, errTypeSpace)) {
571            errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
572                    .message(tr("Property values contain multiple white spaces"), s, key)
573                    .primitives(p)
574                    .build());
575            withErrors.put(p, errTypeSpace);
576        }
577        if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
578            errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
579                    .message(tr("Property values contain HTML entity"), s, key)
580                    .primitives(p)
581                    .build());
582            withErrors.put(p, "HTML");
583        }
584    }
585
586    private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
587        if (!checkKeys || key == null)
588            return;
589        if ((containsNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
590            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
591                    .message(tr("Tag key contains non-printing character"), s, key)
592                    .primitives(p)
593                    .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(key)))
594                    .build());
595            withErrors.put(p, "ICK");
596        }
597        if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
598            errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
599                    .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
600                    .primitives(p)
601                    .build());
602            withErrors.put(p, "LK");
603        }
604        if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
605            errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
606                    .message(tr("Invalid white space in property key"), s, key)
607                    .primitives(p)
608                    .build());
609            withErrors.put(p, "IPK");
610        }
611    }
612
613    private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
614        if (!checkValues || key == null || value == null || value.isEmpty())
615            return;
616        if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
617            if (!isKeyInPresets(key)) {
618                spellCheckKey(withErrors, p, key);
619            } else if (!isTagInPresets(key, value)) {
620                if (oftenUsedTags.contains(key, value)) {
621                    // tag is quite often used but not in presets
622                    errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
623                            .message(tr("Presets do not contain property value"),
624                                    marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
625                            .primitives(p)
626                            .build());
627                    withErrors.put(p, "UPV");
628                } else {
629                    tryGuess(p, key, value, withErrors);
630                }
631            }
632        }
633    }
634
635    private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
636        String prettifiedKey = harmonizeKey(key);
637        String fixedKey;
638        if (ignoreDataEquals.contains(prettifiedKey)) {
639            fixedKey = prettifiedKey;
640        } else {
641            fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
642        }
643        if (fixedKey == null) {
644            for (Tag a : ignoreDataTag) {
645                if (a.getKey().equals(prettifiedKey)) {
646                    fixedKey = prettifiedKey;
647                    break;
648                }
649            }
650        }
651
652        if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) {
653            final String proposedKey = fixedKey;
654            // misspelled preset key
655            final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
656                    .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
657                    .primitives(p);
658            if (p.hasKey(fixedKey)) {
659                errors.add(error.build());
660            } else {
661                errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
662            }
663            withErrors.put(p, "WPK");
664        } else {
665            errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
666                    .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
667                    .primitives(p)
668                    .build());
669            withErrors.put(p, "UPK");
670        }
671    }
672
    /**
     * Tries to find the value that was meant by an unknown value of a known key and reports the result.
     * <p>
     * Candidates are the preset values of the key and the often used values of the key. If the harmonized
     * value is a candidate, it is proposed. Otherwise, unless the key only has numerical preset values,
     * the candidates with the smallest Levenshtein distance are searched. Depending on the outcome one of
     * three errors is added: several equally close candidates ({@code MISSPELLED_VALUE_NO_FIX}),
     * a single proposal that differs from the value ({@code MISSPELLED_VALUE}),
     * or no proposal ({@code INVALID_VALUE}).
     * Nothing is reported if the harmonized value is {@code null} or empty.
     *
     * @param p the primitive owning the tag
     * @param key the tag key
     * @param value the unknown tag value
     * @param withErrors the kinds of error already reported per primitive
     */
    private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
        // try to fix common typos and check again if value is still unknown
        final String harmonizedValue = harmonizeValue(value);
        if (harmonizedValue == null || harmonizedValue.isEmpty())
            return;
        String fixedValue = null;
        // the candidate sets: preset values first, then often used values
        List<Set<String>> sets = new ArrayList<>();
        Set<String> presetValues = getPresetValues(key);
        if (presetValues != null)
            sets.add(presetValues);
        Set<String> usedValues = oftenUsedTags.get(key);
        if (usedValues != null)
            sets.add(usedValues);
        // an exact match of the harmonized value wins
        for (Set<String> possibleValues: sets) {
            if (possibleValues.contains(harmonizedValue)) {
                fixedValue = harmonizedValue;
                break;
            }
        }
        if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
            int maxPresetValueLen = 0;
            // all candidates found at the current minimum distance
            List<String> fixVals = new ArrayList<>();
            // use Levenshtein distance to find typical typos
            int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
            String closest = null;
            for (Set<String> possibleValues: sets) {
                for (String possibleVal : possibleValues) {
                    if (possibleVal.isEmpty())
                        continue;
                    maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
                    if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
                        // don't suggest fix value when given value is short and lengths are too different
                        // for example surface=u would result in surface=mud
                        continue;
                    }
                    int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
                    if (dist >= harmonizedValue.length()) {
                        // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
                        continue;
                    }
                    if (dist < minDist) {
                        // new best candidate, forget the ones that were further away
                        closest = possibleVal;
                        minDist = dist;
                        fixVals.clear();
                        fixVals.add(possibleVal);
                    } else if (dist == minDist) {
                        fixVals.add(possibleVal);
                    }
                }
            }

            // accept the result only if it is close enough, at least one candidate is longer than the maximum
            // distance, and the value is either longer than 3 characters or closer than the maximum distance
            if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
                    && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
                if (fixVals.size() < 2) {
                    fixedValue = closest;
                } else {
                    // sorted so that the alternatives are listed in a stable order
                    Collections.sort(fixVals);
                    // misspelled preset value with multiple good alternatives
                    errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
                            .message(tr("Unknown property value"),
                                    marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
                                    value, key, fixVals)
                            .primitives(p).build());
                    withErrors.put(p, "WPV");
                    return;
                }
            }
        }
        if (fixedValue != null && !fixedValue.equals(value)) {
            final String newValue = fixedValue;
            // misspelled preset value
            errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
                    .message(tr("Unknown property value"),
                            marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
                    .primitives(p)
                    .build());
            withErrors.put(p, "WPV");
        } else {
            // unknown preset value
            errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
                    .message(tr("Presets do not contain property value"),
                            marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
                    .primitives(p)
                    .build());
            withErrors.put(p, "UPV");
        }
    }
760
761    private static boolean isNum(String harmonizedValue) {
762        try {
763            Double.parseDouble(harmonizedValue);
764            return true;
765        } catch (NumberFormatException e) {
766            return false;
767        }
768    }
769
770    private static boolean isFixme(String key, String value) {
771        return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
772          || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
773    }
774
775    private static String harmonizeKey(String key) {
776        return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
777    }
778
779    private static String harmonizeValue(String value) {
780        return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
781    }
782
783    @Override
784    public void startTest(ProgressMonitor monitor) {
785        super.startTest(monitor);
786        checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
787        if (isBeforeUpload) {
788            checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
789        }
790
791        checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
792        if (isBeforeUpload) {
793            checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
794        }
795
796        checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
797        if (isBeforeUpload) {
798            checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
799        }
800
801        checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
802        if (isBeforeUpload) {
803            checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
804        }
805    }
806
807    @Override
808    public void visit(Collection<OsmPrimitive> selection) {
809        if (checkKeys || checkValues || checkComplex || checkFixmes) {
810            super.visit(selection);
811        }
812    }
813
814    @Override
815    public void addGui(JPanel testPanel) {
816        GBC a = GBC.eol();
817        a.anchor = GridBagConstraints.EAST;
818
819        testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
820
821        prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
822        prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
823        testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
824
825        prefCheckKeysBeforeUpload = new JCheckBox();
826        prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
827        testPanel.add(prefCheckKeysBeforeUpload, a);
828
829        prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
830        prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
831        testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
832
833        prefCheckComplexBeforeUpload = new JCheckBox();
834        prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
835        testPanel.add(prefCheckComplexBeforeUpload, a);
836
837        final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
838        sourcesList = new EditableList(tr("TagChecker source"));
839        sourcesList.setItems(sources);
840        testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
841        testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
842
843        ActionListener disableCheckActionListener = e -> handlePrefEnable();
844        prefCheckKeys.addActionListener(disableCheckActionListener);
845        prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
846        prefCheckComplex.addActionListener(disableCheckActionListener);
847        prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
848
849        handlePrefEnable();
850
851        prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
852        prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
853        testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
854
855        prefCheckValuesBeforeUpload = new JCheckBox();
856        prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
857        testPanel.add(prefCheckValuesBeforeUpload, a);
858
859        prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
860        prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value."));
861        testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
862
863        prefCheckFixmesBeforeUpload = new JCheckBox();
864        prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
865        testPanel.add(prefCheckFixmesBeforeUpload, a);
866    }
867
868    /**
869     * Enables/disables the source list field
870     */
871    public void handlePrefEnable() {
872        boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
873                || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
874        sourcesList.setEnabled(selected);
875    }
876
877    @Override
878    public boolean ok() {
879        enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
880        testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
881                || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
882
883        Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
884        Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
885        Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
886        Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
887        Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
888        Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
889        Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
890        Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
891        return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
892    }
893
894    @Override
895    public Command fixError(TestError testError) {
896        List<Command> commands = new ArrayList<>(50);
897
898        Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
899        for (OsmPrimitive p : primitives) {
900            Map<String, String> tags = p.getKeys();
901            if (tags.isEmpty()) {
902                continue;
903            }
904
905            for (Entry<String, String> prop: tags.entrySet()) {
906                String key = prop.getKey();
907                String value = prop.getValue();
908                if (value == null || value.trim().isEmpty()) {
909                    commands.add(new ChangePropertyCommand(p, key, null));
910                } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains("  ")) {
911                    commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
912                } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains("  ")) {
913                    commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
914                } else {
915                    String evalue = Entities.unescape(value);
916                    if (!evalue.equals(value)) {
917                        commands.add(new ChangePropertyCommand(p, key, evalue));
918                    }
919                }
920            }
921        }
922
923        if (commands.isEmpty())
924            return null;
925        if (commands.size() == 1)
926            return commands.get(0);
927
928        return new SequenceCommand(tr("Fix tags"), commands);
929    }
930
931    @Override
932    public boolean isFixable(TestError testError) {
933        if (testError.getTester() instanceof TagChecker) {
934            int code = testError.getCode();
935            return code == EMPTY_VALUES || code == INVALID_SPACE ||
936                   code == INVALID_KEY_SPACE || code == INVALID_HTML ||
937                   code == MULTIPLE_SPACES;
938        }
939
940        return false;
941    }
942}