View Javadoc
1   package de.dlr.shepard.common.util;
2   
3   import org.jsoup.Jsoup;
4   import org.jsoup.safety.Safelist;
5   
6   public class HtmlSanitizer {
7   
8     /**
9      * Checks if the given Html string contains unwanted/ unsecure Html tags or attributes specified by the safelist.
10     * @param html
11     * @return boolean - true, if Html string does not contain any unwanted tags or attributes
12     */
13    public static boolean isSafeHtml(String html) {
14      return Jsoup.isValid(html, getSafeList());
15    }
16  
17    /**
18     * Returns a sanitized version of the string passed into this function. The string is sanitized string only contains tags and attributes that are specified in safelist.
19     * The sanitized string still contains all values inside the tags. Only the tags and attributes are removed.
20     * @param html
21     * @return String - sanitized Html string
22     */
23    public static String cleanHtmlString(String html) {
24      return Jsoup.clean(html, getSafeList());
25    }
26  
27    private static Safelist getSafeList() {
28      return Safelist.basicWithImages()
29        // Tags that are allowed in the 'basic' Safelist, but are unwanted
30        .removeTags("blockquote", "cite", "dl", "dt", "dd", "h4", "h5", "h6", "small", "sub", "sup", "tfoot", "q")
31        // Allow specific tags
32        .addTags("th", "thead", "tbody", "tr", "table", "td", "h1", "h2", "h3", "colgroup", "col", "strike", "s")
33        // Allow specific attributes
34        .addAttributes(":all", "style", "colspan", "rowspan")
35        .addAttributes("a", "target", "rel");
36    }
37  }