1 package de.dlr.shepard.common.util;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.safety.Safelist;
5
6 public class HtmlSanitizer {
7
8 /**
9 * Checks if the given Html string contains unwanted/ unsecure Html tags or attributes specified by the safelist.
10 * @param html
11 * @return boolean - true, if Html string does not contain any unwanted tags or attributes
12 */
13 public static boolean isSafeHtml(String html) {
14 return Jsoup.isValid(html, getSafeList());
15 }
16
17 /**
18 * Returns a sanitized version of the string passed into this function. The string is sanitized string only contains tags and attributes that are specified in safelist.
19 * The sanitized string still contains all values inside the tags. Only the tags and attributes are removed.
20 * @param html
21 * @return String - sanitized Html string
22 */
23 public static String cleanHtmlString(String html) {
24 return Jsoup.clean(html, getSafeList());
25 }
26
27 private static Safelist getSafeList() {
28 return Safelist.basicWithImages()
29 // Tags that are allowed in the 'basic' Safelist, but are unwanted
30 .removeTags("blockquote", "cite", "dl", "dt", "dd", "h4", "h5", "h6", "small", "sub", "sup", "tfoot", "q")
31 // Allow specific tags
32 .addTags("th", "thead", "tbody", "tr", "table", "td", "h1", "h2", "h3", "colgroup", "col", "strike", "s")
33 // Allow specific attributes
34 .addAttributes(":all", "style", "colspan", "rowspan")
35 .addAttributes("a", "target", "rel");
36 }
37 }