// HtmlSanitizer.java
package de.dlr.shepard.util;
import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;
/**
 * Static helpers for validating and sanitizing HTML fragments with jsoup.
 *
 * <p>Both public helpers use the same safelist, which is assembled by {@code getSafeList()}.
 */
public final class HtmlSanitizer {

  /** Not instantiable: this class only offers static helpers. */
  private HtmlSanitizer() {}

  /**
   * Checks whether the given HTML string contains only tags and attributes that are permitted
   * by the safelist.
   *
   * @param html the HTML string to check; it is handed to jsoup unchanged (no null check is
   *     performed here)
   * @return {@code true} if the HTML string does not contain any unwanted tags or attributes,
   *     {@code false} otherwise
   */
  public static boolean isSafeHtml(String html) {
    return Jsoup.isValid(html, getSafeList());
  }

  /**
   * Returns a sanitized version of the given HTML string that only contains tags and attributes
   * permitted by the safelist.
   *
   * <p>Only the disallowed tags and attributes themselves are removed; the values enclosed by
   * removed tags are intended to stay in the result.
   *
   * @param html the HTML string to sanitize; it is handed to jsoup unchanged (no null check is
   *     performed here)
   * @return the sanitized HTML string
   */
  public static String cleanHtmlString(String html) {
    return Jsoup.clean(html, getSafeList());
  }

  /**
   * Builds the safelist used by both public helpers.
   *
   * <p>A fresh instance is created on every call, starting from jsoup's
   * {@code Safelist.basicWithImages()} and then adjusted below.
   *
   * @return the safelist describing the permitted tags and attributes
   */
  private static Safelist getSafeList() {
    return Safelist.basicWithImages()
      // Unwanted tags: dropped in case the base safelist permits them
      .removeTags("blockquote", "cite", "dl", "dt", "dd", "h4", "h5", "h6", "small", "sub", "sup", "tfoot", "q")
      // Additionally permitted tags: table structure, headings h1-h3 and strike-through
      .addTags("th", "thead", "tbody", "tr", "table", "td", "h1", "h2", "h3", "colgroup", "col", "strike", "s")
      // Attributes permitted on every tag (":all" is jsoup's pseudo tag for "all tags").
      // NOTE(review): "style" values are presumably not inspected by jsoup, so arbitrary inline
      // CSS passes through - confirm this is acceptable for the rendering context.
      .addAttributes(":all", "style", "colspan", "rowspan")
      // Attributes additionally permitted on links
      .addAttributes("a", "target", "rel");
  }
}