View Javadoc
1   package de.dlr.shepard.common.util;
2   
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.Test;
6   
7   public class HtmlSanitizerTest {
8   
9     @Test
10    public void testLinkHtmlSanitizing() {
11      String unsafeHtml = "<p><a href='http://example.com/' onclick='stealCookies()'>Link</a></p>";
12      String safeHtml = "<p><a href=\"http://example.com/\" rel=\"nofollow\">Link</a></p>";
13  
14      assertEquals(safeHtml, HtmlSanitizer.cleanHtmlString(unsafeHtml));
15    }
16  
17    @Test
18    public void testSimpleTextSanitizing() {
19      String textString = "This is my text that should not be changed by sanitizing.";
20  
21      assertEquals(textString, HtmlSanitizer.cleanHtmlString(textString));
22    }
23  
24    @Test
25    public void testScriptHtmlSanitizing() {
26      String unsafeHtml = "<p>Useful information, thanks!<script>alert('This is an injection!');</script></p>";
27      String safeHtml = "<p>Useful information, thanks!</p>";
28  
29      assertEquals(safeHtml, HtmlSanitizer.cleanHtmlString(unsafeHtml));
30    }
31  
32    @Test
33    public void testButtonOnClickHtmlSanitizing() {
34      String unsafeHtml = "<button onclick=\"alert('Button')\">Hello</button>";
35      String safeHtml = "Hello";
36  
37      assertEquals(safeHtml, HtmlSanitizer.cleanHtmlString(unsafeHtml));
38    }
39  
40    @Test
41    public void testImageHtmlSanitizing() {
42      // Only 'https://' img sources are allowed
43      String unsafeHtmlSrc = "<img src=\"test.jpg\" onerror=\"alert('XSS Hata!');\">";
44      String safeHtmlSrc = "<img>";
45  
46      assertEquals(safeHtmlSrc, HtmlSanitizer.cleanHtmlString(unsafeHtmlSrc));
47  
48      // No 'on-error' is allowed
49      String unsafeHtmlOnError = "<img src=\"https://my-website.xyz\" onerror=\"alert('XSS Hata!');\">";
50      String safeHtmlOnerror = "<img src=\"https://my-website.xyz\">";
51  
52      assertEquals(safeHtmlOnerror, HtmlSanitizer.cleanHtmlString(unsafeHtmlOnError));
53    }
54  
55    @Test
56    public void testTableHtmlSanitizing() {
57      // Testing the removal of a secure but unwanted Html tag
58      String unwantedHtmlCaptionTag =
59        """
60        <table>
61        <caption>Monthly savings</caption>
62        <tbody>
63        <tr>
64        <th>Month</th>
65        <th>Savings</th>
66        </tr>
67        <tr>
68        <td>January</td>
69        <td>$100</td>
70        </tr>
71        <tr>
72        <td>February</td>
73        <td>$50</td>
74        </tr>
75        </tbody>
76        </table>""";
77      String wantedHtmlCaptionTag =
78        """
79        <table>
80         Monthly savings
81         <tbody>
82          <tr>
83           <th>Month</th>
84           <th>Savings</th>
85          </tr>
86          <tr>
87           <td>January</td>
88           <td>$100</td>
89          </tr>
90          <tr>
91           <td>February</td>
92           <td>$50</td>
93          </tr>
94         </tbody>
95        </table>""";
96  
97      assertEquals(wantedHtmlCaptionTag, HtmlSanitizer.cleanHtmlString(unwantedHtmlCaptionTag));
98    }
99  
100   @Test
101   public void testSafeHtmlSanitizing() {
102     // have a safe Html string with wanted tags and dont remove anything
103     String htmlString =
104       """
105       <h1>My Heading</h1>
106       <p>This is a <b>paragraph</b> with <i>italic</i>, <u>underlined</u>, and <strike>strikethrough</strike> text.</p>
107       <p>Here is an <a href="https://example.com" rel="nofollow">example link</a>.</p>
108       <p>This line of text includes a <span>span element</span>.</p>
109       <p>Here is a line break<br>
110         like this.</p>
111       <p>You can include some <code>inline code</code> as well.</p>
112       <p>You can include some <s>strike format</s> as well.</p>
113       <ul>
114        <li>Unordered list item 1</li>
115        <li>Unordered list item 2</li>
116        <li>Unordered list item 3</li>
117       </ul>
118       <ol>
119        <li>Ordered list item 1</li>
120        <li>Ordered list item 2</li>
121        <li>Ordered list item 3</li>
122       </ol>""";
123 
124     assertEquals(true, HtmlSanitizer.isSafeHtml(htmlString));
125 
126     // This is the valid html string from the LabJournal integration test
127     String htmlString2 =
128       """
129       <h3>This is my heading</h3>
130       <p>Here some <strong>bold text</strong>, some <em>italic text</em>, some <u>underline text</u>, some <code>code text</code></p>
131       <p>left</p><p style="text-align: center">center</p><p style="text-align: right">right</p><p></p>
132       <p><a target="_blank" rel="noopener noreferrer nofollow" href="https://shepard.com">This is a link</a></p>
133       <p></p>
134       <ul><li><p>List 1</p><ul><li><p>List 2</p></li></ul></li></ul>
135       <ol><li><p>List 1.1</p><ol><li><p>List 2.2</p></li></ol></li></ol><p></p>
136       <table style="min-width: 75px"><colgroup><col style="min-width: 25px">
137       <col style="min-width: 25px">
138       <col style="min-width: 25px"></colgroup><tbody><tr><th colspan="1" rowspan="1"><p>1</p></th>
139       <th colspan="1" rowspan="1"><p>2</p></th><th colspan="1" rowspan="1"><p>3</p></th></tr><tr>
140       <td colspan="1" rowspan="1"><p>3</p></td><td colspan="1" rowspan="1"><p>2</p></td>
141       <td colspan="1" rowspan="1"><p>1</p></td></tr><tr><td colspan="1" rowspan="1"><p>c</p></td>
142       <td colspan="1" rowspan="1"><p>b</p></td><td colspan="1" rowspan="1"><p>a</p></td></tr></tbody></table>
143       """;
144     assertEquals(true, HtmlSanitizer.isSafeHtml(htmlString2));
145   }
146 
147   @Test
148   public void testInvalidHtmlUnclosedTagSanitizing() {
149     String htmlString = "<h1>My Heading";
150     String repairedHtmlString = "<h1>My Heading</h1>";
151 
152     assertEquals(false, HtmlSanitizer.isSafeHtml(htmlString));
153     assertEquals(repairedHtmlString, HtmlSanitizer.cleanHtmlString(htmlString));
154   }
155 
156   @Test
157   public void testUnSafeHtmlSanitizing() {
158     // This is the invalid html string from the LabJournal integration test
159     String htmlString1 =
160       """
161       <h1>This is my heading</h1>
162       <p>Here some <strong>bold text</strong>, some <em>italic text</em>, some <u>underline text</u>, some <code>code text</code></p>
163       <p>left</p><p style="text-align: center">center</p><p style="text-align: right">right</p><p></p>
164       <p><a target="_blank" rel="noopener noreferrer nofollow" href="https://shepard.com">This is a link</a></p>
165       <p></p>
166       <ul><li><p>List 1</p><ul><li><p>List 2</p></li></ul></li></ul>
167       <ol><li><p>List 1.1</p><ol><li><p>List 2.2</p></li></ol></li></ol><p></p>
168       <table style="min-width: 75px"><colgroup><col style="min-width: 25px">
169       <col style="min-width: 25px">
170       <script>alert("dangerous")</script>
171       """;
172     assertEquals(false, HtmlSanitizer.isSafeHtml(htmlString1));
173   }
174 
175   @Test
176   public void testKeepAlignment() {
177     // keep alignment attributes on HTML
178     String htmlString =
179       """
180       <p style="text-align: center">asdasdas</p>
181       <p style="text-align: right">my desription</p>
182       <p>is here</p>""";
183 
184     assertEquals(htmlString, HtmlSanitizer.cleanHtmlString(htmlString));
185   }
186 }