1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.tools.plugin.generator;
20
21 import org.codehaus.plexus.util.StringUtils;
22 import org.jsoup.Jsoup;
23 import org.jsoup.internal.StringUtil;
24 import org.jsoup.nodes.Document;
25 import org.jsoup.nodes.Element;
26 import org.jsoup.nodes.Node;
27 import org.jsoup.nodes.TextNode;
28 import org.jsoup.select.NodeTraversor;
29 import org.jsoup.select.NodeVisitor;
30
31
32
33
34
35
36
37 public class HtmlToPlainTextConverter implements Converter {
38 @Override
39 public String convert(String text) {
40 if (StringUtils.isBlank(text)) {
41 return text;
42 }
43 Document document = Jsoup.parse(text);
44 return getPlainText(document);
45 }
46
47
48
49
50
51
52
53 private String getPlainText(Element element) {
54 FormattingVisitor formatter = new FormattingVisitor();
55 NodeTraversor.traverse(formatter, element);
56
57 return formatter.toString();
58 }
59
60
61 private static class FormattingVisitor implements NodeVisitor {
62 private StringBuilder accum = new StringBuilder();
63
64
65 public void head(Node node, int depth) {
66 String name = node.nodeName();
67 if (node instanceof TextNode) {
68 accum.append(((TextNode) node).text());
69 } else if (name.equals("li")) {
70 accum.append("\n * ");
71 } else if (name.equals("dt")) {
72 accum.append(" ");
73 } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
74 accum.append("\n");
75 }
76 }
77
78
79 public void tail(Node node, int depth) {
80 String name = node.nodeName();
81 if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) {
82 accum.append("\n");
83 } else if (name.equals("a")) {
84
85 String link = node.absUrl("href");
86 if (!link.isEmpty()) {
87 accum.append(String.format(" <%s>", link));
88 }
89 }
90 }
91
92 @Override
93 public String toString() {
94
95 return accum.toString().replaceAll(" +", " ").replace("\n ", "\n");
96 }
97 }
98 }