1 /**
2 *
3 */
4 package org.apache.maven.plugin.linkcheck.validation;
5
6 import org.apache.commons.httpclient.Credentials;
7 import org.apache.commons.httpclient.Header;
8 import org.apache.commons.httpclient.HostConfiguration;
9 import org.apache.commons.httpclient.HttpClient;
10 import org.apache.commons.httpclient.HttpException;
11 import org.apache.commons.httpclient.HttpMethod;
12 import org.apache.commons.httpclient.HttpState;
13 import org.apache.commons.httpclient.HttpStatus;
14 import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
15 import org.apache.commons.httpclient.NTCredentials;
16 import org.apache.commons.httpclient.StatusLine;
17 import org.apache.commons.httpclient.UsernamePasswordCredentials;
18 import org.apache.commons.httpclient.methods.GetMethod;
19 import org.apache.commons.httpclient.methods.HeadMethod;
20 import org.apache.commons.logging.Log;
21 import org.apache.commons.logging.LogFactory;
22
23 import java.io.IOException;
24 import java.net.URL;
25
26 /**
27 * Checks links which are normal URLs
28 *
29 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
30 * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
31 * @version $Id: HTTPLinkValidator.java 155277 2005-02-24 23:23:29Z aheritier $
32 */
33 public final class OnlineHTTPLinkValidator extends HTTPLinkValidator
34 {
35 /**
36 * Log for debug output
37 */
38 private final static Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class );
39
40 /** The maximum number of redirections for a link */
41 private final static int MAX_NB_REDIRECT = 10;
42
43 /** Use the get method to test pages. */
44 private final static String GET_METHOD = "get";
45
46 /** Use the head method to test pages. */
47 private final static String HEAD_METHOD = "head";
48
49 private String proxyHost;
50
51 private int proxyPort;
52
53 private String proxyUser;
54
55 private String proxyPass;
56
57 private String proxyNtlmHost;
58
59 private String proxyNtlmDomain;
60
61 private String method = HEAD_METHOD;
62
63 private transient HttpClient cl;
64
65 public OnlineHTTPLinkValidator( String method, String proxyHost, String proxyPort, String proxyUser,
66 String proxyPass, String proxyNtlmHost, String proxyNtlmDomain )
67 {
68 if ( LOG.isDebugEnabled() )
69 {
70 LOG.debug( "Will use method : [" + method + "]" );
71 }
72 this.method = method;
73 if ( proxyHost == null || proxyHost.trim().equals( "" ) )
74 {
75 this.proxyHost = null;
76 }
77 else
78 {
79 this.proxyHost = proxyHost;
80 if ( proxyPort != null )
81 {
82 try
83 {
84 this.proxyPort = Integer.parseInt( proxyPort );
85 }
86 catch ( NumberFormatException e )
87 {
88 LOG.warn( "Invalid number for Proxy Port:" + proxyPort );
89 LOG.warn( "Proxy Port won't be used." );
90 this.proxyPort = -1;
91 }
92 }
93 this.proxyUser = proxyUser;
94 this.proxyPass = proxyPass;
95 if ( proxyNtlmHost != null && !proxyNtlmHost.trim().equals( "" ) )
96 {
97 this.proxyNtlmHost = proxyNtlmHost;
98 this.proxyNtlmDomain = proxyNtlmDomain;
99 }
100 }
101 initHttpClient();
102 }
103
104 /**
105 * @see org.apache.maven.plugin.linkcheck.LinkValidator#validateLink(org.apache.maven.plugin.linkcheck.LinkValidationItem)
106 */
107 public LinkValidationResult validateLink( LinkValidationItem lvi )
108 {
109 if ( this.cl == null )
110 {
111 initHttpClient();
112 }
113 try
114 {
115 String link = lvi.getLink();
116 HttpMethod hm = null;
117 try
118 {
119 hm = checkLink( this.cl, link, 0 );
120 }
121 catch ( Throwable t )
122 {
123 if ( LOG.isDebugEnabled() )
124 {
125 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]", t );
126 }
127 else
128 {
129 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]" );
130 }
131 return new LinkValidationResult( LinkValidationResult.ERROR, false, t.getClass().getName() + " : "
132 + t.getMessage() );
133 }
134 if ( hm == null )
135 {
136 return new LinkValidationResult( LinkValidationResult.ERROR, false, "Cannot retreive HTTP Status" );
137 }
138 if ( hm.getStatusCode() == HttpStatus.SC_OK )
139 {
140 return new LinkValidationResult( LinkValidationResult.VALID, true, hm.getStatusCode() + " "
141 + hm.getStatusText() );
142 }
143 else
144 {
145
146 if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
147 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
148 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
149 {
150 LOG.warn( "Received: [" + hm.getStatusCode() + "] for [" + lvi.getLink() + "] in page ["
151 + lvi.getSource() + "]" );
152 return new LinkValidationResult( LinkValidationResult.WARNING, true, hm.getStatusCode() + " "
153 + hm.getStatusText() );
154 }
155 else
156 {
157 LOG.error( "Received: [" + hm.getStatusCode() + "] for [" + lvi.getLink() + "] in page ["
158 + lvi.getSource() + "]" );
159 return new LinkValidationResult( LinkValidationResult.ERROR, false, hm.getStatusCode() + " "
160 + hm.getStatusText() );
161 }
162 }
163
164 }
165 catch ( Throwable t )
166 {
167 if ( LOG.isDebugEnabled() )
168 {
169 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]", t );
170 }
171 else
172 {
173 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]" );
174 }
175 return new LinkValidationResult( LinkValidationResult.ERROR, false, t.getMessage() );
176 }
177 }
178
179 private void initHttpClient()
180 {
181 LOG.debug( "A new HttpClient instance is needed ..." );
182
183 System.setProperty( "httpclient.useragent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
184 this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() );
185 HostConfiguration hc = new HostConfiguration();
186 HttpState state = new HttpState();
187 if ( this.proxyHost != null )
188 {
189 hc.setProxy( this.proxyHost, this.proxyPort );
190 if ( LOG.isDebugEnabled() )
191 {
192 LOG.debug( "Proxy Host:" + this.proxyHost );
193 LOG.debug( "Proxy Port:" + this.proxyPort );
194 }
195 if ( this.proxyUser != null && this.proxyPass != null )
196 {
197 if ( LOG.isDebugEnabled() )
198 {
199 LOG.debug( "Proxy User:" + this.proxyUser );
200 }
201 Credentials credentials;
202 if ( this.proxyNtlmHost != null )
203 {
204 credentials =
205 new NTCredentials( this.proxyUser, this.proxyPass, this.proxyNtlmHost, this.proxyNtlmDomain );
206 }
207 else
208 {
209 credentials = new UsernamePasswordCredentials( this.proxyUser, this.proxyPass );
210 }
211 state.setProxyCredentials( null, null, credentials );
212 }
213
214 }
215 else
216 {
217 LOG.debug( "Not using a proxy" );
218 }
219 this.cl.setHostConfiguration( hc );
220 this.cl.setState( state );
221 LOG.debug( "New HttpClient instance created." );
222 }
223
224 private HttpMethod checkLink( HttpClient cl, String link, int nbRedirect ) throws HttpException, IOException
225 {
226 if ( nbRedirect > MAX_NB_REDIRECT )
227 {
228 throw new HttpException( "Maximum number of redirections (" + MAX_NB_REDIRECT + ") exceeded" );
229 }
230 HttpMethod hm;
231 if ( HEAD_METHOD.equals( this.method ) )
232 {
233 hm = new HeadMethod( link );
234 }
235 else
236 {
237 hm = new GetMethod( link );
238 }
239 try
240 {
241
242 hm.setFollowRedirects( false );
243 URL url = new URL( link );
244 cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() );
245 cl.executeMethod( hm );
246 StatusLine sl = hm.getStatusLine();
247
248 if ( sl == null )
249 {
250 LOG.error( "Unknown error validating link : " + link );
251 return null;
252 }
253
254 if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
255 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
256 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
257 {
258 Header locationHeader = hm.getResponseHeader( "location" );
259 if ( locationHeader == null )
260 {
261 LOG.error( "Site sent redirect, but did not set Location header" );
262 return hm;
263 }
264 else
265 {
266 String newLink = locationHeader.getValue();
267
268 if ( !newLink.startsWith( "http://" ) && !newLink.startsWith( "https://" ) )
269 {
270 if ( newLink.startsWith( "/" ) )
271 {
272 URL oldUrl = new URL( link );
273 newLink =
274 oldUrl.getProtocol() + "://" + oldUrl.getHost()
275 + ( oldUrl.getPort() > 0 ? ":" + oldUrl.getPort() : "" ) + newLink;
276 }
277 else
278 {
279 newLink = link + newLink;
280 }
281 }
282 HttpMethod oldHm = hm;
283 LOG.info( "[" + link + "] is redirected to [" + newLink + "]" );
284 oldHm.releaseConnection();
285 hm = checkLink( cl, newLink, nbRedirect + 1 );
286
287
288 if ( hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 )
289 {
290 return oldHm;
291 }
292 }
293 }
294
295 }
296 finally
297 {
298 hm.releaseConnection();
299 }
300 return hm;
301 }
302
303 }