View Javadoc

1   /**
2    * 
3    */
4   package org.apache.maven.plugin.linkcheck.validation;
5   
6   import org.apache.commons.httpclient.Credentials;
7   import org.apache.commons.httpclient.Header;
8   import org.apache.commons.httpclient.HostConfiguration;
9   import org.apache.commons.httpclient.HttpClient;
10  import org.apache.commons.httpclient.HttpException;
11  import org.apache.commons.httpclient.HttpMethod;
12  import org.apache.commons.httpclient.HttpState;
13  import org.apache.commons.httpclient.HttpStatus;
14  import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
15  import org.apache.commons.httpclient.NTCredentials;
16  import org.apache.commons.httpclient.StatusLine;
17  import org.apache.commons.httpclient.UsernamePasswordCredentials;
18  import org.apache.commons.httpclient.methods.GetMethod;
19  import org.apache.commons.httpclient.methods.HeadMethod;
20  import org.apache.commons.logging.Log;
21  import org.apache.commons.logging.LogFactory;
22  
23  import java.io.IOException;
24  import java.net.URL;
25  
26  /**
27   * Checks links which are normal URLs
28   * 
29   * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
30   * @author <a href="mailto:aheritier@apache.org">Arnaud Heritier</a>
31   * @version $Id: HTTPLinkValidator.java 155277 2005-02-24 23:23:29Z aheritier $
32   */
33  public final class OnlineHTTPLinkValidator extends HTTPLinkValidator
34  {
35      /**
36       * Log for debug output
37       */
38      private final static Log LOG = LogFactory.getLog( OnlineHTTPLinkValidator.class );
39  
40      /** The maximum number of redirections for a link */
41      private final static int MAX_NB_REDIRECT = 10;
42  
43      /** Use the get method to test pages. */
44      private final static String GET_METHOD = "get";
45  
46      /** Use the head method to test pages. */
47      private final static String HEAD_METHOD = "head";
48  
49      private String proxyHost;
50  
51      private int proxyPort;
52  
53      private String proxyUser;
54  
55      private String proxyPass;
56  
57      private String proxyNtlmHost;
58  
59      private String proxyNtlmDomain;
60  
61      private String method = HEAD_METHOD;
62  
63      private transient HttpClient cl;
64  
65      public OnlineHTTPLinkValidator( String method, String proxyHost, String proxyPort, String proxyUser,
66                                      String proxyPass, String proxyNtlmHost, String proxyNtlmDomain )
67      {
68          if ( LOG.isDebugEnabled() )
69          {
70              LOG.debug( "Will use method : [" + method + "]" );
71          }
72          this.method = method;
73          if ( proxyHost == null || proxyHost.trim().equals( "" ) )
74          {
75              this.proxyHost = null;
76          }
77          else
78          {
79              this.proxyHost = proxyHost;
80              if ( proxyPort != null )
81              {
82                  try
83                  {
84                      this.proxyPort = Integer.parseInt( proxyPort );
85                  }
86                  catch ( NumberFormatException e )
87                  {
88                      LOG.warn( "Invalid number for Proxy Port:" + proxyPort );
89                      LOG.warn( "Proxy Port won't be used." );
90                      this.proxyPort = -1;
91                  }
92              }
93              this.proxyUser = proxyUser;
94              this.proxyPass = proxyPass;
95              if ( proxyNtlmHost != null && !proxyNtlmHost.trim().equals( "" ) )
96              {
97                  this.proxyNtlmHost = proxyNtlmHost;
98                  this.proxyNtlmDomain = proxyNtlmDomain;
99              }
100         }
101         initHttpClient();
102     }
103 
104     /**
105      * @see org.apache.maven.plugin.linkcheck.LinkValidator#validateLink(org.apache.maven.plugin.linkcheck.LinkValidationItem)
106      */
107     public LinkValidationResult validateLink( LinkValidationItem lvi )
108     {
109         if ( this.cl == null )
110         {
111             initHttpClient();
112         }
113         try
114         {
115             String link = lvi.getLink();
116             HttpMethod hm = null;
117             try
118             {
119                 hm = checkLink( this.cl, link, 0 );
120             }
121             catch ( Throwable t )
122             {
123                 if ( LOG.isDebugEnabled() )
124                 {
125                     LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]", t );
126                 }
127                 else
128                 {
129                     LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]" );
130                 }
131                 return new LinkValidationResult( LinkValidationResult.ERROR, false, t.getClass().getName() + " : "
132                                 + t.getMessage() );
133             }
134             if ( hm == null )
135             {
136                 return new LinkValidationResult( LinkValidationResult.ERROR, false, "Cannot retreive HTTP Status" );
137             }
138             if ( hm.getStatusCode() == HttpStatus.SC_OK )
139             {
140                 return new LinkValidationResult( LinkValidationResult.VALID, true, hm.getStatusCode() + " "
141                                 + hm.getStatusText() );
142             }
143             else
144             {
145                 // If there's a redirection ... add a warning
146                 if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
147                                 || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
148                                 || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
149                 {
150                     LOG.warn( "Received: [" + hm.getStatusCode() + "] for [" + lvi.getLink() + "] in page ["
151                                     + lvi.getSource() + "]" );
152                     return new LinkValidationResult( LinkValidationResult.WARNING, true, hm.getStatusCode() + " "
153                                     + hm.getStatusText() );
154                 }
155                 else
156                 {
157                     LOG.error( "Received: [" + hm.getStatusCode() + "] for [" + lvi.getLink() + "] in page ["
158                                     + lvi.getSource() + "]" );
159                     return new LinkValidationResult( LinkValidationResult.ERROR, false, hm.getStatusCode() + " "
160                                     + hm.getStatusText() );
161                 }
162             }
163 
164         }
165         catch ( Throwable t )
166         {
167             if ( LOG.isDebugEnabled() )
168             {
169                 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]", t );
170             }
171             else
172             {
173                 LOG.error( "Received: [" + t + "] for [" + lvi.getLink() + "] in page [" + lvi.getSource() + "]" );
174             }
175             return new LinkValidationResult( LinkValidationResult.ERROR, false, t.getMessage() );
176         }
177     }
178 
179     private void initHttpClient()
180     {
181         LOG.debug( "A new HttpClient instance is needed ..." );
182         // Some web servers doesn't allow the default user-agent sent by httpClient
183         System.setProperty( "httpclient.useragent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)" );
184         this.cl = new HttpClient( new MultiThreadedHttpConnectionManager() );
185         HostConfiguration hc = new HostConfiguration();
186         HttpState state = new HttpState();
187         if ( this.proxyHost != null )
188         {
189             hc.setProxy( this.proxyHost, this.proxyPort );
190             if ( LOG.isDebugEnabled() )
191             {
192                 LOG.debug( "Proxy Host:" + this.proxyHost );
193                 LOG.debug( "Proxy Port:" + this.proxyPort );
194             }
195             if ( this.proxyUser != null && this.proxyPass != null )
196             {
197                 if ( LOG.isDebugEnabled() )
198                 {
199                     LOG.debug( "Proxy User:" + this.proxyUser );
200                 }
201                 Credentials credentials;
202                 if ( this.proxyNtlmHost != null )
203                 {
204                     credentials =
205                         new NTCredentials( this.proxyUser, this.proxyPass, this.proxyNtlmHost, this.proxyNtlmDomain );
206                 }
207                 else
208                 {
209                     credentials = new UsernamePasswordCredentials( this.proxyUser, this.proxyPass );
210                 }
211                 state.setProxyCredentials( null, null, credentials );
212             }
213 
214         }
215         else
216         {
217             LOG.debug( "Not using a proxy" );
218         }
219         this.cl.setHostConfiguration( hc );
220         this.cl.setState( state );
221         LOG.debug( "New HttpClient instance created." );
222     }
223 
224     private HttpMethod checkLink( HttpClient cl, String link, int nbRedirect ) throws HttpException, IOException
225     {
226         if ( nbRedirect > MAX_NB_REDIRECT )
227         {
228             throw new HttpException( "Maximum number of redirections (" + MAX_NB_REDIRECT + ") exceeded" );
229         }
230         HttpMethod hm;
231         if ( HEAD_METHOD.equals( this.method ) )
232         {
233             hm = new HeadMethod( link );
234         }
235         else
236         {
237             hm = new GetMethod( link );
238         }
239         try
240         {
241             // We want to do it manually
242             hm.setFollowRedirects( false );
243             URL url = new URL( link );
244             cl.getHostConfiguration().setHost( url.getHost(), url.getPort(), url.getProtocol() );
245             cl.executeMethod( hm );
246             StatusLine sl = hm.getStatusLine();
247 
248             if ( sl == null )
249             {
250                 LOG.error( "Unknown error validating link : " + link );
251                 return null;
252             }
253 
254             if ( hm.getStatusCode() == HttpStatus.SC_MOVED_PERMANENTLY
255                             || hm.getStatusCode() == HttpStatus.SC_MOVED_TEMPORARILY
256                             || hm.getStatusCode() == HttpStatus.SC_TEMPORARY_REDIRECT )
257             {
258                 Header locationHeader = hm.getResponseHeader( "location" );
259                 if ( locationHeader == null )
260                 {
261                     LOG.error( "Site sent redirect, but did not set Location header" );
262                     return hm;
263                 }
264                 else
265                 {
266                     String newLink = locationHeader.getValue();
267                     // Be careful to absolute/relative links
268                     if ( !newLink.startsWith( "http://" ) && !newLink.startsWith( "https://" ) )
269                     {
270                         if ( newLink.startsWith( "/" ) )
271                         {
272                             URL oldUrl = new URL( link );
273                             newLink =
274                                 oldUrl.getProtocol() + "://" + oldUrl.getHost()
275                                                 + ( oldUrl.getPort() > 0 ? ":" + oldUrl.getPort() : "" ) + newLink;
276                         }
277                         else
278                         {
279                             newLink = link + newLink;
280                         }
281                     }
282                     HttpMethod oldHm = hm;
283                     LOG.info( "[" + link + "] is redirected to [" + newLink + "]" );
284                     oldHm.releaseConnection();
285                     hm = checkLink( cl, newLink, nbRedirect + 1 );
286                     // Restore the hm to "Moved permanently" | "Moved temporarily" | "Temporary redirect"
287                     // if the new location is found to allow us to report it
288                     if ( hm.getStatusCode() == HttpStatus.SC_OK && nbRedirect == 0 )
289                     {
290                         return oldHm;
291                     }
292                 }
293             }
294 
295         }
296         finally
297         {
298             hm.releaseConnection();
299         }
300         return hm;
301     }
302 
303 }