1 package org.apache.maven.doxia.linkcheck;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.Writer;
25 import java.nio.charset.Charset;
26 import java.nio.charset.UnsupportedCharsetException;
27 import java.util.Arrays;
28 import java.util.Iterator;
29 import java.util.LinkedList;
30 import java.util.Set;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34
35 import org.apache.maven.doxia.linkcheck.model.LinkcheckFile;
36 import org.apache.maven.doxia.linkcheck.model.LinkcheckFileResult;
37 import org.apache.maven.doxia.linkcheck.model.LinkcheckModel;
38 import org.apache.maven.doxia.linkcheck.model.io.xpp3.LinkcheckModelXpp3Writer;
39 import org.apache.maven.doxia.linkcheck.validation.FileLinkValidator;
40 import org.apache.maven.doxia.linkcheck.validation.HTTPLinkValidationResult;
41 import org.apache.maven.doxia.linkcheck.validation.LinkValidationItem;
42 import org.apache.maven.doxia.linkcheck.validation.LinkValidationResult;
43 import org.apache.maven.doxia.linkcheck.validation.LinkValidatorManager;
44 import org.apache.maven.doxia.linkcheck.validation.MailtoLinkValidator;
45 import org.apache.maven.doxia.linkcheck.validation.OfflineHTTPLinkValidator;
46 import org.apache.maven.doxia.linkcheck.validation.OnlineHTTPLinkValidator;
47
48 import org.codehaus.plexus.util.FileUtils;
49 import org.codehaus.plexus.util.IOUtil;
50 import org.codehaus.plexus.util.ReaderFactory;
51 import org.codehaus.plexus.util.StringUtils;
52 import org.codehaus.plexus.util.WriterFactory;
53
54
55
56
57
58
59
60
61
62
63
64
65 public final class DefaultLinkCheck
66 implements LinkCheck
67 {
68
69 private static final Log LOG = LogFactory.getLog( DefaultLinkCheck.class );
70
71
72 private static final long MEG = 1024 * 1024;
73
74
75 private File basedir;
76
77
78 private File linkCheckCache;
79
80
81
82
83
84 private String[] excludedLinks = null;
85
86
87 private String[] excludedPages = null;
88
89
90
91
92
93
94 private int[] excludedHttpStatusErrors = null;
95
96
97
98
99
100
101 private int[] excludedHttpStatusWarnings = null;
102
103
104 private boolean online;
105
106
107 private HttpBean http;
108
109
110 private LinkValidatorManager lvm = null;
111
112
113 private File reportOutput;
114
115
116 private String reportOutputEncoding = "UTF-8";
117
118
119 private String baseURL;
120
121
122 private String encoding = ReaderFactory.UTF_8;
123
124
125
126
127
128
129 public void setBasedir( File base )
130 {
131 this.basedir = base;
132 }
133
134
135 public void setBaseURL( String url )
136 {
137 this.baseURL = url;
138 }
139
140
141 public void setExcludedHttpStatusErrors( int[] excl )
142 {
143 this.excludedHttpStatusErrors = excl;
144 }
145
146
147 public void setExcludedHttpStatusWarnings( int[] excl )
148 {
149 this.excludedHttpStatusWarnings = excl;
150 }
151
152
153 public void setExcludedLinks( String[] excl )
154 {
155 this.excludedLinks = excl;
156 }
157
158
159 public void setExcludedPages( String[] excl )
160 {
161 this.excludedPages = excl;
162 }
163
164
165 public void setHttp( HttpBean http )
166 {
167 this.http = http;
168 }
169
170
171 public void setLinkCheckCache( File cacheFile )
172 {
173 this.linkCheckCache = cacheFile;
174 }
175
176
177 public void setOnline( boolean onLine )
178 {
179 this.online = onLine;
180 }
181
182
183 public void setReportOutput( File file )
184 {
185 this.reportOutput = file;
186 }
187
188
189 public void setReportOutputEncoding( String encoding )
190 {
191 this.reportOutputEncoding = encoding;
192 }
193
194
195 public LinkcheckModel execute()
196 throws LinkCheckException
197 {
198 if ( this.basedir == null )
199 {
200 LOG.error( "No base directory specified!" );
201
202 throw new NullPointerException( "The basedir can't be null!" );
203 }
204
205 if ( this.reportOutput == null )
206 {
207 LOG.warn( "No output file specified! Results will not be written!" );
208 }
209
210 LinkcheckModel model = new LinkcheckModel();
211 model.setModelEncoding( reportOutputEncoding );
212 model.setFiles( new LinkedList() );
213
214 displayMemoryConsumption();
215
216 LinkValidatorManager validator = getLinkValidatorManager();
217 try
218 {
219 validator.loadCache( this.linkCheckCache );
220 }
221 catch ( IOException e )
222 {
223 throw new LinkCheckException( "Could not load cache: " + e.getMessage(), e );
224 }
225
226 displayMemoryConsumption();
227
228 LOG.info( "Begin to check links in files..." );
229
230 try
231 {
232 findAndCheckFiles( this.basedir, model );
233 }
234 catch ( IOException e )
235 {
236 throw new LinkCheckException( "Could not scan base directory: " + basedir.getAbsolutePath(), e );
237 }
238
239 LOG.info( "Links checked." );
240
241 displayMemoryConsumption();
242
243 try
244 {
245 createDocument( model );
246 }
247 catch ( IOException e )
248 {
249 throw new LinkCheckException( "Could not write the linkcheck document: " + e.getMessage(), e );
250 }
251
252 try
253 {
254 validator.saveCache( this.linkCheckCache );
255 }
256 catch ( IOException e )
257 {
258 throw new LinkCheckException( "Could not save cache: " + e.getMessage(), e );
259 }
260
261 displayMemoryConsumption();
262
263 return model;
264 }
265
266
267 public void setEncoding( String encoding )
268 {
269 if ( StringUtils.isEmpty( encoding ) )
270 {
271 throw new IllegalArgumentException( "encoding is required" );
272 }
273 try
274 {
275 Charset.forName( encoding );
276 }
277 catch ( UnsupportedCharsetException e )
278 {
279 throw new IllegalArgumentException( "encoding '" + encoding + "' is unsupported" );
280 }
281
282 this.encoding = encoding;
283 }
284
285
286
287
288
289
290
291
292
293
294 private boolean isOnline()
295 {
296 return this.online;
297 }
298
299
300
301
302
303
304
305
306 private String[] getExcludedLinks()
307 {
308 return this.excludedLinks;
309 }
310
311
312
313
314
315
316 private String getExcludedPages()
317 {
318 LinkedList patternList = new LinkedList( FileUtils.getDefaultExcludesAsList() );
319
320 if ( excludedPages != null )
321 {
322 patternList.addAll( Arrays.asList( excludedPages ) );
323 }
324
325 return StringUtils.join( patternList.iterator(), "," );
326 }
327
328
329
330
331
332
333 private String getIncludedPages()
334 {
335 return "**/*.html,**/*.htm";
336 }
337
338
339
340
341
342
343
344 private int[] getExcludedHttpStatusErrors()
345 {
346 return this.excludedHttpStatusErrors;
347 }
348
349
350
351
352
353
354
355 private int[] getExcludedHttpStatusWarnings()
356 {
357 return this.excludedHttpStatusWarnings;
358 }
359
360
361
362
363
364
365
366
367 private LinkValidatorManager getLinkValidatorManager()
368 {
369 if ( this.lvm == null )
370 {
371 initDefaultLinkValidatorManager();
372 }
373
374 return this.lvm;
375 }
376
377
378
379
380 private void initDefaultLinkValidatorManager()
381 {
382 this.lvm = new LinkValidatorManager();
383
384 if ( getExcludedLinks() != null )
385 {
386 this.lvm.setExcludedLinks( getExcludedLinks() );
387 }
388
389 this.lvm.addLinkValidator( new FileLinkValidator( encoding ) );
390
391 if ( isOnline() )
392 {
393 OnlineHTTPLinkValidator olv = new OnlineHTTPLinkValidator( http );
394
395 if ( this.baseURL != null )
396 {
397 olv.setBaseURL( baseURL );
398 }
399
400 this.lvm.addLinkValidator( olv );
401 }
402 else
403 {
404 this.lvm.addLinkValidator( new OfflineHTTPLinkValidator() );
405 }
406
407 this.lvm.addLinkValidator( new MailtoLinkValidator() );
408 }
409
410
411
412
413
414
415
416 private void findAndCheckFiles( File base, LinkcheckModel model )
417 throws IOException
418 {
419 Iterator files = FileUtils.getFiles( base, getIncludedPages(), getExcludedPages() ).iterator();
420
421 while( files.hasNext() )
422 {
423 checkFile( (File) files.next(), model );
424 }
425 }
426
427 private void checkFile( File file, LinkcheckModel model )
428 {
429 if ( LOG.isDebugEnabled() )
430 {
431 LOG.debug( " File - " + file );
432 }
433
434 String fileRelativePath = file.getAbsolutePath();
435
436 if ( fileRelativePath.startsWith( this.basedir.getAbsolutePath() ) )
437 {
438 fileRelativePath = fileRelativePath.substring( this.basedir.getAbsolutePath().length() + 1 );
439 }
440
441 fileRelativePath = fileRelativePath.replace( '\\', '/' );
442
443 LinkcheckFile linkcheckFile = new LinkcheckFile();
444 linkcheckFile.setAbsolutePath( file.getAbsolutePath() );
445 linkcheckFile.setRelativePath( fileRelativePath );
446
447 check( linkcheckFile );
448
449 model.addFile( linkcheckFile );
450
451 if ( ( model.getFiles().size() % 100 == 0 ) && LOG.isInfoEnabled() )
452 {
453 LOG.info( "Found " + model.getFiles().size() + " files so far." );
454 }
455 }
456
457
458
459
460
461
462 private void check( LinkcheckFile linkcheckFile )
463 {
464 linkcheckFile.setSuccessful( 0 );
465
466 linkcheckFile.setUnsuccessful( 0 );
467
468 if ( LOG.isDebugEnabled() )
469 {
470 LOG.debug( "Validating " + linkcheckFile.getRelativePath() );
471 }
472
473 final Set hrefs;
474
475 try
476 {
477 hrefs = LinkMatcher.match( new File( linkcheckFile.getAbsolutePath() ), encoding );
478 }
479 catch ( Throwable t )
480 {
481
482
483
484 LOG.error( "Received: [" + t + "] in page [" + linkcheckFile.getRelativePath() + "]" );
485 LOG.debug( t.getMessage(), t );
486
487 LinkcheckFileResult lcr = new LinkcheckFileResult();
488
489 lcr.setStatus( "PARSE FAILURE" );
490
491 lcr.setTarget( "N/A" );
492
493 linkcheckFile.addResult( lcr );
494
495 return;
496 }
497
498 String href;
499 LinkcheckFileResult lcr;
500 LinkValidationItem lvi;
501 LinkValidationResult result;
502
503 for ( Iterator iter = hrefs.iterator(); iter.hasNext(); )
504 {
505 href = (String) iter.next();
506
507 lcr = new LinkcheckFileResult();
508 lvi = new LinkValidationItem( new File( linkcheckFile.getAbsolutePath() ), href );
509 result = lvm.validateLink( lvi );
510 lcr.setTarget( href );
511 lcr.setErrorMessage( result.getErrorMessage() );
512
513 switch ( result.getStatus() )
514 {
515 case LinkcheckFileResult.VALID_LEVEL:
516 linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
517
518 lcr.setStatus( LinkcheckFileResult.VALID );
519
520
521 linkcheckFile.addResult( lcr );
522
523 break;
524 case LinkcheckFileResult.ERROR_LEVEL:
525 boolean ignoredError = false;
526 if ( result instanceof HTTPLinkValidationResult )
527 {
528 HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;
529
530 if ( httpResult.getHttpStatusCode() > 0
531 && getExcludedHttpStatusErrors() != null
532 && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
533 toStringArray( getExcludedHttpStatusErrors() ) ) >= 0 )
534 {
535 ignoredError = true;
536 }
537 }
538
539 if ( ignoredError )
540 {
541 linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
542 }
543 else
544 {
545 linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
546 }
547
548 lcr.setStatus( ignoredError ? LinkcheckFileResult.VALID : LinkcheckFileResult.ERROR );
549
550 linkcheckFile.addResult( lcr );
551
552 break;
553 case LinkcheckFileResult.WARNING_LEVEL:
554 boolean ignoredWarning = false;
555 if ( result instanceof HTTPLinkValidationResult )
556 {
557 HTTPLinkValidationResult httpResult = (HTTPLinkValidationResult) result;
558
559 if ( httpResult.getHttpStatusCode() > 0
560 && getExcludedHttpStatusWarnings() != null
561 && StringUtils.indexOfAny( String.valueOf( httpResult.getHttpStatusCode() ),
562 toStringArray( getExcludedHttpStatusWarnings() ) ) >= 0 )
563 {
564 ignoredWarning = true;
565 }
566 }
567
568 if ( ignoredWarning )
569 {
570 linkcheckFile.setSuccessful( linkcheckFile.getSuccessful() + 1 );
571 }
572 else
573 {
574 linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
575 }
576
577 lcr.setStatus( ignoredWarning ? LinkcheckFileResult.VALID : LinkcheckFileResult.WARNING );
578
579 linkcheckFile.addResult( lcr );
580
581 break;
582 case LinkcheckFileResult.UNKNOWN_LEVEL:
583 default:
584 linkcheckFile.setUnsuccessful( linkcheckFile.getUnsuccessful() + 1 );
585
586 lcr.setStatus( LinkcheckFileResult.UNKNOWN );
587
588 linkcheckFile.addResult( lcr );
589
590 break;
591 }
592 }
593
594 href = null;
595 lcr = null;
596 lvi = null;
597 result = null;
598 }
599
600
601
602
603 private void displayMemoryConsumption()
604 {
605 if ( LOG.isDebugEnabled() )
606 {
607 Runtime r = Runtime.getRuntime();
608 LOG.debug( "Memory: " + ( r.totalMemory() - r.freeMemory() ) / MEG + "M/" + r.totalMemory() / MEG
609 + "M" );
610 }
611 }
612
613
614
615
616
617
618 private void createDocument( LinkcheckModel model )
619 throws IOException
620 {
621 if ( this.reportOutput == null )
622 {
623 return;
624 }
625
626 File dir = this.reportOutput.getParentFile();
627 if ( dir != null )
628 {
629 dir.mkdirs();
630 }
631
632 Writer writer = null;
633 LinkcheckModelXpp3Writer xpp3Writer = new LinkcheckModelXpp3Writer();
634 try
635 {
636 writer = WriterFactory.newXmlWriter( this.reportOutput );
637 xpp3Writer.write( writer, model );
638 }
639 catch ( IllegalStateException e )
640 {
641 IOException ioe =
642 new IOException( e.getMessage() + " Maybe try to specify an other encoding instead of '"
643 + encoding + "'." );
644 ioe.initCause( e );
645 throw ioe;
646 }
647 finally
648 {
649 IOUtil.close( writer );
650 }
651
652 dir = null;
653 }
654
655 private static String[] toStringArray( int[] array )
656 {
657 if ( array == null )
658 {
659 throw new IllegalArgumentException( "array could not be null" );
660 }
661
662 String[] result = new String[array.length];
663 for ( int i = 0; i < array.length; i++ )
664 {
665 result[i] = String.valueOf( array[i] );
666 }
667 return result;
668 }
669 }