001package org.apache.maven.scm.provider.git.gitexe.command.changelog;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 * http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import org.apache.maven.scm.ChangeFile;
023import org.apache.maven.scm.ChangeSet;
024import org.apache.maven.scm.ScmFileStatus;
025import org.apache.maven.scm.log.ScmLogger;
026import org.apache.maven.scm.util.AbstractConsumer;
027import org.apache.regexp.RE;
028import org.apache.regexp.RESyntaxException;
029
030import java.util.ArrayList;
031import java.util.Calendar;
032import java.util.Date;
033import java.util.List;
034import java.util.Locale;
035import java.util.TimeZone;
036
/**
 * Parses the output of the git command line log, line by line, into a list of {@link ChangeSet}s.
 *
 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
 * @author Olivier Lamy
 */
042public class GitChangeLogConsumer
043    extends AbstractConsumer
044{
045    /**
046     * Date formatter for git timestamp
047     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
048     */
049    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
050
051    /**
052     * State machine constant: expecting header
053     */
054    private static final int STATUS_GET_HEADER = 1;
055
056    /**
057     * State machine constant: expecting author information
058     */
059    private static final int STATUS_GET_AUTHOR = 2;
060
061    /**
062     * State machine constant: expecting parent hash information
063     */
064    private static final int STATUS_RAW_TREE = 21;
065
066    /**
067     * State machine constant: expecting parent hash information
068     */
069    private static final int STATUS_RAW_PARENT = 22;
070
071    /**
072     * State machine constant: expecting author name, email and timestamp information
073     */
074    private static final int STATUS_RAW_AUTHOR = 23;
075
076    /**
077     * State machine constant: expecting committer name, email and timestamp information
078     */
079    private static final int STATUS_RAW_COMMITTER = 24;
080
081    /**
082     * State machine constant: expecting date information
083     */
084    private static final int STATUS_GET_DATE = 3;
085
086    /**
087     * State machine constant: expecting file information
088     */
089    private static final int STATUS_GET_FILE = 4;
090
091    /**
092     * State machine constant: expecting comments
093     */
094    private static final int STATUS_GET_COMMENT = 5;
095
096    /**
097     * The pattern used to match git header lines
098     */
099    private static final String HEADER_PATTERN = "^commit (.*)";
100
101    /**
102     * The pattern used to match git author lines
103     */
104    private static final String AUTHOR_PATTERN = "^Author: (.*)";
105
106    /**
107     * The pattern used to match git tree hash lines (raw mode)
108     */
109    private static final String RAW_TREE_PATTERN = "^tree ([:xdigit:]+)";
110
111    /**
112     * The pattern used to match git parent hash lines (raw mode)
113     */
114    private static final String RAW_PARENT_PATTERN = "^parent ([:xdigit:]+)";
115
116    /**
117     * The pattern used to match git author lines (raw mode)
118     */
119    private static final String RAW_AUTHOR_PATTERN = "^author (.+ <.+>) ([:digit:]+) (.*)";
120
121    /**
122     * The pattern used to match git author lines (raw mode)
123     */
124    private static final String RAW_COMMITTER_PATTERN = "^committer (.+ <.+>) ([:digit:]+) (.*)";
125
126    /**
127     * The pattern used to match git date lines
128     */
129    private static final String DATE_PATTERN = "^Date:\\s*(.*)";
130
131    /**
132     * The pattern used to match git file lines
133     */
134    private static final String FILE_PATTERN =
135        "^:\\d* \\d* [:xdigit:]*\\.* [:xdigit:]*\\.* ([:upper:])[:digit:]*\\t([^\\t]*)(\\t(.*))?";
136
137    /**
138     * Current status of the parser
139     */
140    private int status = STATUS_GET_HEADER;
141
142    /**
143     * List of change log entries
144     */
145    private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146
147    /**
148     * The current log entry being processed by the parser
149     */
150    private ChangeSet currentChange;
151
152    /**
153     * The current revision of the entry being processed by the parser
154     */
155    private String currentRevision;
156
157    /**
158     * The current comment of the entry being processed by the parser
159     */
160    private StringBuilder currentComment;
161
162    /**
163     * The regular expression used to match header lines
164     */
165    private RE headerRegexp;
166
167    /**
168     * The regular expression used to match author lines
169     */
170    private RE authorRegexp;
171
172    /**
173     * The regular expression used to match tree hash lines in raw mode
174     */
175    private RE rawTreeRegexp;
176
177    /**
178     * The regular expression used to match parent hash lines in raw mode
179     */
180    private RE rawParentRegexp;
181
182    /**
183     * The regular expression used to match author lines in raw mode
184     */
185    private RE rawAuthorRegexp;
186
187    /**
188     * The regular expression used to match committer lines in raw mode
189     */
190    private RE rawCommitterRegexp;
191
192    /**
193     * The regular expression used to match date lines
194     */
195    private RE dateRegexp;
196
197    /**
198     * The regular expression used to match file lines
199     */
200    private RE fileRegexp;
201
202    private String userDateFormat;
203
204    /**
205     * Default constructor.
206     */
207    public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
208    {
209        super( logger );
210
211        this.userDateFormat = userDateFormat;
212
213        try
214        {
215            headerRegexp = new RE( HEADER_PATTERN );
216            authorRegexp = new RE( AUTHOR_PATTERN );
217            dateRegexp = new RE( DATE_PATTERN );
218            fileRegexp = new RE( FILE_PATTERN );
219            rawTreeRegexp = new RE( RAW_TREE_PATTERN );
220            rawParentRegexp = new RE( RAW_PARENT_PATTERN );
221            rawAuthorRegexp = new RE( RAW_AUTHOR_PATTERN );
222            rawCommitterRegexp = new RE( RAW_COMMITTER_PATTERN );
223        }
224        catch ( RESyntaxException ex )
225        {
226            throw new RuntimeException(
227                "INTERNAL ERROR: Could not create regexp to parse git log file. This shouldn't happen. Something is probably wrong with the oro installation.",
228                ex );
229        }
230    }
231
232    public List<ChangeSet> getModifications()
233    {
234        // this is needed since the processFile does not always get a the end-sequence correctly.
235        processGetFile( "" );
236
237        return entries;
238    }
239
240    // ----------------------------------------------------------------------
241    // StreamConsumer Implementation
242    // ----------------------------------------------------------------------
243
244    /**
245     * {@inheritDoc}
246     */
247    public void consumeLine( String line )
248    {
249        switch ( status )
250        {
251            case STATUS_GET_HEADER:
252                processGetHeader( line );
253                break;
254            case STATUS_GET_AUTHOR:
255                processGetAuthor( line );
256                break;
257            case STATUS_GET_DATE:
258                processGetDate( line, null );
259                break;
260            case STATUS_GET_COMMENT:
261                processGetComment( line );
262                break;
263            case STATUS_GET_FILE:
264                processGetFile( line );
265                break;
266            case STATUS_RAW_TREE:
267                processGetRawTree( line );
268                break;
269            case STATUS_RAW_PARENT:
270                processGetRawParent( line );
271                break;
272            case STATUS_RAW_AUTHOR:
273                processGetRawAuthor( line );
274                break;
275            case STATUS_RAW_COMMITTER:
276                processGetRawCommitter( line );
277                break;
278            default:
279                throw new IllegalStateException( "Unknown state: " + status );
280        }
281    }
282
283    // ----------------------------------------------------------------------
284    //
285    // ----------------------------------------------------------------------
286
287    /**
288     * Process the current input line in the GET_HEADER state.  The
289     * author, date, and the revision of the entry are gathered.  Note,
290     * Git does not have per-file revisions, instead, the entire
291     * branch is given a single revision number, which is also used for
292     * the revision number of each file.
293     *
294     * @param line A line of text from the git log output
295     */
296    private void processGetHeader( String line )
297    {
298        if ( !headerRegexp.match( line ) )
299        {
300            return;
301        }
302
303        currentRevision = headerRegexp.getParen( 1 );
304
305        currentChange = new ChangeSet();
306
307        currentChange.setRevision( currentRevision );
308
309        status = STATUS_GET_AUTHOR;
310    }
311
312    /**
313     * Process the current input line in the STATUS_GET_AUTHOR state.  This
314     * state gathers all of the author information that are part of a log entry.
315     *
316     * @param line a line of text from the git log output
317     */
318    private void processGetAuthor( String line )
319    {
320        // this autodetects 'raw' format
321        if ( rawTreeRegexp.match( line ) )
322        {
323            status = STATUS_RAW_TREE;
324            processGetRawTree( line );
325            return;
326        }
327
328        if ( !authorRegexp.match( line ) )
329        {
330            return;
331        }
332        String author = authorRegexp.getParen( 1 );
333
334        currentChange.setAuthor( author );
335
336        status = STATUS_GET_DATE;
337    }
338
339    /**
340     * Process the current input line in the STATUS_RAW_TREE state.  This
341     * state gathers tree hash part of a log entry.
342     *
343     * @param line a line of text from the git log output
344     */
345    private void processGetRawTree( String line )
346    {
347        if ( !rawTreeRegexp.match( line ) )
348        {
349            return;
350        }
351        //here we could set treeHash if it appears in the model: currentChange.setTreeHash( rawTreeRegexp.getParen( 1 ) );
352        status = STATUS_RAW_PARENT;
353    }
354
355    /**
356     * Process the current input line in the STATUS_RAW_PARENT state.  This
357     * state gathers parent revisions of a log entry.
358     *
359     * @param line a line of text from the git log output
360     */
361    private void processGetRawParent( String line )
362    {
363        if ( !rawParentRegexp.match( line ) )
364        {
365            status = STATUS_RAW_AUTHOR;
366            processGetRawAuthor( line );
367            return;
368        }
369        String parentHash = rawParentRegexp.getParen( 1 );
370
371        addParentRevision( parentHash );
372    }
373
374    /**
375     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
376     * This method takes care of the difference.
377     *
378     * @param hash -
379     */
380    private void addParentRevision( String hash )
381    {
382        if ( currentChange.getParentRevision() == null )
383        {
384            currentChange.setParentRevision( hash );
385        }
386        else
387        {
388            currentChange.addMergedRevision( hash );
389        }
390    }
391
392    /**
393     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
394     * state gathers all the author information of a log entry.
395     *
396     * @param line a line of text from the git log output
397     */
398    private void processGetRawAuthor( String line )
399    {
400        if ( !rawAuthorRegexp.match( line ) )
401        {
402            return;
403        }
404        String author = rawAuthorRegexp.getParen( 1 );
405        currentChange.setAuthor( author );
406
407        String datestring = rawAuthorRegexp.getParen( 2 );
408        String tz = rawAuthorRegexp.getParen( 3 );
409
410        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
411        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
412        Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
413        c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
414        currentChange.setDate( c.getTime() );
415
416        status = STATUS_RAW_COMMITTER;
417    }
418
419    /**
420     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
421     * state gathers all the committer information of a log entry.
422     *
423     * @param line a line of text from the git log output
424     */
425    private void processGetRawCommitter( String line )
426    {
427        if ( !rawCommitterRegexp.match( line ) )
428        {
429            return;
430        }
431        // here we could set committer and committerDate, the same way as in processGetRawAuthor
432        status = STATUS_GET_COMMENT;
433    }
434
435    /**
436     * Process the current input line in the STATUS_GET_DATE state.  This
437     * state gathers all of the date information that are part of a log entry.
438     *
439     * @param line a line of text from the git log output
440     */
441    private void processGetDate( String line, Locale locale )
442    {
443        if ( !dateRegexp.match( line ) )
444        {
445            return;
446        }
447
448        String datestring = dateRegexp.getParen( 1 );
449
450        Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
451
452        currentChange.setDate( date );
453
454        status = STATUS_GET_COMMENT;
455    }
456
457    /**
458     * Process the current input line in the GET_COMMENT state.  This
459     * state gathers all of the comments that are part of a log entry.
460     *
461     * @param line a line of text from the git log output
462     */
463    private void processGetComment( String line )
464    {
465        if ( line.length() < 4 )
466        {
467            if ( currentComment == null )
468            {
469                currentComment = new StringBuilder();
470            }
471            else
472            {
473                currentChange.setComment( currentComment.toString() );
474                status = STATUS_GET_FILE;
475            }
476        }
477        else
478        {
479            if ( currentComment.length() > 0 )
480            {
481                currentComment.append( '\n' );
482            }
483
484            currentComment.append( line.substring( 4 ) );
485        }
486    }
487
488    /**
489     * Process the current input line in the GET_FILE state.  This state
490     * adds each file entry line to the current change log entry.  Note,
491     * the revision number for the entire entry is used for the revision
492     * number of each file.
493     *
494     * @param line A line of text from the git log output
495     */
496    private void processGetFile( String line )
497    {
498        if ( line.length() == 0 )
499        {
500            if ( currentChange != null )
501            {
502                entries.add( currentChange );
503            }
504
505            resetChangeLog();
506
507            status = STATUS_GET_HEADER;
508        }
509        else
510        {
511            if ( !fileRegexp.match( line ) )
512            {
513                return;
514            }
515            final String actionChar = fileRegexp.getParen( 1 );
516            // action is currently not used
517            final ScmFileStatus action;
518            String name = fileRegexp.getParen( 2 );
519            String originalName = null;
520            String originalRevision = null;
521            if ( "A".equals( actionChar ) )
522            {
523                action = ScmFileStatus.ADDED;
524            }
525            else if ( "M".equals( actionChar ) )
526            {
527                action = ScmFileStatus.MODIFIED;
528            }
529            else if ( "D".equals( actionChar ) )
530            {
531                action = ScmFileStatus.DELETED;
532            }
533            else if ( "R".equals( actionChar ) )
534            {
535                action = ScmFileStatus.RENAMED;
536                originalName = name;
537                name = fileRegexp.getParen( 4 );
538                originalRevision = currentChange.getParentRevision();
539            }
540            else if ( "C".equals( actionChar ) )
541            {
542                action = ScmFileStatus.COPIED;
543                originalName = name;
544                name = fileRegexp.getParen( 4 );
545                originalRevision = currentChange.getParentRevision();
546            }
547            else
548            {
549                action = ScmFileStatus.UNKNOWN;
550            }
551
552            final ChangeFile changeFile = new ChangeFile( name, currentRevision );
553            changeFile.setAction( action );
554            changeFile.setOriginalName( originalName );
555            changeFile.setOriginalRevision( originalRevision );
556            currentChange.addFile( changeFile );
557        }
558    }
559
560    private void resetChangeLog()
561    {
562        currentComment = null;
563        currentChange = null;
564    }
565}