git2html: da456f160a31b778e7ee5bdbdf5f739e41ba2e32

     1: #! /bin/bash
     2: 
     3: # git2html - Convert a git repository to a set of static HTML pages.
     4: # Copyright (c) 2011 Neal H. Walfield <neal@walfield.org>
     5: #
     6: # git2html is free software; you can redistribute it and/or modify
     7: # it under the terms of the GNU General Public License as published by
     8: # the Free Software Foundation; either version 3 of the License, or
     9: # (at your option) any later version.
    10: #
    11: # git2html is distributed in the hope that it will be useful,
    12: # but WITHOUT ANY WARRANTY; without even the implied warranty of
    13: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14: # GNU General Public License for more details.
    15: #
    16: # You should have received a copy of the GNU General Public License
    17: # along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18: 
    19: set -e
    20: set -o pipefail
    21: # set -x
    22: 
    23: usage()
    24: {
    25:   echo "Usage $0 [-prlbq] TARGET"
    26:   echo "Generate static HTML pages in TARGET for the specified git repository."
    27:   echo
    28:   echo "  -p  Project's name"
    29:   echo "  -r  Repository to clone from."
    30:   echo "  -l  Public repository link, e.g., 'http://host.org/project.git'"
    31:   echo "  -b  List of branches to process (default: all)."
    32:   echo "  -q  Be quiet."
    33:   echo "  -f  Force rebuilding of all pages."
    34:   exit "$1"
    35: }
    36: 
    37: show_progress=1
    38: force_rebuild=0
    39: 
    40: progress()
    41: {
    42:   if test x"$show_progress" = x1
    43:   then
    44:     echo "$@"
    45:   fi
    46: }
    47: 
    48: while getopts ":p:r:l:b:qf" opt
    49: do
    50:   case $opt in
    51:     p)
    52:       PROJECT=$OPTARG
    53:       ;;
    54:     r)
    55:       # Directory containing the repository.
    56:       REPOSITORY=$OPTARG
    57:       ;;
    58:     l)
    59:       PUBLIC_REPOSITORY=$OPTARG
    60:       ;;
    61:     b)
    62:       BRANCHES=$OPTARG
    63:       ;;
    64:     q)
    65:       show_progress=0
    66:       ;;
    67:     f)
    68:       force_rebuild=1
    69:       ;;
    70:     \?)
    71:       echo "Invalid option: -$OPTARG" >&2
    72:       usage
    73:       ;;
    74:   esac
    75: done
    76: shift $((OPTIND - 1))
    77: 
    78: if test $# -ne 1
    79: then
    80:   usage 1
    81: fi
    82: 
    83: # Where to create the html pages.
    84: TARGET="$1"
    85: 
    86: # Make sure TARGET is an absolute path.
    87: if test x"${TARGET%%/*}" != x
    88: then
    89:     TARGET=$(pwd)/$TARGET
    90: fi
    91: 
    92: # Make sure the target exists.
    93: mkdir -p "$TARGET"
    94: 
    95: CONFIG_FILE=".ht_git2html"
    96: 
    97: # Read the configuration file.
    98: # shellcheck source=/dev/null
    99: if test -e "$TARGET/$CONFIG_FILE"
   100: then
   101:   . "$TARGET/$CONFIG_FILE"
   102: fi
   103: 
   104: if test x"$REPOSITORY" = x
   105: then
   106:   echo "-r required."
   107:   echo
   108:   usage 1
   109: fi
   110: 
   111: # The output version
   112: CURRENT_TEMPLATE="$(sha1sum "$0")"
   113: if test "x$CURRENT_TEMPLATE" != "x$TEMPLATE"
   114: then
   115:   progress "Rebuilding all pages as output template changed."
   116:   force_rebuild=1
   117: fi
   118: TEMPLATE="$CURRENT_TEMPLATE"
   119: 
   120: {
   121:   save()
   122:   {
   123:     # Prefer environment variables and arguments to the configuration file.
   124:     echo "$1=\"\${$1:-\"$2\"}\""
   125:   }
   126:   save "PROJECT" "$PROJECT"
   127:   save "REPOSITORY" "$REPOSITORY"
   128:   save "PUBLIC_REPOSITORY" "$PUBLIC_REPOSITORY"
   129:   save "TARGET" "$TARGET"
   130:   save "BRANCHES" "$BRANCHES"
   131:   save "TEMPLATE" "$TEMPLATE"
   132: } > "$TARGET/$CONFIG_FILE"
   133: 
   134: html_header()
   135: {
   136:   title="$1"
   137:   top_level="$2"
   138: 
   139:   if test x"$PROJECT" != x -a x"$title" != x
   140:   then
   141:     # Title is not the empty string.  Prefix it with ": "
   142:     title=": $title"
   143:   fi
   144: 
   145:   echo "<html><head><title>$PROJECT$title</title></head>" \
   146:     "<body>" \
   147:     "<h1><a href=\"$top_level/index.html\">$PROJECT</a>$title</h1>"
   148: }
   149: 
   150: html_footer()
   151: {
   152:   echo "<hr>" \
   153:     "Generated by" \
   154:     "<a href=\"http://hssl.cs.jhu.edu/~neal/git2html\">git2html</a>."
   155: }
   156: 
   157: # Ensure that some directories we need exist.
   158: if test x"$force_rebuild" = x1
   159: then
   160:   rm -rf "$TARGET/objects" "$TARGET/commits"
   161: fi
   162: 
   163: if test ! -d "$TARGET/objects"
   164: then
   165:   mkdir "$TARGET/objects"
   166: fi
   167: 
   168: if test ! -e "$TARGET/commits"
   169: then
   170:   mkdir "$TARGET/commits"
   171: fi
   172: 
   173: if test ! -e "$TARGET/branches"
   174: then
   175:   mkdir "$TARGET/branches"
   176: fi
   177: 
   178: unset GIT_DIR
   179: 
   180: # Get an up-to-date copy of the repository.
   181: if test ! -e "$TARGET/repository"
   182: then
   183:   # Clone the repository.
   184:   git clone --depth 2 "$REPOSITORY" "$TARGET/repository"
   185:   cd "$TARGET/repository"
   186: 
   187:   # We don't need any local branches.  In fact, they only make trouble
   188:   # when there is a non-fast forward merge.  We do want one branch:
   189:   # the main branch, which we preferred as a detached head.
   190:   first=""
   191:   git branch -l | sed 's/^..//' | while read -r branch
   192:   do
   193:     if test x"$first" = x
   194:     then
   195:       # Create the detached head.  This also allows us to delete the
   196:       # main branch (you can't delete a branch that is checked out).
   197:       first="$branch"
   198:       git checkout "origin/$branch"
   199:     fi
   200: 
   201:     git branch -D "$branch"
   202:   done
   203: else
   204:   cd "$TARGET/repository"
   205: fi
   206: 
   207: # git merge fails if these are not set.  Fake them.
   208: git config user.email "git2html@git2html"
   209: git config user.name "git2html"
   210: 
   211: if test x"$BRANCHES" = x
   212: then
   213:   # Strip the start of lines of the form 'origin/HEAD -> origin/master'
   214:   BRANCHES=$(git branch --no-color -r \
   215:                | sed 's#.*->##; s#^ *origin/##;')
   216: fi
   217: 
   218: first=""
   219: # Ignore 'origin/HEAD -> origin/master'
   220: for branch in ${BRANCHES:-$(git branch --no-color -r \
   221:                               | sed 's#.*->.*##;
   222:                                      s#^ *origin/##;
   223:                                      s#^ *HEAD *$##;')}
   224: do
   225:   first="$branch"
   226:   break
   227: done
   228: 
   229: # Due to branch aliases (a la origin/HEAD), a branch might be listed
   230: # multiple times.  Eliminate this possibility.
   231: BRANCHES=$(for branch in $BRANCHES
   232:   do
   233:     echo "$branch"
   234:   done | sort | uniq)
   235: 
   236: for branch in $BRANCHES
   237: do
   238:   # Suppress already up to date status messages, but don't use grep -v
   239:   # as that returns 1 if there is no output and causes the script to
   240:   # abort.
   241:   git fetch --force origin "refs/heads/${branch}:refs/origin/${branch}" \
   242:       | gawk '/^Already up-to-date[.]$/ { skip=1; }
   243:               { if (! skip) print; skip=0 }'
   244: done
   245: git checkout "origin/$first"
   246: 
   247: # For each branch and each commit create and extract an archive of the form
   248: #   $TARGET/commits/$commit
   249: #
   250: # and a link:
   251: #
   252: #   $TARGET/branches/$branch -> $TARGET/commits/$commit
   253: 
   254: # Count the number of branch we want to process to improve reporting.
   255: bcount=0
   256: for branch in $BRANCHES
   257: do
   258:   (( ++bcount ))
   259: done
   260: 
   261: INDEX="$TARGET/index.html"
   262: 
   263: {
   264:   html_header
   265: 
   266:   while read -r F
   267:   do
   268:     echo -e "<h2>Readme</h2>\\n<pre><code>"
   269:     cat "$F"
   270:     echo -e "</code></pre>"
   271:     break
   272:   done < <(find "$TARGET/repository" -maxdepth 1 -name 'README*' -type f)
   273: 
   274:   echo "<h2>Repository</h2>"
   275:   if test x"$PUBLIC_REPOSITORY" != x
   276:   then
   277:     echo  "Clone this repository using:" \
   278:       "<pre>" \
   279:       " git clone $PUBLIC_REPOSITORY" \
   280:       "</pre>"
   281:   fi
   282: 
   283:   echo "<h2>Branches</h2>" \
   284:     "<ul>"
   285: } > "$INDEX"
   286: 
   287: b=0
   288: for branch in $BRANCHES
   289: do
   290:   (( ++b ))
   291: 
   292:   cd "$TARGET/repository"
   293: 
   294:   COMMITS=$(mktemp)
   295:   git rev-list -n 1 --graph "origin/$branch" > "$COMMITS"
   296: 
   297:   # Count the number of commits on this branch to improve reporting.
   298:   ccount=$(grep -c '[0-9a-f]' < "$COMMITS")
   299: 
   300:   progress "Branch $branch ($b/$bcount): processing ($ccount commits)."
   301: 
   302:   BRANCH_INDEX="$TARGET/branches/$branch.html"
   303: 
   304:   c=0
   305:   while read -r commitline
   306:   do
   307:     # See http://www.itnewb.com/unicode
   308:     graph=$(echo "$commitline" \
   309:             | sed 's/ [0-9a-f]*$//; s/|/\┃/g; s/[*]/\●/g;
   310:                    s/[\]/\⬊/g; s/\//\⬋/g;')
   311:     commit=$(echo "$commitline" | sed 's/^[^0-9a-f]*//')
   312: 
   313:     if test x"$commit" = x
   314:     then
   315:       # This is just a bit of graph.  Add it to the branch's
   316:       # index.html and then go to the next commit.
   317:       echo "<tr><td valign=\"middle\"><pre>$graph</pre></td><td></td><td></td><td></td></tr>" \
   318:         >> "$BRANCH_INDEX"
   319:       continue
   320:     fi
   321: 
   322:     (( ++c ))
   323:     progress "Commit $commit ($c/$ccount): processing."
   324: 
   325:     # Extract metadata about this commit.
   326:     metadata=$(git log -n 1 --pretty=raw "$commit" \
   327:         | sed 's#<#\<#g; s#>#\>#g; ')
   328:     parent=$(echo "$metadata" \
   329:         | gawk '/^parent / { $1=""; sub (" ", ""); print $0 }')
   330:     author=$(echo "$metadata" \
   331:         | gawk '/^author / { NF=NF-2; $1=""; sub(" ", ""); print $0 }')
   332:     date=$(echo "$metadata" | gawk '/^author / { print $(NF=NF-1); }')
   333:     date=$(date -u -d "1970-01-01 $date sec")
   334:     log=$(echo "$metadata" | gawk '/^    / { if (!done) print $0; done=1; }')
   335:     loglong=$(echo "$metadata" | gawk '/^    / { print $0; }')
   336: 
   337:     if test "$c" = "1"
   338:     then
   339:       # This commit is the current head of the branch.  Update the
   340:       # branch's link, but don't use ln -sf: because the symlink is to
   341:       # a directory, the symlink won't be replaced; instead, the new
   342:       # link will be created in the existing symlink's target
   343:       # directory:
   344:       #
   345:       #   $ mkdir foo
   346:       #   $ ln -s foo bar
   347:       #   $ ln -s baz bar
   348:       #   $ ls -ld bar bar/baz
   349:       #   lrwxrwxrwx 1 neal neal 3 Aug  3 09:14 bar -> foo
   350:       #   lrwxrwxrwx 1 neal neal 3 Aug  3 09:14 bar/baz -> baz
   351:       rm -f "$TARGET/branches/$branch"
   352:       ln -s "../commits/$commit" "$TARGET/branches/$branch"
   353: 
   354:       # Update the project's index.html and the branch's index.html.
   355:       echo "<li><a href=\"branches/$branch.html\">$branch</a>: " \
   356:         "<b>$log</b> $author <i>$date</i>" >> "$INDEX"
   357: 
   358:       {
   359:         html_header "Branch: $branch" ".."
   360:         echo "<p><a href=\"$branch/index.html\">HEAD</a>"
   361:         echo "<p><table>"
   362:       } > "$BRANCH_INDEX"
   363:     fi
   364: 
   365:     # Add this commit to the branch's index.html.
   366:     echo "<tr><td valign=\"middle\"><pre>$graph</pre></td><td><a href=\"../commits/$commit/index.html\">$log</a></td><td>$author</td><td><i>$date</i></td></tr>" \
   367:         >> "$BRANCH_INDEX"
   368: 
   369: 
   370:     # Commits don't change.  If the directory already exists, it is up
   371:     # to date and we can save some work.
   372:     COMMIT_BASE="$TARGET/commits/$commit"
   373:     if test -e "$COMMIT_BASE"
   374:     then
   375:       progress "Commit $commit ($c/$ccount): already processed."
   376:       continue
   377:     fi
   378: 
   379:     mkdir "$COMMIT_BASE"
   380: 
   381:     # Get the list of files in this commit.
   382:     FILES=$(mktemp)
   383:     git ls-tree -r "$commit" > "$FILES"
   384: 
   385:     # Create the commit's index.html: the metadata, a summary of the changes
   386:     # and a list of all the files.
   387:     COMMIT_INDEX="$COMMIT_BASE/index.html"
   388:     {
   389:       html_header "Commit: $commit" "../.."
   390: 
   391:       # The metadata.
   392:       echo "<h2>Branch: <a href=\"../../branches/$branch.html\">$branch</a></h2>" \
   393:         "<p>Author: $author" \
   394:         "<br>Date: $date" \
   395:         "<br>Commit: $commit"
   396:       for p in $parent
   397:       do
   398:         echo "<br>Parent: <a href=\"../../commits/$p/index.html\">$p</a>" \
   399:         " (<a href=\"../../commits/$commit/diff-to-$p.html\">diff to parent</a>)"
   400:       done
   401:       echo "<br>Log message:" \
   402:         "<p><pre>$loglong</pre>"
   403:       for p in $parent
   404:       do
   405:         echo "<br>Diff Stat to $p:" \
   406:              "<blockquote><pre>"
   407: 
   408:         #shellcheck disable=SC1004
   409:         git diff --stat "$p".."$commit" \
   410:           | gawk \
   411:               '{ if (last_line) print last_line;
   412:                  last_line_raw=$0;
   413:                  $1=sprintf("<a href=\"%s.raw.html\">%s</a>" \
   414:                             " (<a href=\"../../commits/'"$p"'/%s.raw.html\">old</a>)" \
   415:                             "%*s" \
   416:                             "(<a href=\"diff-to-'"$p"'.html#%s\">diff</a>)",
   417:                             $1, $1, $1, 60 - length ($1), " ", $1);
   418:                     last_line=$0; }
   419:                   END { print last_line_raw; }'
   420:         echo "</pre></blockquote>"
   421:       done
   422:       echo "<p>Files:" \
   423:         "<ul>"
   424: 
   425:       # The list of files as a hierarchy.  Sort them so that within a
   426:       # directory, files preceed sub-directories
   427:       sed 's/\([^ \t]\+[ \t]\)\{3\}//;
   428:                  s#^#/#; s#/\([^/]*/\)#/1\1#; s#/\([^/]*\)$#/0\1#;' \
   429:           < "$FILES" \
   430:           | sort | sed 's#/[01]#/#g; s#^/##' \
   431:           | gawk '
   432:            function spaces(l) {
   433:              for (space = 1; space <= l; space ++) { printf ("  "); }
   434:            }
   435:            function max(a, b) { if (a > b) { return a; } return b; }
   436:            function min(a, b) { if (a < b) { return a; } return b; }
   437:            function join(array, sep, i, s) {
   438:              s="";
   439:              for (i in array) {
   440:                if (s == "")
   441:                  s = array[i];
   442:                else
   443:                  s = s sep array[i];
   444:              }
   445:              return s;
   446:            }
   447:            BEGIN {
   448:              current_components[1] = "";
   449:              delete current_components[1];
   450:            }
   451:            {
   452:              file=$0;
   453:              split(file, components, "/")
   454:              # Remove the file.  Keep the directories.
   455:              file=components[length(components)]
   456:              delete components[length(components)];
   457: 
   458:              # See if a path component changed.
   459:              for (i = 1;
   460:                   i <= min(length(components), length(current_components));
   461:                   i ++)
   462:              {
   463:                if (current_components[i] != components[i])
   464:                  # It did.
   465:                  break
   466:              }
   467: 
   468:              # i-1 is the last common component.  The rest from the
   469:              # current_component stack.
   470:              last=length(current_components);
   471:              for (j = last; j >= i; j --)
   472:              {
   473:                spaces(j);
   474:                printf ("</ul> <!-- %s -->\n", current_components[j]);
   475:                delete current_components[j];
   476:              }
   477: 
   478:              # If there are new path components push them on the
   479:              # current_component stack.
   480:              for (; i <= length(components); i ++)
   481:              {
   482:                  current_components[i] = components[i];
   483:                  spaces(i);
   484:                  printf("<li><a name=\"files:%s\">%s</a>\n",
   485:                         join(current_components, "/"), components[i]);
   486:                  spaces(i);
   487:                  printf("<ul>\n");
   488:              }
   489: 
   490:              spaces(length(current_components))
   491:              printf ("<li><a name=\"files:%s\" href=\"%s.raw.html\">%s</a>\n",
   492:                      $0, $0, file);
   493:              printf ("  (<a href=\"%s\">raw</a>)\n", $0, file);
   494:            }
   495: 
   496:            END {
   497:              for (i = length(current_components); j >= 1; j --)
   498:              {
   499:                spaces(j);
   500:                printf ("</ul> <!-- %s -->\n", current_components[j]);
   501:                delete current_components[j];
   502:              }
   503:            }'
   504: 
   505:       echo "</ul>"
   506:       html_footer
   507:     } > "$COMMIT_INDEX"
   508: 
   509:     # Create the commit's diff-to-parent.html file.
   510:     for p in $parent
   511:     do
   512:       {
   513:         # shellcheck disable=SC1004
   514:         html_header "diff $(echo "$commit" | sed 's/^\(.\{8\}\).*/\1/') $(echo "$p" | sed 's/^\(.\{8\}\).*/\1/')" "../.."
   515:         echo "<h2>Branch: <a href=\"../../branches/$branch.html\">$branch</a></h2>" \
   516:           "<h3>Commit: <a href=\"index.html\">$commit</a></h3>" \
   517:         "<p>Author: $author" \
   518:         "<br>Date: $date" \
   519:         "<br>Parent: <a href=\"../$p/index.html\">$p</a>" \
   520:         "<br>Log message:" \
   521:         "<p><pre>$loglong</pre>" \
   522:         "<p>" \
   523:           "<pre>"
   524:         git diff -p "$p".."$commit" \
   525:           | sed 's#<#\<#g; s#>#\>#g;
   526:                  s#^\(diff --git a/\)\([^ ]\+\)#\1<a name="\2">\2</a>#;
   527:                  s#^\(\(---\|+++\|index\|diff\|deleted\|new\) .\+\)$#<b>\1</b>#;
   528:                  s#^\(@@ .\+\)$#<font color=\"blue\">\1</font>#;
   529:                  s#^\(-.*\)$#<font color=\"red\">\1</font>#;
   530:                  s#^\(+.*\)$#<font color=\"green\">\1</font>#;' \
   531:           | gawk '{ ++line; printf("%5d: %s\n", line, $0); }'
   532:         echo "</pre>"
   533:         html_footer
   534:       } > "$COMMIT_BASE/diff-to-$p.html"
   535:     done
   536: 
   537: 
   538:     # For each file in the commit, ensure the object exists.
   539:     while read -r line
   540:     do
   541:       # Some changes bump the commit of a submodule; ignore these
   542:       sort=$(echo "$line" | gawk '{ print $2 }')
   543:       if test x"$sort" = xcommit
   544:       then
   545:         continue
   546:       fi
   547: 
   548:       file_base=$(echo "$line" | gawk '{ print $4 }')
   549:       file="$TARGET/commits/$commit/$file_base"
   550:       sha=$(echo "$line" | gawk '{ print $3 }')
   551: 
   552:       object_dir="$TARGET/objects/"$(echo "$sha" \
   553:           | sed 's#^\([a-f0-9]\{2\}\).*#\1#')
   554:       object="$object_dir/$sha"
   555: 
   556:       if test ! -e "$object"
   557:       then
   558:         # File does not yet exists in the object repository.
   559:         # Create it.
   560:         if test ! -d "$object_dir"
   561:         then
   562:           mkdir "$object_dir"
   563:         fi
   564: 
   565:         # The object's file should not be commit or branch specific:
   566:         # the same html is shared among all files with the same
   567:         # content.
   568:         {
   569:           html_header "$sha"
   570:           echo "<pre>"
   571:           git show "$sha" \
   572:             | sed 's#<#\<#g; s#>#\>#g; ' \
   573:             | gawk '{ ++line; printf("%6d: %s\n", line, $0); }'
   574:           echo "</pre>"
   575:           html_footer
   576:         } > "$object"
   577:       fi
   578: 
   579:       # Create a hard link to the formatted file in the object repository.
   580:       mkdir -p "$(dirname "$file")"
   581:       ln -f "$object" "$file.raw.html"
   582: 
   583:       # Create a hard link to the raw file.
   584:       raw_filename="raw/$(echo "$sha" | sed 's/^\(..\)/\1\//')"
   585:       if ! test -e "$raw_filename"
   586:       then
   587:           mkdir -p "$(dirname "$raw_filename")"
   588:           git cat-file blob "$sha" > "$raw_filename"
   589:       fi
   590:       ln -f "$raw_filename" "$file"
   591:     done <"$FILES"
   592:     rm -f "$FILES"
   593:   done < "$COMMITS"
   594:   rm -f "$COMMITS"
   595: 
   596:   {
   597:     echo "</table>"
   598:     html_footer
   599:   } >> "$BRANCH_INDEX"
   600: done
   601: 
   602: {
   603:   echo "</ul>"
   604:   html_footer
   605: } >> "$INDEX"

Generated by git2html.