#!/usr/bin/perl
# My personal web site mapper
# Author: Dan Jacobson https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: Thu Jul 24 07:36:30 2003
# Last-Updated: 2024-04-06T06:18:19+0000
#     Update #: 293
use strict;
use warnings 'all';
use XML::LibXML;
use Time::Piece;
use File::Find;
require HTML::HeadParser;
# DEBUG is compared numerically further down to decide how much
# skip-logging goes to STDERR; default it to 0 when the caller did not set it.
$ENV{DEBUG} = 0 unless exists $ENV{DEBUG};

# Page heading/title text and the site's base URL shown on the page.
my $tit = "積丹尼網站索引 Dan Jacobson's site map";
my $url = 'https://www.jidanni.org/';

# All paths below are relative to the local copy of the site, so a failure
# here must be fatal -- and must say which directory failed and why.
my $site_dir = "$ENV{HOME}/jidanni.org";
chdir $site_dir or die "Cannot chdir to $site_dir: $!";

# Collect the name of every plain file under the site directory into @f.
# no_chdir keeps the working directory fixed while the tree is walked.
my @f;
find( { wanted => \&process, no_chdir => 1 }, "." );

# File::Find "wanted" callback: record the name of every plain file visited.
# find() is invoked with no_chdir => 1, so $_ holds a path that -f can test
# from the current directory.  Appends to the file-level @f.
sub process {
    if ( -f $_ ) {
        push @f, $File::Find::name;
    }
}
# %g: keys are the site-relative paths that will be listed; values start out
# undef here and are filled in afterwards for HTML pages.
my %g;
for my $path (@f) {

    # $path aliases the @f element, so the leading "./" is removed in place.
    $path =~ s@^\./@@;

    # Leave out names ending in _en.html / -en.html, image files,
    # and .htaccess files.
    if ( $path =~ /([_-]en\.html|\.jpg|\.png|\.gif|\.htaccess|\.ico)$/ ) {
        print STDERR "Skipping $path\n" if $ENV{DEBUG} >= 4;
        next;
    }

    # Leave out paths that begin with one of these names (the match is
    # anchored at the start of the relative path only).
    if ( $path
        =~ m@^(\.well-known|idname|robots|location|dyy|(missing|forbidden|internal_error|google.*)\.html)@
    ) {
        print STDERR "Skipped $path\n" if $ENV{DEBUG} >= 3;
        next;
    }

    # Register the path; the key exists but has no title yet.
    $g{$path} = undef;
}
# For every listed .html file, parse its <head> and store the Title header
# as the value in %g.  header('Title') may return undef when no title is found.
for my $page ( grep { /\.html$/ } keys %g ) {

    # :encoding(UTF-8) validates the input while decoding, unlike the lax
    # :utf8 layer, so malformed bytes cannot yield a corrupt Perl string.
    open( my $fh, '<:encoding(UTF-8)', $page )
        or die "Cannot open $page: $!";
    my $parser = HTML::HeadParser->new;
    $parser->parse_file($fh);
    $g{$page} = $parser->header('Title');
    close($fh);
}
# Create the output document: an <html> root holding <head> and <body>.
my $doc  = XML::LibXML::Document->new( '1.0', 'UTF-8' );
my $html = $doc->createElement('html');
my $head = $doc->createElement('head');
my $body = $doc->createElement('body');
$html->appendChild($_) for $head, $body;
$doc->setDocumentElement($html);

# Internal subset named "html" with neither public nor system identifier.
$doc->createInternalSubset( 'html', undef, undef );

# <head> content, in order: charset meta, viewport meta, then <title>.
# The title text itself is added later, once the heading is built.
my $title = XML::LibXML::Element->new('title');
my $meta;
for my $attributes (
    [ [ 'charset', 'utf-8' ] ],
    [
        [ 'name',    'viewport' ],
        [ 'content', 'width=device-width, initial-scale=1' ],
    ],
) {
    $meta = XML::LibXML::Element->new('meta');
    $head->appendChild($meta);
    $meta->setAttribute( @{$_} ) for @{$attributes};
}
$head->appendChild($title);
# Visible heading; the same text is also used for the <title> element.
my $h1 = $doc->createElement('h1');
$body->appendChild($h1);
$h1->appendText($tit);
$title->appendText($tit);

# Intro paragraph: a link to the site root followed by a generation timestamp.
my $p = $doc->createElement('p');
$body->appendChild($p);

# Not named $a: a lexical $a hides the special variable that sort()
# comparison blocks rely on.
my $site_link = $doc->createElement('a');
$site_link->setAttribute( 'href', $url );
$site_link->appendText($url);
$p->appendChild($site_link);

# TZ is set before the clock is read; the intent is for the timestamp
# (including its %z offset) to be expressed in Taipei time.
$ENV{TZ} = "Asia/Taipei";
$p->appendText(
    (localtime)->strftime(" as of %Y/%m/%d %H:%M %z. No images listed.") );

# Separator, then the (initially empty) definition list that receives one
# entry per listed file.
my $hr = $doc->createElement('hr');
$body->appendChild($hr);
my $dl = $doc->createElement('dl');
$body->appendChild($dl);
# Emit the listing in sorted path order: each file gets a <dt> containing a
# link whose href and text are both the site-relative path; files with a
# known title additionally get a <dd> holding that title.
for my $path ( sort keys %g ) {

    # Not named $a: a lexical $a hides the special variable used by sort().
    my $link = $doc->createElement('a');
    $link->setAttribute( 'href', $path );
    $link->appendText($path);
    my $dt = $doc->createElement('dt');
    $dt->appendChild($link);
    $dl->appendChild($dt);

    # Skip only missing or empty titles; a plain truth test would also
    # discard a page whose title is the string "0".
    my $page_title = $g{$path};
    next unless defined $page_title && length $page_title;
    my $dd = $doc->createElement('dd');
    $dl->appendChild($dd);
    $dd->appendText($page_title);
}
# Serialize the document (format argument 1), remove a leading "<?xml ...>"
# declaration line if present, and print the result.
my $markup = $doc->toString(1);
$markup =~ s/^<\?xml.*\n//;
print $markup;
