#!/usr/bin/perl
# My personal web site mapper
# Author: Dan Jacobson https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: Thu Jul 24 07:36:30 2003
# Last-Updated: 2024-04-06T06:18:19+0000
# Update #: 293
#
# Scans $HOME/jidanni.org, drops images / English variants / housekeeping
# files, reads the <title> of every remaining *.html file, and prints an
# HTML index (a <dl> of links, each optionally followed by its title) on
# standard output.
#
# Environment:
#   DEBUG  verbosity; >= 3 reports files skipped by the housekeeping
#          filter, >= 4 also reports files skipped by the suffix filter.
#          Defaults to 0 when unset.
#   HOME   used to locate the site directory.
use strict;
use warnings 'all';
use utf8;    # the site title below is a literal UTF-8 string
use XML::LibXML;
use Time::Piece;
use File::Find;
require HTML::HeadParser;

$ENV{DEBUG} = 0 unless exists $ENV{DEBUG};

my $site_title = "積丹尼網站索引 Dan Jacobson's site map";
my $site_url   = 'https://www.jidanni.org/';

my $site_dir = "$ENV{HOME}/jidanni.org";
chdir $site_dir or die "Cannot chdir to $site_dir: $!\n";

# --- Collect every plain file below the site directory -----------------

my @files;
find( { wanted => \&process, no_chdir => 1 }, "." );

# File::Find callback: remember the path of each plain file visited.
# With no_chdir set, $_ holds the same path as $File::Find::name.
sub process {
    push @files, $File::Find::name if -f $_;
    return;
}

# --- Filter the list; keys are site-relative paths, values are titles ---

my %title_of;
for my $path (@files) {
    $path =~ s@^\./@@;    # strip the "./" that find() prepends

    # Images, English-language variants and a few other suffixes are
    # never listed.
    if ( $path =~ /([_-]en\.html|\.jpg|\.png|\.gif|\.htaccess|\.ico)$/ ) {
        print STDERR "Skipping $path\n" if $ENV{DEBUG} >= 4;
        next;
    }

    # Housekeeping files and directories, matched by path prefix.
    if ( $path
        =~ m@^(\.well-known|idname|robots|location|dyy|(missing|forbidden|internal_error|google.*)\.html)@
      )
    {
        print STDERR "Skipped $path\n" if $ENV{DEBUG} >= 3;
        next;
    }

    $title_of{$path} = undef;
}

# --- Read the <title> of each HTML page ---------------------------------

for my $file ( keys %title_of ) {
    next unless $file =~ /\.html$/;

    # :encoding(UTF-8) validates the input, unlike the lax :utf8 layer.
    open( my $fh, '<:encoding(UTF-8)', $file )
      or die "Cannot open $file: $!\n";
    my $parser = HTML::HeadParser->new;
    $parser->parse_file($fh);
    $title_of{$file} = $parser->header('Title');
    close($fh);
}

# --- Build the output document -------------------------------------------

my $doc  = XML::LibXML::Document->new( '1.0', "UTF-8" );
my $html = $doc->createElement('html');
my $head = $doc->createElement('head');
$html->appendChild($head);
my $body = $doc->createElement('body');
$html->appendChild($body);
$doc->setDocumentElement($html);
$doc->createInternalSubset( "html", (undef) x 2 );    # <!DOCTYPE html>

_add_element( $head, 'meta', charset => 'utf-8' );
_add_element(
    $head, 'meta',
    name    => 'viewport',
    content => 'width=device-width, initial-scale=1',
);
_add_element( $head, 'title' )->appendText($site_title);

_add_element( $body, 'h1' )->appendText($site_title);

# TZ is set before the timestamp is formatted; the intent appears to be
# that the "as of" time is reported in Taipei local time.
$ENV{TZ} = "Asia/Taipei";
my $intro = _add_element( $body, 'p' );
my $link = _add_element( $intro, 'a', href => $site_url );
$link->appendText($site_url);
$intro->appendText(
    (localtime)->strftime(" as of %Y/%m/%d %H:%M %z. No images listed.") );

_add_element( $body, 'hr' );

my $dl = _add_element( $body, 'dl' );
for my $file ( sort keys %title_of ) {
    my $dt = _add_element( $dl, 'dt' );
    _add_element( $dt, 'a', href => $file )->appendText($file);

    # Only pages that yielded a non-empty title get a <dd>.  Testing
    # definedness and length (not truth) keeps a title of "0".
    my $page_title = $title_of{$file};
    next unless defined $page_title && length $page_title;
    _add_element( $dl, 'dd' )->appendText($page_title);
}

# --- Emit, minus the XML declaration --------------------------------------

my $output = $doc->toString(1);
$output =~ s/^<\?xml.*\n//;
print $output;

# Create an element named $name in $parent's document, set attributes from
# the ordered name/value pairs in @attrs, append it to $parent, and return
# the new element.
sub _add_element {
    my ( $parent, $name, @attrs ) = @_;
    my $elem = $parent->ownerDocument->createElement($name);
    while ( my ( $attr, $value ) = splice @attrs, 0, 2 ) {
        $elem->setAttribute( $attr, $value );
    }
    $parent->appendChild($elem);
    return $elem;
}