#!/usr/bin/perl
# My super brilliant indexer. Just hollows out the content!
# Copyright       : http://www.fsf.org/copyleft/gpl.html
# Author          : Dan Jacobson -- http://jidanni.org/
# Created On      : Sun Dec 26 10:26:17 2021
# Last Modified On: Mon Dec 27 19:56:34 2021
# Update Count    : 53
#
# Usage: indexer file.html
#
# Reads file.html, turns it into a table-of-contents page, and prints the
# result on standard output:
#   * every <p>, <blockquote> and <dd> element is removed;
#   * " (目錄 Table of Contents)" is appended to <title> and <h1>;
#   * every <a id="X"> without an href becomes <a href="#X">;
#   * a "made by" comment and <base href="file.html"> are appended to <head>,
#     so the "#X" links resolve into the original file.
# Dies if an element other than <a> carries an id, or if an <a> has neither
# an href nor an id.
use strict;
use warnings FATAL => 'all';
use utf8;    # The label appended to <title>/<h1> is a non-ASCII literal.
use XML::LibXML;

binmode STDERR, ":utf8";

# Joining @ARGV means zero arguments (empty string) or several arguments
# (embedded spaces) both fail the pattern and fall through to the usage
# message.  \A...\z anchor the whole string, so a trailing newline is
# rejected as well.
my $basename = "@ARGV";
unless ( $basename =~ /\A[\w.]+\.html\z/ ) { die "Usage: $0 file.html" }

my $doc = XML::LibXML->load_html( location => $basename );

# Hollow out the content.  findnodes() returns its full list before the loop
# starts, so unbinding nodes while looping over that list is safe.
for my $tag (qw(p blockquote dd)) {
    $_->unbindNode() for $doc->findnodes("//$tag");
}

# Label the page as a table of contents.
for my $tag (qw(title h1)) {
    $_->appendText(" (目錄 Table of Contents)") for $doc->findnodes("//$tag");
}

# Turn every anchor target <a id="X"> into a link <a href="#X">.
for my $element ( $doc->findnodes('//*') ) {
    my $id     = $element->getAttribute('id');
    my $has_id = defined $id && length $id;    # id="0" is a real id

    if ( $element->nodeName ne 'a' ) {

        # Only <a> anchors are supported as link targets.
        die "Not ready for $element, dude.Use ..." if $has_id;
        next;
    }

    # An <a> that already is a link (even href="" or href="0") stays as is.
    next if $element->hasAttribute('href');

    die "Where's the ID at $element, dude?" unless $has_id;
    $element->setAttribute( 'href', "#$id" );
    $element->removeAttribute('id');
}

# Stamp the output and point relative links back at the source document.
my $comment = $doc->createComment("Made by $0. Do not edit by hand.");
my $base    = $doc->createElement('base');
$base->setAttribute( 'href', $basename );
for my $head ( $doc->findnodes('/html/head') ) {
    $head->appendChild($comment);
    $head->appendChild($base);
}

my $serialized = $doc->toString(1);
$serialized =~ s/^<\?xml [^\n]+\?>\n//;    # hack, yes: drop the XML declaration.
print $serialized;