#!/usr/bin/perl -w
# striptag -- strip given tags out of HTML
# Copyright       : http://www.fsf.org/copyleft/gpl.html
# Created On      : Summer 2003
# Last Modified On: Thu Aug  5 05:59:14 2004
# Update Count    : 17
# Nominal Author  : Dan Jacobson -- http://jidanni.org/
# Actual brains   : Jonathan Stowe -- http://www.gellyfish.com/

#Usage example: striptag font div < file.html
#That will strip all <DIV>, </font> etc.
#Tag names are compared with the name HTML::Parser reports for each tag,
#which is always folded to lower case -- so spell them in lower case.
#Limitation: there is no way to strip _all_ tags; names are looked up
#literally, so a regexp or wildcard cannot be given.
use strict;
use warnings;
use HTML::Parser;

# Build a pass-through filter: every piece of the document is echoed to
# STDOUT unchanged, except start and end tags, which are routed through
# tag() so that the unwanted ones can be dropped.
my $parser = HTML::Parser->new(
    text_h    => [ sub { print shift }, 'text' ],    # plain text
    default_h => [ sub { print shift }, 'text' ],    # events with no handler of their own
    start_h   => [ \&tag, 'self, tagname, text' ],
    end_h     => [ \&tag, 'self, tagname, text' ],
);

# Record the tags to strip on the parser object itself, so tag() can reach
# them through its "self" argument.  HTML::Parser hands tag() the element
# name folded to lower case, so fold the command-line names the same way;
# otherwise "striptag DIV" would silently strip nothing.
$parser->{_striptags}->{ lc $_ } = 1 foreach @ARGV;

# Filter standard input to standard output.
$parser->parse_file(*STDIN);

# tag -- handler shared by start and end tag events.
#   $parser : the parser object; its {_striptags} hash holds the names of
#             the tags to drop
#   $name   : the element name of the tag being reported
#   $markup : the tag exactly as it appeared in the input
# Prints $markup to the currently selected output handle unless $name is
# listed in {_striptags}.
sub tag {
    my ( $parser, $name, $markup ) = @_;

    my $is_stripped = $parser->{_striptags}{$name};

    # Listed tags are swallowed; everything else passes through untouched.
    $is_stripped or print $markup;
}
