#!/usr/bin/perl -w
# striptag -- strip given tags out of HTML
# Copyright : http://www.fsf.org/copyleft/gpl.html
# Created On : Summer 2003
# Last Modified On: Thu Aug 5 05:59:14 2004
# Update Count : 17
# Nominal Author : Dan Jacobson -- http://jidanni.org/
# Actual brains : Jonathan Stowe -- http://www.gellyfish.com/
#
# Usage example: striptag font div < file.html
# That will strip all <font>, </font>, <div>, </div> tags, etc.  Only the
# tags themselves are removed; the text between them is still printed.
#
# Tag names given on the command line are folded to lower case before use,
# because the "tagname" value HTML::Parser hands to the handlers is forced
# to lower case (the case_sensitive parser option is not enabled here).
#
# TODO: How to strip _all_ tags?  Arguments are looked up as literal tag
# names, so a regexp cannot be given.

use strict;
use warnings;
use HTML::Parser;

# Pass-through handler: prints the original source text of the event
# unchanged.  Shared by the text event and by every event that has no
# more specific handler (default_h).
my $echo = sub {
    my ($text) = @_;
    print $text;
    return;
};

my $parser = HTML::Parser->new(
    text_h    => [ $echo, 'text' ],
    default_h => [ $echo, 'text' ],
    start_h   => [ \&tag, 'self, tagname, text' ],
    end_h     => [ \&tag, 'self, tagname, text' ],
);

# Record the set of tag names to drop on the parser object itself, so the
# tag() handler can reach it through the "self" argument.
for my $name (@ARGV) {
    $parser->{_striptags}->{ lc $name } = 1;
}

# Filter standard input to standard output.
$parser->parse_file(*STDIN);

# tag($self, $tag, $text)
#
# Handler for both start-tag and end-tag events.
#   $self - the parser object; its {_striptags} hash holds the names to drop
#   $tag  - the element name as reported by the "tagname" argspec
#   $text - the original source text of the tag
# Prints $text unless $tag is one of the names selected for stripping.
sub tag {
    my ( $self, $tag, $text ) = @_;

    return if $self->{_striptags}->{$tag};

    print $text;
    return;
}