# makefile -- what I use to manage my website
# Author: Dan Jacobson https://www.jidanni.org/
# Copyright: https://www.gnu.org/licenses/gpl.html
# Created: Jan 2001
# Last-Updated: 2025-01-22T15:28:56+0000
#     Update #: 1738
# Default goal: deliberately fail so a bare "make" does nothing destructive.
boom:; false #avoid accidental "make" accidents
# U: umask for generated listing files; J: remote host/site directory name;
# S: hosting provider (old alternatives kept as comments).
U=0077
J=jidanni.org
S=dreamhost #radioscanningtw #taizhongbus
# Rebuild the sitemap every run (the /proc/self prerequisite is always
# newer) since page titles may have changed.  Steps: save the old copy,
# build to a temp file, install, validate with tidy, then diff old vs.
# new -- only diff "trouble" (exit status 2) fails the target; mere
# differences (exit 1) do not.  NOTE(review): $W is undefined in this
# file, but it sits inside the comment so it is inert.
sitemap.html:/proc/self#$W#Remake each time as titles might have changed
	cp -av $@ /tmp/$@.old
	comp/sitemap-jidanni-ch > /tmp/TT$@ #Else its own entry has no title.
	mv /tmp/TT$@ $@
	tidy -qe $@
	diff /tmp/$@.old $@; test $$? != 2
#$(shell) wasteful
#depth checks, equivalent:
# Regex class matching any non-ASCII character (used by zf8 below).
Nonascii=[^[:ascii:]]#one day strip less, e.g., symbols, accents... IPA?!
# Base tidy invocation; $(tidy) additionally edits the target in place (-qm).
tidy0=tidy --tidy-mark no -utf8
tidy=$(tidy0) -qm $@
# Refresh the site's helper programs via an external update script.
getp:
	sh ~/bin/update-website-programs
# List image directories NOT yet mentioned in robots.txt: generate the
# Disallow lines that SHOULD exist, merge them with robots.txt, and let
# "uniq -u" keep only lines present on one side.
disallow_images_check:
	find * -name images -type d -printf "Disallow: /%p/\n"|\
	sort robots.txt -|uniq -u
# Report robots.txt Disallow entries whose paths no longer exist on disk.
# $(\ <newline>) expands an empty-named variable: it splices the two
# makefile lines into one shell line without a backslash-newline.
robots_all_valid_check:robots.txt
	perl -nwe 'next unless s@^Disallow: /@@;chomp;$(\
	)if(!-e$$_){print"gone: $$_\n"}' $?
# Recursively list the robots-hidden trees (skipping images/sitemap
# entries) into /tmp, then hand the listing to pluckme for inspection.
hidden_trees:robots.txt
	@LC_ALL=C date
	perl -nwe 's@^Disallow: */@@||next;/images|sitemap/&&next;print' $?|\
	xargs ls -RA1F>/tmp/$@
	N=$@ pluckme /tmp/$@
#"When defining fragment identifiers
#to be backward-compatible, only strings matching the pattern
#[A-Za-z][A-Za-z0-9:_.-]* should be used. See Section 6.2 of [HTML4]
#for more information. http://www.w3.org/TR/html/#ref-html4"
# Grep every page for the literal string "https" ($@ is the target name).
https:
	find * -name \*.html|xargs grep $@
# Flag links whose #fragment does not match the backward-compatible
# pattern quoted above; the site prefix is stripped from the report.
name_check:
	find * -name \*.html|\
	xargs lynx -listonly -dump|perl -nwe \
	'if(/#/&&!/#[A-Za-z][A-Za-z0-9:_.-]*$$/){s@.*$J/@@;print}'
# Accessibility summary: count tidy -access level-3 messages by type
# (the leading "line N ..." location part is stripped before counting).
access:
	find * -name \*.html|\
	xargs $(tidy0) -e -access 3 2>&1|\
	perl -pwe 'if(/^line/){s/[^-]+//}'|sort|uniq -c
# Accessibility detail: per-file tidy -access report, blank lines removed.
access2:
	set -e -- $$(find * -name \*.html);\
	for i do echo $$i:;  $(tidy0) -e -access 3 $$i 2>&1|sed '/^$$/d'; done
#One day clean up comments in HTML
#GREAT analysis of long lines
# Report page lines wider than 80 display columns, counting each
# non-ASCII character as two columns (s/$(Nonascii)/12/g widens them
# before measuring; the original line $l is what gets printed).
# NOTE(review): $(uhrange) is not defined in this file -- presumably set
# elsewhere or on the command line; when empty, find defaults to ".".
zf8:#/tmp/zhlist
	set -e -- $$(find $(uhrange) -name \*.html);\
	for i do echo $$i:;perl -C -nlwe \
	'$$l=$$_;s/$(Nonascii)/12/g;print $$l if length>80' $$i;done
# List pages lacking a content="xx-yy" language declaration; keep a copy.
Noenus:
	find * -name \*html|\
	xargs grep --files-without-match content=\"..-..\"|tee /tmp/$@
# List any archive files (zip or gzip) lying around the site tree.
# (find with no action wraps the whole expression in "( ... ) -print",
# so the explicit grouping below is equivalent to the bare -o form.)
tggz:
	find * \( -name '*.zip' -o -name '*.gz' \)
# Histogram of file extensions, most common first.
show_filetypes:
	find * -type f|perl -pwe's/[^.]*//'|sort|uniq -c|sort -rn
# Pages missing a "Last modified:" line.
lastmod1:
	find * -name \*.html|\
	xargs grep --files-without-match Last\ modified:
# Pages missing a charset= declaration.
no_charset:
	find * -name \*.html|\
	xargs grep --files-without-match charset=
# Pages not declaring the HTML 4.01 Strict DTD.
no_strict:
	find * -name \*.html|\
	xargs fgrep --files-without-match \
	'"http://www.w3.org/TR/html4/strict.dtd">'
# Pages that use the HTML5 doctype...
html5:
	find * -name \*.html|\
	xargs fgrep --files-with-matches '<!DOCTYPE html>'
# ...and pages that do not.
no_html5:
	find * -name \*.html|\
	xargs fgrep --files-without-match \
	'<!DOCTYPE html>'

#Viewport bad advice and recovery:
# The standard mobile viewport meta element, used by the targets below.
VP=<meta name="viewport" content="width=device-width">
# Dead rules kept for the record: the one-shot campaign that inserted
# $(VP) after <head> in every page (originals saved under /tmp/r).
# noviewportCand:
# 	find * -name \*.html|\
# 	xargs fgrep --files-without-match '$(VP)'|\
# 	sed /^google/d > $@
# noviewport:noviewportCand
# 	mkdir -p /tmp/r
# 	set -xue -- $$(cat $?); while expr $$# > /dev/null; do mv $$1 /tmp/r/$$#;\
# 	perl -pwle 's!<head>!$$&\n $(VP)!' /tmp/r/$$# > $$1; shift; done
# List pages that already contain the exact $(VP) viewport element.
# Fixed: use the canonical option name --files-with-matches (as the
# html5 target does); the old --files-with-match only worked as a GNU
# unambiguous-prefix abbreviation.
# NOTE(review): the target keeps its historical "disater" spelling so
# existing invocations still work.
viewport_disater_victims:
	find * -name \*.html|\
	xargs fgrep --files-with-matches '$(VP)'
# List pages with no viewport meta element at all (in any form).
# Fixed: removed a stray trailing tab after "viewport" -- harmless to
# the shell, but it invites copy/paste and whitespace-lint trouble.
viewport_not_set:
	find * -name \*.html|xargs fgrep --files-without-match viewport
#End: viewport bad advice and recovery

# Find full-width (U+3000) spaces anywhere in the tree, with line numbers.
wide_whitespace:
	find * -type f|\
	xargs fgrep -n '　'
### before connecting modem
# Abort loudly (exit 99) if any #backup# files are still lying around.
any_leftover_backups:
	if find .|grep \#;\
	then echo BACKUP FILES LEFT OVER, TURKEY; exit 99; fi
#b5char=[\x80-\xFE][\x40-\x7E\xA1-\xFE]#but not below
# Heuristic scan for leftover Big5-encoded text: skip known binary
# extensions, then print a file's name (once, via exit) if it contains a
# printable byte followed by a Big5-looking two-byte sequence.
find_big5:
	set -e -- $$(find .  -type f ! -name '*'.png ! \
	-name '*'.jpg ! -name '*'.zip ! -name '*'.gz ! -name '*'.tgz \
	! -name '*'.pdf ! -name '*'.ico ! -name '*'.kmz)&&\
	for i do perl -nlwe \
	'if(/[[:print:]][\x80-\xFE][\x40-\x7E]/){print$$ARGV;exit}' $$i;done
# Validate all pages with tidy, emitting emacs-style file:line messages.
valid:#wow, fast
	find * -name '*.html'|grep -v ^googleefff|xargs tidy -q -e --gnu-emacs yes
# Among pages lacking an _en.html twin, pick those declaring
# content="en-us" and show their _en.html references.
# NOTE(review): $${i/.html/_en.html} is a bash/ksh pattern substitution;
# it fails under a plain-POSIX /bin/sh (e.g. dash) unless SHELL is set.
overlyenglish:
	set -- $$(find -name \*.html ! -name \*_en.html);\
	for i do test -f $${i/.html/_en.html}||echo $$i; done|\
	xargs fgrep -l content=\"en-us|xargs fgrep _en.html

# Pre-upload gauntlet: run before connecting and uploading.
# Fixed: invoke sub-makes as $(MAKE), not bare "make", so -n/-j and the
# jobserver propagate correctly to the recursive invocations.
modem:
	$(MAKE) any_leftover_backups
	$(MAKE) sitemap.html
#	$(MAKE) latest.txt
	$(MAKE) valid
#http://wiki.dreamhost.com/index.php/KB_/_Web_Programming_/_Error_messages
# List any multi-linked regular files (hard links can surprise uploads).
hardlinks:; find . -type f ! -links 1 -ls
# Per page, count <img> tags vs. width="..."/height="..." attributes and
# report pages where the three counts disagree (some image is missing an
# explicit width or height); if the perl -exec fails, -o -quit stops find.
images_with_no_width_and_height:
	find -name \*.html \( -exec perl -nwle \
	'BEGIN{$$i=$$w=$$h=0;}for(/<img/g){$$i++}; for(/width=(?:"\d|$$)/){$$w++}; for(/height=(?:"\d|$$)/){$$h++};$(\
	)END{exit unless $$i; print "$$ARGV: h=$$h/w=$$w/i=$$i" unless $$i==$$w && $$w==$$h;};' {}  \; -o -quit \)
# Scratch directory name for linklint output.
O=orphan
# Use linklint to find orphan files (unreachable from the site root),
# show its errorAX report, then cross-check against reality with idname.
# NOTE(review): "-htmlonly /@" looks odd -- possibly meant /$@; confirm
# against the linklint documentation.
orphan:
	linklint -root ~/$J/ -orphan -warn -limit 1111 \
	-doc /tmp/$O -dont_output skipped -dont_output A$$ \
	-dont_output F$$ -dont_output warn \
	-dont_output file -skip /sitemap.html -htmlonly /@
	w3m -dump /tmp/$O/errorAX.html
	@echo But reality:
	$(HOME)/$J/idname /tmp/$O/errorAX.html
# Same orphan hunt, but reachability judged from sitemap.html only.
Sitemap_orphan:#guessing
	linklint -root ~/$J/ -orphan -warn -limit 1111 \
	-doc /tmp/$@ -dont_output skipped -dont_output A$$ \
	-dont_output F$$ -dont_output warn \
	-dont_output file -htmlonly /sitemap.html
# Archive everything modified in the last day into a timestamped cpio.gz.
todays_changes:; find . -mtime -1 -type f|cpio -ov|gzip>/tmp/`hostname`.org.`date +%s`.cpio.gz
# Run the external decompozdiff script over every html/txt file.
z-variants: #see also comp/index.html
	find \( -name \*.html -o -name \*.txt \) \
	-print -exec decompozdiff {} \;
# Run the external iconvdiff script ($@ is the target name) likewise.
iconvdiff:; find \( -name \*.html -o -name \*.txt \) -print -exec $@ {} \;
#comp/configuration/index.html: Can't set viewport. So do by hand.
#	cd $(dir $@) && tree -a -H . -T "Dan Jacobson's configuration files" \
#	-I $(notdir $@) | tee $(notdir $@) | w3m -dump -T text/html
#Bug#565214: --verify doesn't proceed deeper than the top directory, so need:
# Compare local vs. remote listings with the permissions column stripped
# (the .np files produced by the %.np pattern rule).
# Fixed: diff over $^ (all prerequisites), matching the verify target,
# so diff always gets both files -- $? would hand it only the
# newer-than-target subset if a stale target file ever existed.
verify_but_ignore_permissions:/tmp/local_list.np /tmp/remote_list.np
	ls -l /tmp/*_list*
	sum /tmp/*_list*
	diff -U 0 $^
# Strict line-by-line comparison of the full local vs. remote listings.
verify:/tmp/local_list /tmp/remote_list; diff -U 0 $^
# Like verify, but tolerant of missing entries: load the local listing
# keyed by pathname ($<), stream the remote listing against it, print
# each mismatched pair (remote line, then the local fields tagged
# [local]), and finally print local-only leftovers.  Remote-only entries
# are skipped ("next unless $h{...}").  $(\ <newline>) splices keep the
# perl program on one shell line.
verify_sparse: /tmp/local_list /tmp/remote_list
	perl -F\\t -anwle '$$h{$$F[-1]}=[@F[-2,-3]];END{open(my $$re, "<", "/tmp/remote_list");'\
	'while (my $$line = readline($$re)){'\
	'chomp $$line; @K=split/\t/,$$line;next unless $$h{$$K[-1]};'\
	'if($$h{$$K[-1]}[0] ne $$K[-2] || $$h{$$K[-1]}[1] ne $$K[-3])$(\
	){print "\n", $$line, $(\
	) "\n", join "\t", (reverse @{$$h{$$K[-1]}}), q([local]);} delete $$h{$$K[-1]};}$(\
	)for(sort keys %h){print "[local] $$_"}}' $<
# Drop cached listing/sum files so the next verify regenerates them.
verify_clear_cache:
	rm -v /tmp/local_list* /tmp/remote_list* /tmp/*_sums||:
# Permission-column histogram for each cached listing, one after the other.
verify_permissions1:/tmp/local_list /tmp/remote_list
	for i in $^; do echo $$i:; cut -f 1 $$i|sort|uniq -c; done
# List entries whose permission string is not one of the expected modes.
# Fixed: the alternation listed -rw-r--r-- twice; duplicate removed
# (behavior unchanged).
odd_permiss:; find * -ls|perl -nwle 'next if /-rw-r--r--|drwxr-xr-x|-rwxr-xr-x/; print;'
# Shared recipe: stable, sorted "mode name" listing of every path.
Stat=find|sort|xargs stat --format="%a %n"
stat_list:; $(Stat)
# Capture remote and local stat listings.  The quoted '&&' executes on
# the remote side (cd $J && make stat_list there), while > redirects the
# ssh output locally.
stat_get:; set -eu; ssh $J cd $J '&&' make stat_list > /tmp/StatRem; $(Stat) > /tmp/StatLocal
# From the two captures, print chmod commands for every path whose
# remote mode differs from local (or which the remote listing lacks).
stat_compare:
	perl -we 'my %k; open FL, "/tmp/StatLocal" or die; while(<FL>){next unless @F=split; $$k{$$F[1]}=$$F[0]};$(\
	)close FL or die;$(\
	)open FR, "/tmp/StatRem" or die; while(<FR>){next unless @F=split; if(exists $$k{$$F[1]} && $$k{$$F[1]}==$$F[0]){delete $$k{$$F[1]}}};$(\
	)close FR or die; for(sort keys %k){print "chmod $$k{$$_} $$_\n"};'
# Strip the leading permissions column to make a "no permissions" copy.
# Fixed: use $< (the first prerequisite), the idiomatic form in a
# pattern rule; with a single prerequisite it is equivalent to $?.
%.np:%;	cut -f 2- $< > $@
# Cached listing/checksum files: remote versions via ssh (same umask U,
# from inside the site directory J), local versions directly; the local
# rules refuse to cache an empty file (test -s).
# NOTE(review): the remote rules have no test -s guard, so a failed ssh
# can cache an empty file; bare "make" (not $(MAKE)) is used because the
# quoted command runs on the remote host.
/tmp/remote_list:; ssh $J 'umask $U && cd $J && make -s local_list' > $@
/tmp/local_list:; umask $U && make -s local_list > $@ &&  test -s $@
/tmp/remote_sums:; ssh $J 'umask $U && cd $J && make -s local_sums' > $@
/tmp/local_sums:; umask $U && make -s local_sums > $@ && test -s $@
# Compare local vs. remote checksum listings.
# Fixed: diff over $^ (all prerequisites), matching the verify target,
# so diff always receives both files -- $? only lists prerequisites
# newer than the target.
verify_sums:/tmp/local_sums /tmp/remote_sums; diff -U 0 $^
# Canonical listing: permissions, size, path (tab separated), sorted by
# path.  Directory sizes are masked to "----" (they differ between
# filesystems); dotfile service directories are excluded; the perl dies
# if any entry has unexpected permissions.
local_list:
	find .??* * -ls|\
	egrep -v \\.dh-diag\|\\.well-known|\
	perl -alnwe 'die $$_ unless $$F[2] =~ /^(-r..r-.r-.|dr.xr-xr-x)/;'\
	'if($$F[2]=~/^d/){$$F[6]="----"};$(\
	)print join "\t",@F[2,6,-1]'|sort -k 3
# Checksums of every file, sorted for stable diffs.
local_sums:; find -type f|sort|xargs sum
# Fetch robots.txt from every http:// site linked off the front page
# (one wwwoffle call per URL).
external_robots:
	mech-dump --links index.html|perl -nwle 'next unless m!http://[^/]+/!; print $$& . "robots.txt"'|xargs -n 1 wwwoffle
# Upload files modified in the last $min minutes (default 111); setting
# TEST1/TEST2 turns the scp/xcpiome steps into dry-run echoes.
scp: #mmin, not cmin!
	set -ex -- $$(find * -type f -mmin -$${min-111});\
	for i do $${TEST1+echo} scp $$i $J:$J/$$i; $${TEST2+echo} xcpiome $$i; sleep 3; done
# KML/KMZ files missing a /kml/Document/name element (zcat -f handles
# both the compressed and the plain case).
kml_no_name:
	set -- $$(find -name \*.km[lz]); for i do zcat -f $$i | xml2 | grep -q /kml/Document/name || echo $$i; done
# Two targets, one recipe: grep pages for the target's own name ($@).
staticmap panoramio:; find * -name \*.html | xargs grep -nH -e $@
# Show which PNGs carry transparency (Alpha / png:tRNS lines).
transparent: # Fix with: https://stackoverflow.com/a/8437562 then maybe pngcrush
	set -eu -- $$(find * -name \*.png); identify -verbose $$@|egrep 'png:tRNS|Alpha:|Image:'
# Per page, report the largest width="..." attribute exceeding 320px,
# widest pages first.
wide_images: #but what about wide <pre> tables?
	set $$(find * -name \*.html); perl -nwle 'next unless /width="(\d+)"/ && $$1 > 320;$(\
	)for($$k{$$ARGV}){$$_=$$1 if $$1 > ($$_||0);}; END{for(sort keys %k){print "$$k{$$_}\t$$_";};}' $$@|sort -rn
# Count http-equiv occurrences per page, smallest counts first.
http-equiv:
	grep --include=\*.html --recursive --count $@ *|perl -F: -awnle 'print $$F[1]," ",$$F[0];'|sort -n
# http://validator.w3.org/i18n-checker/
# For each directory depth 0-5, show the href of lines matching
# "index.html...積" (presumably the back-to-home link text), to eyeball
# that link depth matches directory depth.
index_back_to_home_page_depth_checker: #rough
	for i in 0 1 2 3 4 5; do find * -maxdepth $$i -mindepth $$i -name \*.html|xargs grep -h index.html.*積;\
	echo ==========================================; done|perl -pwle 's/.*href//;'
# Identify the text encoding of extensionless files.
files_without_extensions:
	find -type f ! -name '*.*'|xargs file --mime-encoding
# Recently-modified non-html/jpg files still using the old
# "Update Count" header format.
old_header_format:
	find -type f ! -name \*.html ! -name \*.jpg -newermt 1/1/2023|\
	xargs grep -F Update\ Count
