#!/usr/bin/perl
## Taiwan address sorter: sorts Taiwan house-number addresses.
## Input: Taiwanese Chinese-language addresses, one per line, as in the
## format used by the Department of Household Registration, Ministry of
## the Interior; if a neighborhood (鄰) is present it should look like 001鄰.
## Output: sorted list. Details:
# https://www.jidanni.org/geo/house_numbering/sorting/
# Copyright : https://www.gnu.org/licenses/gpl.html
# Author : 積丹尼 Dan Jacobson https://www.jidanni.org/
# Created On : Mon Apr 19 01:09:31 2021
# Last Modified By: Dan Jacobson
# Last Modified On: Mon Apr 26 06:48:52 2021
# Update Count : 307
use strict;
use warnings FATAL => q/all/;
use utf8;
use open qw/:std :encoding(utf8)/;
use Unicode::Normalize 'decompose';

# Sort records, keyed by the original (unmodified) input line.
# Each value is an array ref:
#   [0] string key: the text up to and including "<digits>鄰", with the
#       number zero-padded to three digits ("" when the line has no 鄰),
#   [1] list of the numbers found from "<digits>樓" to the end of the line,
#   [2] list of the numbers found in whatever remains of the line.
my %h;

while (<>) {
    chomp;
    die "\"$_\" already seen! 雙胞胎!" if $h{$_};
    my $original = $_;

    # Compatibility decomposition; presumably so that full-width digits
    # are matched and compared as ordinary digits.
    $_ = decompose( $_, 1 );

    # Cut everything through the first "<digits>鄰" out of $_ and keep it
    # as a string key with a fixed-width number, so "cmp" orders it.
    my $neighborhood =
      s/(.*?)(\d+)(鄰)// ? $1 . sprintf( "%03d", $2 ) . $3 : "";

    # Cut the floor part ("<digits>樓" through end of line) out of $_ so
    # its numbers are ranked separately from the rest of the address.
    my $floor_part = s/(\d+樓.*)// ? $1 : "";

    $h{$original} =
      [ $neighborhood, [ $floor_part =~ /\d+/g ], [/\d+/g] ];
}

# rcmp( $field, $start )
# Numerically compare two number lists element by element, for use inside
# a sort block: reads the records of sort's package variables $a and $b.
#   $field - index into the %h record (1 = floor numbers, 2 = the rest)
#   $start - index of the first list element to compare
# A missing element counts as 0, so a shorter list sorts before a longer
# one with the same leading numbers.  Returns the first non-zero <=> result,
# or nothing (false) once both lists are exhausted without a difference.
sub rcmp {
    my ( $field, $start ) = @_;
    my $left  = $h{$a}[$field];
    my $right = $h{$b}[$field];

    for ( my $i = $start ; ; $i++ ) {
        my ( $x, $y ) = ( $left->[$i], $right->[$i] );

        # Test definedness, not truth: a literal 0 is a real component and
        # must not be mistaken for the end of the list.
        return unless defined $x || defined $y;

        my $order =
          ( defined $x ? $x : 0 ) <=> ( defined $y ? $y : 0 );
        return $order if $order;
    }
}

# Order: neighborhood key, then address numbers, then floor numbers, and
# finally the raw line as a tie-breaker.
print "$_\n"
  for sort {
         $h{$a}[0] cmp $h{$b}[0]
      or rcmp( 2, 0 )
      or rcmp( 1, 0 )
      or $a cmp $b
  } keys %h;