#!/usr/bin/perl
## 排序台灣門牌 Taiwan address sorter
## 輸入： Input: 台灣中文 addresses one per line.
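## 如內政部戶政司格式，若含鄰就要如 001 鄰。
## (Format as used by the Ministry of the Interior Department of Household
## Registration; if a 鄰 (neighborhood) number is present, write it like 001 鄰.)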
## 輸出： Output: sorted list. 詳：
# https://www.jidanni.org/geo/house_numbering/sorting/
# Copyright       : https://www.gnu.org/licenses/gpl.html
# Author          : 積丹尼 Dan Jacobson https://www.jidanni.org/
# Created On      : Mon Apr 19 01:09:31 2021
# Last Modified By: Dan Jacobson
# Last Modified On: Mon Apr 26 06:48:52 2021
# Update Count    : 307
use strict;
use warnings FATAL => q/all/;
use utf8;
use open qw/:std :encoding(utf8)/;
use Unicode::Normalize 'decompose';
# Sort keys for every input line, looked up by the line exactly as read:
#   [0]  the text up to and including "<digits>鄰", with the digits
#        zero-padded to three places, or "" when the line has no 鄰;
#   [1]  the digit runs found from "<digits>樓" to the end of the line;
#   [2]  the digit runs in whatever is left once both parts are removed.
my %h;
while (<>) {
    chomp;
    exists $h{$_} and die "\"$_\" already seen! 雙胞胎！";

    my $original = $_;

    # Work on a compatibility-decomposed copy (second argument true);
    # presumably so that fullwidth digits behave as plain numbers -- the
    # line that is eventually printed is the untouched original.
    my $work = decompose( $original, 1 );

    # Cut the neighborhood part out of the working copy and normalize it.
    my $neighborhood = "";
    if ( $work =~ s/(.*?)(\d+)(鄰)// ) {
        $neighborhood = $1 . sprintf( "%03d", $2 ) . $3;
    }

    # Cut the floor part (from the floor number to end of line) out as well.
    my $floor = "";
    if ( $work =~ s/(\d+樓.*)// ) {
        $floor = $1;
    }

    $h{$original} =
      [ $neighborhood, [ $floor =~ /\d+/g ], [ $work =~ /\d+/g ] ];
}

# rcmp( LIST, POS ) -- numeric, element-by-element comparison for sort blocks.
#
# Compares the number lists stored at index LIST of the %h entries for the
# sort variables $a and $b, starting at position POS.  A position that lies
# past the end of one list counts as 0 on that side.
#
# Returns -1 or 1 at the first position where the two lists differ, and
# nothing (false) when no difference is found, so calls can be chained
# with "or" inside a sort block.
#
# The end of a list is detected with defined(), not by truth: a component
# that is literally "0" is compared like any other number instead of
# cutting the comparison short and hiding the numbers that follow it.
sub rcmp {
    my ( $list, $pos ) = @_;
    my $left  = $h{$a}[$list];
    my $right = $h{$b}[$list];

    for ( ; ; $pos++ ) {
        my $x = $left->[$pos];
        my $y = $right->[$pos];

        # Both lists exhausted: they are equal as far as this key goes.
        return unless defined($x) || defined($y);

        $x = 0 unless defined $x;
        $y = 0 unless defined $y;

        my $order = $x <=> $y;
        return $order if $order;
    }
}

# Emit every input line once, ordered by: the 鄰 key compared as text, then
# the remaining address numbers, then the floor numbers, and finally the
# raw line itself as the last tie-breaker.
print map { "$_\n" }
  sort {
    $h{$a}[0] cmp $h{$b}[0]
      or rcmp( 2, 0 )
      or rcmp( 1, 0 )
      or $a cmp $b
  }
  keys %h;
