X-Git-Url: https://git.stg.codes/stg.git/blobdiff_plain/793149f04ce52bf75dc4efb7b83a3f8ed30d1fff..35ec793690104efdd610964d255302a0310a2daf:/doc/xslt/fo/pdf2index diff --git a/doc/xslt/fo/pdf2index b/doc/xslt/fo/pdf2index deleted file mode 100755 index c14d8ecd..00000000 --- a/doc/xslt/fo/pdf2index +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/perl -- # -*- Perl -*- - -# this needs some cleanup... - -my $PSTOTEXT = "pstotext"; - -my $pdf = shift @ARGV; - -my $index = ""; -my $inindex = 0; -open (F, "$PSTOTEXT $pdf |"); -while () { - if (/^<\/index/) { - $index .= $_; - $inindex = 0; - } - $inindex = 1 if /^.*?<\/phrase>\s*)+)/s) { - $cindex .= $1; - $_ = $2; - $index = $'; # ' - - my @pages = m/.*?<\/phrase>\s*/sg; - - # Expand ranges - if ($#pages >= 0) { - my @mpages = (); - foreach my $page (@pages) { - my $pageno = &pageno($page); - if ($pageno =~ /^([0-9]+)[^0-9]([0-9]+)$/) { # funky - - for (my $count = $1; $count <= $2; $count++) { - push (@mpages, "$count"); - } - } else { - push (@mpages, $page); - } - } - @pages = sort rangesort @mpages; - } - - # Remove duplicates... - if ($#pages > 0) { - my @mpages = (); - my $current = ""; - foreach my $page (@pages) { - my $pageno = &pageno($page); - if ($pageno ne $current) { - push (@mpages, $page); - $current = $pageno; - } - } - @pages = @mpages; - } - - # Collapse ranges... - if ($#pages > 1) { - my @cpages = (); - while (@pages) { - my $count = 0; - my $len = &rangelen($count, @pages); - if ($len <= 2) { - my $page = shift @pages; - push (@cpages, $page); - } else { - my $fpage = shift @pages; - my $lpage = ""; - while ($len > 1) { - $lpage = shift @pages; - $len--; - } - my $fpno = &pageno($fpage); - my $lpno = &pageno($lpage); - $fpage =~ s/>$fpno${fpno}-$lpno//; - $page =~ s/^//; - - return $1 if $page =~ /^([^<>]+)/; - return "?"; -} - -sub rangesort { - my $apno = &pageno($a); - my $bpno = &pageno($b); - - # Make sure roman pages come before arabic ones, otherwise sort them in order - return -1 if ($apno !~ /^\d+/ && $bpno =~ /^\d+/); - return 1 if ($apno =~ /^\d+/ && $bpno !~ /^\d+/); - return $apno <=> $bpno; -} - -sub rangelen { - my $count = shift; - my @pages = @_; - my $len = 1; - my $inrange = 1; - - my $current = &pageno($pages[$count]); - while ($count < $#pages && $inrange) { - $count++; - my $next = &pageno($pages[$count]); - if ($current + 1 eq $next) { - $current = $next; - $inrange = 1; - $len++; - } else { - $inrange = 0; - } - } - - return $len; -}