#!/usr/local/bin/perl -w ######################################################################## # # sortLC.pl : a Perl app to sort Library of Congress (LC) call numbers # # Version: 1.2 beta (as in *BETA*) # # Created by Michael Doran, doran@uta.edu # # University of Texas at Arlington Libraries # Box 19497, Arlington, TX 76019, USA # # Usage: # sortLC.pl < call_number_file [> sorted_file] # or: # cat call_number_file | sortLC.pl [> sorted_file] # # The "call_number_file" should contain one call number per line. # # Identical call numbers are deduped. # # Unparsable call numbers printed to STDERR with error message. # # Disclaimer: This application relies on an LC call number # normalization routine for sorting. The normalization # routine is far from perfect, therefore the sorted # output will also be imperfect. # # Adapted from subroutines in the ShelfLister application. # See: http://rocky.uta.edu/doran/shelflister/ # ######################################################################## # # Changes: # # 1.2 Minor change to regex for error handling. # changed this: $lc_call_no =~ /^([A-Z]{1,3})\s*(\d*) # to this: $lc_call_no =~ /^([A-Z]{1,3})\s*(\d+) # 1.1 Per Bryan Baldus suggestion, used more efficient syntax for # adding elements to list(s). # ######################################################################## # # Copyright 2005, The University of Texas at Arlington ("UTA"). # All rights reserved. # # By using this software the USER indicates that he or she # has read, understood and will comply with the following: # # UTA hereby grants USER permission to use, copy, modify, and # distribute this software and its documentation for any # purpose and without fee, provided that: # # 1. the above copyright notice appears in all copies of the # software and its documentation, or portions thereof, and # # 2. a full copy of this notice is included with the software # and its documentation, or portions thereof, and # # 3. neither the software nor its documentation, nor portions # thereof, is sold for profit. Any commercial sale or license # of this software, copies of the software, its associated # documentation and/or modifications of either is strictly # prohibited without the prior consent of UTA. # # Title to copyright to this software and its associated # documentation shall at all times remain with UTA. No right # is granted to use in advertising, publicity or otherwise any # trademark, service mark, or the name of UTA. # # This software and any associated documentation are provided # "as is," and UTA MAKES NO REPRESENTATIONS OR WARRANTIES, # EXPRESSED OR IMPLIED, INCLUDING THOSE OF MERCHANTABILITY OR # FITNESS FOR A PARTICULAR PURPOSE, OR THAT USE OF THE SOFTWARE, # MODIFICATIONS, OR ASSOCIATED DOCUMENTATION WILL NOT INFRINGE # ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER INTELLECTUAL # PROPERTY RIGHTS OF A THIRD PARTY. UTA, The University of Texas # System, its Regents, officers, and employees shall not be # liable under any circumstances for any direct, indirect, special, # incidental, or consequential damages with respect to any claim # by USER or any third party on account of or arising from the # use, or inability to use, this software or its associated # documentation, even if UTA has been advised of the possibility # of those damages. # # Submit commercialization requests to: The University of Texas # at Arlington, Office of Grant and Contract Services, 701 South # Nedderman Drive, Box 19145, Arlington, Texas 76019-0145, # ATTN: Director of Technology Transfer. # ######################################################################## use strict; &GetInput; ########################################################## # GetInput ########################################################## sub GetInput { my $call_no = (); my @input_list = (); while (defined ($call_no = )) { chomp($call_no); if ($call_no) { push @input_list, $call_no; } } my @sorted_list = &sortLC(@input_list); for (@sorted_list) { print "$_\n"; } } ########################################################## # sortLC ########################################################## sub sortLC { my (@unsorted_list) = @_; my @sorted_list = (); my $normal_call_no; my %call_no_array = (); for my $orig_call_no (@unsorted_list) { $normal_call_no = &NormalizeLC($orig_call_no); if ($normal_call_no) { if (! $call_no_array{$normal_call_no}) { $call_no_array{$normal_call_no} = $orig_call_no; } } } foreach my $key (sort keys %call_no_array) { push @sorted_list, $call_no_array{$key}; } return (@sorted_list); } ########################################################## # NormalizeLC ########################################################## sub NormalizeLC { my ($lc_call_no_orig) = @_; my $error_message = "Unparsable call number:"; my ($initial_letters, $class_number, $decimal_number, $cutter_1_letter, $cutter_1_number, $cutter_2_letter, $cutter_2_number, $the_trimmings, $normalized, $lc_call_no); # Remove any initial white space $lc_call_no = $lc_call_no_orig; $lc_call_no =~ s/^\s*//g; # Convert all alpha to uppercase $lc_call_no = uc($lc_call_no); if ($lc_call_no =~ /^([A-Z]{1,3})\s*(\d+)\s*\.*(\d*)\s*\.*\s*([A-Z]*)(\d*)\s*([A-Z]*)(\d*)\s*(.*)$/) { $initial_letters = $1; $class_number = $2; $decimal_number = $3; $cutter_1_letter = $4; $cutter_1_number = $5; $cutter_2_letter = $6; $cutter_2_number = $7; $the_trimmings = $8; if ($cutter_2_letter && ! ($cutter_2_number)) { $the_trimmings = $cutter_2_letter . $the_trimmings; $cutter_2_letter = ''; } if ($class_number) { $class_number = sprintf("%5s", $class_number); } $decimal_number = sprintf("%-12s", $decimal_number); if ($cutter_1_number) { $cutter_1_number = " $cutter_1_number"; } if ($cutter_2_letter) { $cutter_2_letter = " $cutter_2_letter"; } if ($cutter_2_number) { $cutter_2_number = " $cutter_2_number"; } if ($the_trimmings) { $the_trimmings =~ s/(\.)(\d)/$1 $2/g; $the_trimmings =~ s/(\d)\s*-\s*(\d)/$1-$2/g; $the_trimmings =~ s/(\d+)/sprintf("%5s", $1)/ge; $the_trimmings = " $the_trimmings"; } $normalized = "$initial_letters" . "$class_number" . "$decimal_number" . "$cutter_1_letter" . "$cutter_1_number" . "$cutter_2_letter" . "$cutter_2_number" . "$the_trimmings"; return "$normalized"; } else { print STDERR "$error_message $lc_call_no_orig\n"; return (); } } exit(0);