package MARC::File::USMARC;

=head1 NAME

MARC::File::USMARC - USMARC-specific file handling

=cut

use strict;
use integer;
# NOTE(review): a pragma loaded inside a string eval is lexically scoped to
# that eval, so these two lines may not actually switch on bytes/warnings for
# the rest of this file.  Left untouched because really enabling them would
# change runtime behavior -- confirm the intent before changing.
eval 'use bytes' if $] >= 5.006;
eval 'use warnings' if $] >= 5.006;

use vars qw( $ERROR );

=head1 VERSION

Version 1.10

    $Id: USMARC.pm 3430 2002-09-24 05:17:39Z jrm21 $

=cut

use vars '$VERSION'; $VERSION = '1.10';

use MARC::File;
use vars qw( @ISA ); @ISA = qw( MARC::File );

use MARC::Record qw( LEADER_LEN );
use constant SUBFIELD_INDICATOR => "\x1F";
use constant END_OF_FIELD => "\x1E";
use constant END_OF_RECORD => "\x1D";
use constant DIRECTORY_ENTRY_LEN => 12;

=head1 SYNOPSIS

    use MARC::File::USMARC;

    my $file = MARC::File::USMARC::in( $filename );

    while ( my $marc = $file->next() ) {
        # Do something
    }
    $file->close();
    undef $file;

=head1 EXPORT

None.

=head1 METHODS

=for internal
Internal function to get the next raw record out of a file.

=cut

# Reads raw records from $self->{fh}, one END_OF_RECORD-terminated chunk at a
# time, and returns the first one whose leading five characters are digits
# equal to the chunk's length.  Chunks that fail either test are reported via
# $self->_warn() and skipped.  Returns undef once the handle is at end of file.
sub _next {
    my $self = shift;

    my $fh = $self->{fh};

    # Each readline below yields everything up to and including the next
    # end-of-record marker.
    local $/ = END_OF_RECORD;

    # A loop rather than self-recursion: a long run of malformed records no
    # longer adds one stack frame per bad record.
    until ( eof($fh) ) {
        my $usmarc = <$fh>;

        if ( length($usmarc) < 5 ) {
            $self->_warn( "Couldn't find record length" );
            next;
        }

        my $reclen = substr($usmarc,0,5);

        # The numeric comparison is only reached when the prefix is all digits.
        if ( $reclen !~ /^\d{5}$/ or $reclen != length($usmarc) ) {
            $self->_warn( "Invalid record length \"$reclen\"" );
            next;
        }

        return $usmarc;
    }

    return undef;
}

=head2 decode()

Constructor for handling data from a USMARC file.  This function takes care of
all the tag directory parsing & mangling.

Any warnings or coercions can be checked in the C<warnings()> function.

=cut

# Parses one raw USMARC record string into a new MARC::Record.
#
# May be called as a plain function with the record text, or as a
# class/object method on anything whose class name starts with MARC::File,
# in which case the invocant is discarded and the next argument is the text.
#
# Returns the populated record on success.  On a structural problem it
# returns whatever $marc->_gripe() / $marc->_warn() return (both are defined
# outside this file), or undef when add_fields() fails.
sub decode {
    my $text = shift;
    $text = shift if (ref($text)||$text) =~ /^MARC::File/;

    my $marc = MARC::Record->new();

    # Check for an all-numeric record length
    ($text =~ /^(\d{5})/)
        or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric" );

    my $reclen = $1;
    ($reclen == length($text))
        or return $marc->_gripe( "Invalid record length: Leader says $reclen bytes, but it's actually ", length( $text ) );

    $marc->leader( substr( $text, 0, LEADER_LEN ) );

    # Everything after the leader is the directory followed by the fields,
    # each terminated by an end-of-field marker.
    my @fields = split( END_OF_FIELD, substr( $text, LEADER_LEN ) );
    my $dir = shift @fields or return $marc->_warn( "No directory found" );

    (length($dir) % DIRECTORY_ENTRY_LEN == 0)
        or return $marc->_gripe( "Invalid directory length" );
    my $nfields = length($dir) / DIRECTORY_ENTRY_LEN;

    my $finalfield = pop @fields;
    # Nothing at all followed the directory: treat the terminator as empty so
    # the check below reports it instead of comparing an undefined value.
    $finalfield = "" unless defined $finalfield;

    # Check for the record terminator, and ignore it
    ($finalfield eq END_OF_RECORD)
        or $marc->_gripe( "Invalid record terminator: \"$finalfield\"" );

    # Walk thru the directories, and shift off the fields while we're at it
    # Shouldn't be any non-digits anywhere in any directory entry
    my @directory = unpack( "A3 A4 A5" x $nfields, $dir );
    my @bad = grep /\D/, @directory;
    if ( @bad ) {
        return $marc->_gripe( "Non-numeric entries in the tag directory: ", join( ", ", map { "\"$_\"" } @bad ) );
    }

    my $databytesused = 0;
    while ( @directory ) {
        # Each directory entry is a (tag, length, offset) triple.
        my $tagno = shift @directory;
        my $len = shift @directory;
        my $offset = shift @directory;
        my $tagdata = shift @fields;

        # The directory promised a field that the data section doesn't have.
        defined $tagdata
            or return $marc->_gripe( "Directory has more entries than the record has fields" );

        warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;

        # Check directory validity
        ($tagno =~ /^\d\d\d$/)
            or return $marc->_gripe( "Invalid field number in directory: \"$tagno\"" );

        # +1 accounts for the end-of-field marker that split() removed.
        ($len == length($tagdata) + 1)
            or $marc->_warn( "Invalid length in the directory for tag $tagno" );

        ($offset == $databytesused)
            or $marc->_warn( "Directory offsets are out of whack" );
        $databytesused += $len;

        if ( $tagno < 10 ) {
            # Tags below 010 are stored as a single data string.
            $marc->add_fields( $tagno, $tagdata )
                or return undef; # We're relying on add_fields() having set $MARC::Record::ERROR
        } else {
            # First piece is the two indicators; the rest are subfields.
            my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
            my $indicators = shift @subfields
                or return $marc->_gripe( "No subfields found." );
            my ($ind1,$ind2);
            if ( $indicators =~ /^([0-9 ])([0-9 ])$/ ) {
                ($ind1,$ind2) = ($1,$2);
            } else {
                $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks\n" );
                ($ind1,$ind2) = (" "," ");
            }

            # Split the subfield data into subfield name and data pairs
            my @subfield_data = map { (substr($_,0,1),substr($_,1)) } @subfields;
            $marc->add_fields( $tagno, $ind1, $ind2, @subfield_data )
                or return undef;
        }
    } # while

    # Once we're done, there shouldn't be any fields left over: They should all have shifted off.
    (@fields == 0)
        or return $marc->_gripe( "I've got leftover fields that weren't in the directory" );

    return $marc;
}

=head2 update_leader()

If any changes get made to the MARC record, the first 5 bytes of the
leader (the length) will be invalid.  This function updates the leader
with the correct length of the record as it would be if written out
to a file.

=cut

# Accepts the record either as the sole argument or, when invoked through a
# MARC::File class or object, as the argument after the invocant -- the same
# convention encode() uses.  Returns whatever set_leader_lengths() returns.
sub update_leader {
    my $marc = shift;
    $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;

    # Only the two lengths are needed here; the field and directory lists
    # are discarded.
    my (undef,undef,$reclen,$baseaddress) = _build_tag_directory($marc);

    $marc->set_leader_lengths( $reclen, $baseaddress );
}

=head2 _build_tag_directory()

Function for internal use only: Builds the tag directory that gets
put in front of the data in a MARC record.

Returns two array references, and two lengths: The data fields
themselves, the tag directory, the length of all data (including the
Leader that we expect will be added), and the size of the Leader and
tag directory (including the directory's end-of-field marker).

=cut

# Serializes every field of a record and builds the matching directory.
#
# Accepts the record as the sole argument, or as the argument following a
# MARC::File class/object invocant.  Dies unless it ends up holding a
# MARC::Record (or an object derived from it).
#
# Returns, in this order: a reference to the list of serialized fields, a
# reference to the list of 12-character directory entries, the total record
# length, and the base address of the data.
sub _build_tag_directory {
    my $marc = shift;
    $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;

    # isa() rather than an exact class-name match, so subclasses pass too.
    die "Wanted a MARC::Record but got a ", ref($marc)
        unless ref($marc) && UNIVERSAL::isa( $marc, "MARC::Record" );

    my @fields;
    my @directory;

    my $dataend = 0;
    for my $field ( $marc->fields() ) {
        # Dump data into proper format
        my $str = $field->as_usmarc;
        push( @fields, $str );

        # Create directory entry
        # NOTE(review): the sprintf widths are minimums, so a field longer
        # than 9999 bytes or an offset above 99999 produces an entry wider
        # than DIRECTORY_ENTRY_LEN.  Not handled here.
        my $len = length $str;
        my $direntry = sprintf( "%03d%04d%05d", $field->tag, $len, $dataend );
        push( @directory, $direntry );
        $dataend += $len;
    }

    my $baseaddress =
        LEADER_LEN +                                    # better be 24
        ( scalar(@directory) * DIRECTORY_ENTRY_LEN ) +  # all the directory entries
        1;                                              # end-of-field marker

    my $total =
        $baseaddress +  # stuff before first field
        $dataend +      # Length of the fields
        1;              # End-of-record marker

    return (\@fields, \@directory, $total, $baseaddress);
}

=head2 encode()

Returns a string of characters suitable for writing out to a USMARC file,
including the leader, directory and all the fields.

=cut

# Accepts the record as the sole argument, or as the argument following a
# MARC::File class/object invocant.  Updates the record's leader lengths as a
# side effect before serializing.
sub encode {
    my $marc = shift;
    $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;

    my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
    $marc->set_leader_lengths( $reclen, $baseaddress );

    # Glomp it all together
    return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
}

1;

__END__

=head1 RELATED MODULES

L<MARC::Record>

=head1 TODO

Make some sort of autodispatch so that you don't have to explicitly
specify the MARC::File::X subclass, sort of like how DBI knows to
use DBD::Oracle or DBD::Mysql.

=head1 LICENSE

This code may be distributed under the same terms as Perl itself.

Please note that these modules are not products of or supported by the
employers of the various contributors to the code.

=head1 AUTHOR

Andy Lester, E<lt>marc@petdance.comE<gt> or E<lt>alester@flr.follett.comE<gt>

=cut