#!/usr/bin/perl use strict ; #use warnings ; use File::Slurp ; use Data::Dumper ; my @acord_fields = qw( Element Length Description ) ; my @db_fields = qw( fld_num fld_len al3_desc ) ; my %seen_field ; my $db_rec_descs = process_acord_files() ; #print Dumper $db_rec_descs ; write_db_csv( $db_rec_descs ) ; write_parser_info( $db_rec_descs ) ; exit ; sub process_acord_files { my @db_rec_descs ; foreach my $accord_file ( @ARGV ) { # read in and skip all the lines of ---- my @acord_lines = grep !/----/, read_file( $accord_file ) ; # the first row has the column names which are the acord description fields # get the fields for this acord record description with trimmed whitespace my @desc_fields = map /(\w+)/, split /\|/, shift @acord_lines ; # for some odd reason they name 2 of the columns 'Group'. this fixes # it and we don't use this column anyway $desc_fields[4] .= '_Type' ; # print map( "[$_]", @desc_fields), "\n" ; my @field_descs ; my $rec_type ; foreach my $line ( @acord_lines ) { my %al3_field ; @al3_field{ 'dummy', @desc_fields } = map { s/^\s+//; s/\s+$//; $_} split /\|/, $line ; # if this is a GROUP description get the group type and skip the row if ( $al3_field{ReferenceID} eq 'GROUP' ) { $rec_type = $al3_field{Group} ; next ; } #print Dumper \%al3_field ; push @field_descs, create_db_desc( \%al3_field, $rec_type ) ; } push @db_rec_descs, { type => $rec_type, field_descs => \@field_descs, } ; } #print Dumper \@db_rec_descs ; return \@db_rec_descs ; } sub create_db_desc { my ( $acord_rec, $rec_type ) = @_ ; my %db_rec ; @db_rec{ @db_fields } = @{$acord_rec}{ @acord_fields } ; my $fld_name = lc $db_rec{ al3_desc } ; $fld_name =~ tr{\[]()'-}{}d ; $fld_name =~ s/deler?ted\s*//i ; $fld_name =~ s/future//i ; $fld_name =~ s/\s*see\s+\w+\s*//i ; $fld_name =~ tr{ /\\}{_}s ; if( $fld_name && $seen_field{ "$rec_type:$fld_name" }++ >= 1 ) { print "DUPE: $rec_type:$fld_name $seen_field{ $fld_name }\n" ; $fld_name .= $seen_field{ "$rec_type:$fld_name" } ; } $db_rec{ fld_name } ||= $fld_name ; # clean up field numbers that start with A. they are deleted. make the # number prefix 12 so they are 121 and up. 6HRU has over 109 fields already. $db_rec{ fld_num } =~ s/[ABC]/12/ ; # print Dumper \%db_rec ; return \%db_rec ; } sub write_db_csv { my( $rec_info ) = @_ ; my $csv_text ; foreach my $rec ( @{$rec_info} ) { my $rec_type = $rec->{type} ; my $rec_fields = $rec->{field_descs} ; foreach my $field_row ( @{$rec_fields} ) { $csv_text .= sprintf "$rec_type|%3s|%3d|%-60s|%-50s\n", @{$field_row}{ qw( fld_num fld_len fld_name al3_desc ) } ; } } write_file( 'db_csv', $csv_text ) ; } sub write_parser_info { my( $rec_info ) = @_ ; my $parser_text ; foreach my $rec ( @{$rec_info} ) { #print Dumper $rec ; my $rec_type = $rec->{type} ; $parser_text .= < { fields => [ qw( TEXT my $rec_fields = $rec->{field_descs} ; my $unpack_format ; foreach my $field_row ( @{$rec_fields} ) { my $fld_name = $field_row->{fld_name} ; $parser_text .= "\t\t\t$fld_name\n" if $fld_name ; $unpack_format .= ( ( $fld_name ) ? 'A' : 'x' ) . sprintf( '%d', $field_row->{fld_len} ) ; } $parser_text .= < '$unpack_format', }, TEXT } write_file( 'parser_info', $parser_text ) ; }