#!/usr/freeware/bin/perl -w
# written by Mark Bennett and Peter Gavin
#
# This script generates an index of all the NetCDF files found under a
# given directory.  This index has 1 line for each NetCDF file, and
# contains several fields, each separated by a tab.  The fields
# are as follows:
#
#  1. CD name
#  2. Path to the NetCDF file, relative to the base directory
#  3. Compressed file name (when present)
#  4. File name
#  5. Minimum woce_date
#  6. Maximum woce_date
#  7. Minimum latitude in the file
#  8. Maximum latitude in the file
#  9. Westmost longitude in the file
# 10. Eastmost longitude in the file
# 11. Minimum depth
# 12. Maximum depth
#     ** Note: mkinv is coded to set min/max depth to 0.0.  If you want
#        to determine a range, the code will need to be modified.
# 13. Experiment ID (expocode)
# 14. Minimum sea temperature
# 15. Maximum sea temperature
# 16. Minimum wind speed
# 17. Maximum wind speed
# 18. Platform type and name
# 19. Existence of heading                                PL_HD
# 20. Existence of course                                 PL_CRS
# 21. Existence of platform speed                         PL_SPD
# 22. Existence of platform relative wind direction       PL_WDIR
# 23. Existence of platform relative wind speed           PL_WSPD
# 24. Existence of earth relative wind direction          DIR
# 25. Existence of atmospheric pressure                   P
# 26. Existence of air temperature                        T
# 27. Existence of wet-bulb temperature                   TW
# 28. Existence of dewpoint temperature                   TD
# 29. Existence of relative humidity                      RH
# 30. Existence of specific humidity                      Q
# 31. Existence of precipitation                          PRECIP
# 32. Existence of rain rate                              RRATE
# 33. Existence of atmospheric radiation                  RAD
# 34. Existence of weather code                           WX
# 35. Existence of total cloud amount                     TCA
# 36. Existence of low/middle cloud amount                LMCA
# 37. Existence of cloud base height                      ZCL
# 38. Existence of low cloud type                         LCT
# 39. Existence of middle cloud type                      MCT
# 40. Existence of high cloud type                        HCT
#
# All times are given as minutes from 1-1-1980 00:00 UTC.
use strict;
use warnings;
use NetCDF;

########################################################################
# configuration

# Edit these paths to the top directories in your data structure.  The
# directory at position N of @basedir is indexed under the label $dr[N].
my @basedir = (
    "/Net/woce/cd3/MET/data/woce_rv",      # woce data
    "/Net/woce/cd3/MET/data/woce_rels",    # non-woce data
);

# Buoy data is kept in its own tree and is indexed last, under $dr[2].
my $buoy_basedir = "/Net/woce/cd3/MET/data/woce_buoys";

my @dr = ("woce_rv", "woce_rel", "woce_buoy");

# This is the filename you wish to output the index into.  It is taken
# from the -o switch; there is no built-in default.
#$indexfile = "/usr/people/bennett/Woce/data.idx";
my $indexfile;
my $verbose = 0;

# NOTE(review): the option placeholders (e.g. the argument of -o) appear to
# be missing from this text, and -v is not mentioned; the wording is kept
# exactly as found until the intended text is confirmed.
my $usage = "Usage: mkinv -o -h\n -o \n REQUIRED. Name of output file to generate.\n Optional base directry to index.\n -h\n Prints this help message.\n";

# Names of the data variables whose presence is recorded as a 0/1 flag.
my @nc_info_datavars = (
    "PL_HD", "PL_CRS", "PL_SPD", "PL_WDIR", "PL_WSPD", "DIR", "P", "T",
    "TW", "TD", "RH", "Q", "PRECIP", "RRATE", "RAD", "WX", "TCA", "LMCA",
    "ZCL", "LCT", "MCT", "HCT"
);

# Column order of the index file; also the set of keys in an info hash.
my @nc_info_keys = (
    "cd_name", "file_path", "file_compressed_name", "file_name",
    "woce_date_min", "woce_date_max", "latitude_min", "latitude_max",
    "longitude_westmost", "longitude_eastmost", "depth_min", "depth_max",
    "EXPOCODE", "temperature_min", "temperature_max", "wind_speed_min",
    "wind_speed_max", "site", @nc_info_datavars
);

# File names matching these patterns are skipped by the indexer.
# NOTE(review): this means files carrying a "vNNN" version suffix are
# skipped and everything else is indexed, exactly as the original test
# "!defined($version)" did -- confirm that this selection is intended.
my $versioned_ship_file =
    qr/^([A-Z0-9]+)\/[-_A-Z0-9]+\/[A-Z0-9]+\.[0-9]{9}v([0-9]{3})\.nc$/;
my $versioned_buoy_file =
    qr/^([A-Z0-9]+)\/[A-Z0-9]+\.[0-9]{9}v([0-9]{3})\.nc$/;

# Output handles shared by the subs below; opened in the main section.
my $index_fh;      # the index itself
my $no_date_fh;    # no_date.txt: files that have no woce_date variable
my $polar_fh;      # polar.txt: files whose track crosses 180 degrees

########################################################################
# junk routines

# Return the largest value of the list (undef for an empty list).
sub max {
    my ($max, @rest) = @_;
    foreach my $value (@rest) {
        $max = $value if ($value > $max);
    }
    return $max;
}

# Return the smallest value of the list, ignoring values at or below
# -6000.  If the first value is below -6000 the search starts from 9999.
sub min {
    my ($min, @rest) = @_;
    if ($min < -6000) {
        $min = 9999;
    }
    foreach my $value (@rest) {
        $min = $value if ($value > -6000 and $value < $min);
    }
    return $min;
}

# Starting from the first value, take over each later value that is
# positive and smaller than the current one, or any value while the current
# one is negative.  Used for the westmost longitude of a track that crosses
# 180 degrees.
sub maxp {
    my ($max, @rest) = @_;
    foreach my $value (@rest) {
        $max = $value if (($value < $max && $value > 0.0) || ($max < 0.0));
    }
    return $max;
}

# Mirror image of maxp: take over each later value that is negative and
# larger than the current one, or any value while the current one is
# positive.  A first value below -6000 is replaced by 9999 before the scan.
# Used for the eastmost longitude of a track that crosses 180 degrees.
sub minp {
    my ($min, @rest) = @_;
    if ($min < -6000) {
        $min = 9999;
    }
    foreach my $value (@rest) {
        $min = $value if (($value > $min && $value < 0.0) || ($min > 0.0));
    }
    return $min;
}

# Return true when the list holds at least one value in [175, 180] and at
# least one value strictly between -180 and -175, i.e. longitudes on both
# sides of the 180 degree meridian.  Every value is examined, including
# the first one.
sub crosses_date_line {
    my ($east_seen, $west_seen) = (0, 0);
    foreach my $value (@_) {
        $east_seen = 1 if ($value >= 175 and $value <= 180);
        $west_seen = 1 if ($value < -175 and $value > -180);
    }
    return ($east_seen and $west_seen) ? 1 : 0;
}

# round(places, value): round value to the given number of decimal places.
# int() truncates toward zero, so negative values are rounded on their
# magnitude; otherwise e.g. -1.236 would come out as -1.23.
sub round {
    my ($places, $value) = @_;
    my $scale  = 10.0**$places;
    my $scaled = $value * $scale;
    my $whole  = $scaled >= 0 ? int($scaled + .5) : -int(.5 - $scaled);
    return $whole / $scale;
}

# Open a side report for writing.  Returns the handle, or nothing (after a
# warning) when the file cannot be created; callers skip the report then.
sub open_report {
    my ($name) = @_;
    my $fh;
    unless (open $fh, '>', $name) {
        warn "Could not open \"$name\" for writing: $!\n";
        return;
    }
    return $fh;
}

########################################################################
# read one variable along the time dimension
#
#   $ncid        handle returned by NetCDF::open
#   $ncfilename  file name, used in error messages only
#   $varname     name of the variable to read
#   $label       name to show in the "Could not get variable" message
#   $length      number of records to read, starting at record 0
#
# Returns the list of values; dies when the variable cannot be read.
sub read_series {
    my ($ncid, $ncfilename, $varname, $label, $length) = @_;

    my $varid = NetCDF::varid($ncid, $varname);
    die "Could not get id for variable \"$varname\" in file \"$ncfilename\"\n"
        if ($varid == -1);

    # NOTE(review): start and count are handed over as flattened
    # one-element lists, as this script has always done -- confirm against
    # the NetCDF module whether \@start / \@count is required.
    my @start  = ( 0 );
    my @count  = ( $length );
    my @values = ();
    die "Could not get variable \"$label\" in file \"$ncfilename\"\n"
        if (NetCDF::varget($ncid, $varid, @start, @count, \@values) == -1);

    return @values;
}

########################################################################
# read a bunch of stuff from an nc file and return it as a hash
#
#   $ncfilename  path of the file relative to the current directory,
#                "<ship>/<dir>/<file>" (or "<ship>/<file>" for buoys)
#   $dr          data-set label: "woce_rv", "woce_rel" or "woce_buoy"
#
# Returns a reference to a hash with one entry per name in @nc_info_keys.
# Dies when the file, a required variable or a required attribute cannot
# be read.
sub read_nc_info {
    my ($ncfilename, $dr) = @_;
    my %info = ();

    my $ncid = NetCDF::open($ncfilename, NetCDF::NOWRITE());
    die "Could not open \"$ncfilename\" for reading\n" if ($ncid == -1);

    # location fields
    $info{"cd_name"} = "wocemet";
    my @path = split /\//, $ncfilename;
    if ($dr eq "woce_buoy") {
        # buoy files have no intermediate directory
        $path[2] = $path[1];
        $path[1] = "";
        $info{"file_path"} = join "/", "./data", $dr, $path[0], "";
    }
    else {
        $info{"file_path"} = join "/", "./data", $dr, $path[0], $path[1], "";
    }
    $info{"file_name"} = $path[2];

    if (($dr eq "woce_rv" and ($path[0] eq "DBBH" or $path[0] eq "KCEJ"))
        or ($dr eq "woce_rel")) {
        $info{"file_compressed_name"} = $info{"file_name"} . ".gz";
    }
    else {
        $info{"file_compressed_name"} = "";
    }

    # $ncnvars is passed by reference like the other output arguments; it
    # bounds the variable scan further down.
    my ($ncndims, $ncnvars, $ncnatts, $ncrecdim) = (0, 0, 0, 0);
    die "Could not inquire file \"$ncfilename\"\n"
        if (NetCDF::inquire($ncid, \$ncndims, \$ncnvars, \$ncnatts,
                            \$ncrecdim) == -1);

    # get number of records
    my $ncdimname     = "";
    my $dim_time_size = "";
    my $ncdimid = NetCDF::dimid($ncid, "time");
    die "Could not get id for dimension \"time\" in file \"$ncfilename\"\n"
        if ($ncdimid == -1);
    die "Could not inquire dimension \"time\" in file \"$ncfilename\"\n"
        if (NetCDF::diminq($ncid, $ncdimid, \$ncdimname,
                           \$dim_time_size) == -1);

    # read max and min latitude
    my @latitude = read_series($ncid, $ncfilename, "latitude", "latitude",
                               $dim_time_size);
    $info{"latitude_min"} = round(2, min(@latitude));
    $info{"latitude_max"} = round(2, max(@latitude));

    # read max and min longitude
    my @longitude = read_series($ncid, $ncfilename, "longitude", "longitude",
                                $dim_time_size);
    $info{"longitude_eastmost"} = round(2, max(@longitude));
    if (!crosses_date_line(@longitude)) {
        # ship doesn't cross 180 degrees, use normal max/min
        $info{"longitude_westmost"} = round(2, min(@longitude));
    }
    else {
        # ship crosses, switch values
        print "\n** CROSSING 180 DEGREES **\n\n";
        print {$polar_fh} "$info{file_path} $info{file_name}\n"
            if (defined $polar_fh);
        $info{"longitude_westmost"} = round(2, maxp(@longitude));
        $info{"longitude_eastmost"} = round(2, minp(@longitude));
    }

    # one pass over all variables: remember every name, and separately the
    # names containing an upper-case letter (candidates for the flags)
    my %present  = ();
    my @varnames = ();
    my ($ncvarname, $ncvartype, $ncvarndims, $ncvarnatts) = ("", "", "", "");
    my @ncvardimids = ();
    for (my $ncvarid = 0; $ncvarid < $ncnvars; $ncvarid++) {
        die "Could not inquire variable with id $ncvarid in file \"$ncfilename\"\n"
            if (NetCDF::varinq($ncid, $ncvarid, \$ncvarname, \$ncvartype,
                               \$ncvarndims, \@ncvardimids,
                               \$ncvarnatts) == -1);
        push @varnames, $ncvarname if ($ncvarname =~ m/[A-Z]+/);
        $present{$ncvarname} = 1;
    }

    # read max and min temperature
    if ($present{"temperature"}) {
        my @temperature = read_series($ncid, $ncfilename, "temperature",
                                      "temperature", $dim_time_size);
        $info{"temperature_min"} = round(2, min(@temperature));
        $info{"temperature_max"} = round(2, max(@temperature));
    }
    else {
        $info{"temperature_min"} = "";
        $info{"temperature_max"} = "";
    }

    # depth range is not determined; see the note in the file header
    $info{"depth_min"} = 0;
    $info{"depth_max"} = 0;

    # read min and max date
    if ($present{"woce_date"}) {
        my @woce_date = read_series($ncid, $ncfilename, "woce_date",
                                    "woce_date", $dim_time_size);
        $info{"woce_date_min"} = min(@woce_date);
        $info{"woce_date_max"} = max(@woce_date);
    }
    else {
        $info{"woce_date_min"} = 0;
        $info{"woce_date_max"} = 0;
        print {$no_date_fh} "$info{file_name}\n" if (defined $no_date_fh);
    }

    # read min and max speed (variable SPD; the error text says wind_speed)
    if ($present{"SPD"}) {
        my @speed = read_series($ncid, $ncfilename, "SPD", "wind_speed",
                                $dim_time_size);
        $info{"wind_speed_min"} = round(2, min(@speed));
        $info{"wind_speed_max"} = round(2, max(@speed));
    }
    else {
        $info{"wind_speed_min"} = "";
        $info{"wind_speed_max"} = "";
    }

    # existence flags: a data variable counts as present when a variable
    # has exactly that name, optionally followed by digits.  The match is
    # anchored so that e.g. "T" is not satisfied by "TW" or "LCT".
    foreach my $datavar (@nc_info_datavars) {
        my $found = grep { /^\Q$datavar\E[0-9]*$/ } @varnames;
        $info{$datavar} = $found ? 1 : 0;
    }

    # read experiment ID
    my $expocode = "";
    die "Could not get attribute \"EXPOCODE\" in file \"$ncfilename\"\n"
        if (NetCDF::attget($ncid, NetCDF::GLOBAL(), "EXPOCODE",
                           \$expocode) == -1);
    # for some reason, the value gets an extra \0 at the end, so we
    # need to get rid of it
    $expocode =~ s/\0//;
    $info{"EXPOCODE"} = $expocode;

    # read platform type and name
    my $site = "";
    die "Could not get attribute \"site\" in file \"$ncfilename\"\n"
        if (NetCDF::attget($ncid, NetCDF::GLOBAL(), "site", \$site) == -1);
    $info{"site"} = $site;
    # values shorter than 23 characters lose their last character; the
    # length is compared numerically
    if ((length $info{"site"}) < 23) {
        chop $info{"site"};
    }

    die "Could not close file \"$ncfilename\"\n" if NetCDF::close($ncid);

    return \%info;
}

########################################################################
# convert the hash returned from the above function to a string so it
# can be saved into a file; takes the hash reference, returns the values
# in @nc_info_keys order joined by tabs
sub nc_info_to_string {
    my ($info) = @_;
    return join("\t", @{$info}{@nc_info_keys});
}

########################################################################
# convert a string (from a file) into a hash like the one returned
# from read_nc_info; splits on tabs, the separator nc_info_to_string
# writes, and keeps trailing empty fields
sub string_to_nc_info {
    my ($line) = @_;
    my %info = ();
    @info{@nc_info_keys} = split /\t/, $line, -1;
    return \%info;
}

########################################################################
# index every "*.nc" file below one directory
#
#   $dir             directory to change into and search
#   $label           data-set label passed on to read_nc_info
#   $versioned_name  compiled pattern; matching file names are skipped
#
# Appends one line per indexed file to the index.  A directory that cannot
# be entered is reported and skipped, so that files from some other
# directory are never indexed under this label.
sub index_directory {
    my ($dir, $label, $versioned_name) = @_;

    unless (chdir $dir) {
        warn "Could not change to directory \"$dir\": $!\n";
        return;
    }

    open my $find, '-|', 'find . -type f -name "*.nc"'
        or die "Could not run find\n";
    while (my $file = <$find>) {
        chomp $file;
        $file =~ s/^\.\///;
        if ($file =~ $versioned_name) {
            print "Skipping $dir/$file\n" if ($verbose);
            next;
        }
        print "Processing $dir/$file\n" if ($verbose);
        print {$index_fh} nc_info_to_string(read_nc_info($file, $label)), "\n";
    }
    close $find;
    return;
}

########################################################################
# main

# process arguments; anything that is not a known switch is ignored
while (defined(my $argument = shift(@ARGV))) {
    if ($argument =~ /^-o$/) {
        $indexfile = shift(@ARGV);
    }
    elsif ($argument =~ /^-v$/) {
        $verbose = 1;
    }
    elsif ($argument =~ /^-h$/) {
        die $usage;
    }
}

# -o with a file name is required
die $usage unless (defined $indexfile);

# All output files are opened before the first chdir, so relative names
# refer to the directory the script was started in.
$no_date_fh = open_report("no_date.txt");
$polar_fh   = open_report("polar.txt");

open $index_fh, '>', $indexfile or die "Could not open \"$indexfile\"\n";
print {$index_fh} join("\t", @nc_info_keys), "\n";

# Ship data
foreach my $number (0 .. $#basedir) {
    print "Using base directory $basedir[$number]\n" if ($verbose);
    print "Using index file $indexfile\n" if ($verbose);
    index_directory($basedir[$number], $dr[$number], $versioned_ship_file);
}

# Buoy Data
index_directory($buoy_basedir, $dr[2], $versioned_buoy_file);

# write errors on a buffered handle only show up at close
close $index_fh or die "Could not close \"$indexfile\": $!\n";
close $no_date_fh if (defined $no_date_fh);
close $polar_fh   if (defined $polar_fh);