# Automated loading and housekeeping of e-MERLIN data.
# LOCAL JBCA VERSION
# Original version: mkargo 20110717

###############################################################################
# This script will load data directly into AIPS from the online e-MERLIN
# archive at Jodrell. You can run this script from anywhere on the computer you
# are loading the data to, but....
#
# ***** YOU MUST BE ON THE JODRELL SYSTEM FOR THIS TO WORK! *****
#
# The first thing the script does is set up the list of base directories where
# e-MERLIN archive data are located.  *** SET THIS WHEN THE ARCHIVE CHANGES! ***
#
# Options: you can specify a list of dates in two ways, either by a list in an
# input file, or interactively when you run the script.
# 
# If you have a list of the dates you wish to load data for, put them in a text
# file in the following format:
# 	20110302
#	20110304
#	20110305
#	etc.
# Alternatively, run the script with no arguments and it will ask you for a 
# list of dates.
#
# The script will search the archive and load all files in all directories that
# exist for each specified date.
#
# If the script fails, the first thing to do is run 'source /aips/LOGIN.CSH'.
###############################################################################

# Changelog
# 20110807 - removed the need for any specific filename structure, as long as the file names 
# 	are shorter than 46 characters and end ".fits" they will be loaded.  Also now handles 
#	different directories with inseq, making catalog entries unique without risking names 
#	which are too long for AIPS.


##########################################################
# ******* SET THESE WHENEVER THE ARCHIVE CHANGES ******* #
##########################################################

# "archive" is a comma-separated list of all the directories containing (useful) e-MERLIN archive data
# (A Python tuple: every entry is listed in turn when the script looks for the
# dated sub-directories matching the requested dates.)
archive = ('/scratch/arch01_1/emerlin/', '/scratch/arch01_2/emerlin/')
# "obsstart" is the earliest date we expect to find in the archive, written as an
# integer YYYYMMDD.  isvalid() is meant to turn away any requested date earlier
# than this (the date is skipped with an error message; the script carries on).
obsstart = 20100417


##########################################################
# You shouldn't need to change anything below this line. #
##########################################################


# Import useful things

import os, re, time, datetime, sys, fnmatch
from os.path import join, getsize
from datetime import date

import Utilities
from AIPS import AIPS
from AIPSTask import AIPSTask, AIPSList
from AIPSData import AIPSUVData, AIPSImage
from AIPSData import AIPSCat


print "-----------------------------------------------------"
print "Load e-MERLIN data into AIPS direct from the archive."
print "-----------------------------------------------------"


# Get AIPS user number and disk info
print "Enter your AIPS number: ",
usernum = raw_input()
if usernum.isdigit() :
	usernum = int(usernum)
else :
	print "Does not compute."
	sys.exit()
print "Enter the AIPS disk to use: ",
indisk = raw_input()
if indisk.isdigit() :
	indisk = int(indisk)
else :
	print "Does not compute."
	sys.exit()
outdisk = indisk



# Set up AIPS things

# Today's date, plus the same date as a YYYYMMDD string.
today = date.today()
now = today.strftime("%Y%m%d")
# Dates requested by the user; filled in further down once the input has been read.
mylist=[]

# Run everything under the AIPS user number entered above.
AIPS.userno = usernum
# One task object per AIPS task.  FITLD, UVFIX, MSORT and INDXR are run on every
# file in the loading loop.  The ZAP task object is not used by the loading loop,
# which removes its temporary files with AIPSUVData.zap() instead.
fitld = AIPSTask('FITLD')
uvfix = AIPSTask('UVFIX')
zap = AIPSTask('ZAP')
msort = AIPSTask('MSORT')
# Sort order handed to MSORT (presumably time-baseline -- confirm in the MSORT help).
msort.sort = 'TB'
indxr = AIPSTask('INDXR')

# AIPS is chatty. Suppress most messages on the terminal but log everything.
AIPSTask.msgkill = -4
# Opened in 'w' mode, so a log left in the current directory by an earlier run is overwritten.
AIPS.log = open('eMERLIN_loadlocal.log', 'w')


def isvalid(y):
	"""Check that a string is a usable observation date.

	y -- candidate date as an eight-digit string, YYYYMMDD.

	Returns y unchanged when it is a real calendar date, its year is not in
	the future, and it is not earlier than either the first e-MERLIN
	observations (20100417) or the configured start of the archive
	(obsstart).  Otherwise an error message is printed and False is
	returned, so the result can be used directly in an "if".
	"""
	if not y.isdigit() :
		print("Error: not a date.")
		return False
	# Exactly eight digits are needed, otherwise the slices below are empty
	# or meaningless (a bare "0" used to get this far and crash in int('')).
	if len(y) != 8 :
		print("Error: that does not look like a valid date.")
		return False
	year = int(y[0:4])
	month = int(y[4:6])
	day = int(y[6:8])
	# Let the date constructor reject impossible dates (month 13, day 0,
	# 31 February, year 0, ...).
	try :
		date(year, month, day)
	except ValueError :
		print("Error: date out of sensible range.")
		return False
	if year > date.today().year :
		print("Error: date out of sensible range.")
		return False
	# Compare whole dates as integers.  Testing year, month and day separately
	# lets through dates such as 20100320, and comparing the string y with the
	# integer obsstart is never true in Python 2.
	datenum = int(y)
	if datenum < 20100417 :
		print("Error: e-MERLIN was not operational before that!")
		return False
	if datenum < int(obsstart) :
		print("Error: there are no data in the archive before %s" % obsstart)
		return False
	return y

def nodot(item):
	"""Filter out hidden files.

	item -- a file name (not a full path).

	Returns False for names starting with a dot and True for everything
	else, including the empty string.
	"""
	return not item.startswith('.')

#def isfits(item):
#	thing = os.path.splitext(item)
#	if thing[1] == '.fits' :
#		return item


####################
# Start doing stuff
####################


# check we're at Jodrell, otherwise quit
if os.path.exists('/scratch/arch01_1/emerlin/'):
	print "Checking we can see the archive.... OK, proceeding."
else :
	print "Error: you don't appear to be on the Jodrell system. This script "
	print "is meant for direct loading from the e-MERLIN archive ONLY."
	print "Check you are where you think you are."
	sys.exit()



# Did the user give us an input file or do we need to ask for input?
if len(sys.argv)==2 :
	print "Loading data from directories specified in", sys.argv[1]
	afile = sys.argv[1]
	if os.path.isfile(afile) :
		inlist = open(afile, 'r')
	else :
		print "Error: cannot open file", afile
		sys.exit()
	for line in inlist :
		line = line.rstrip('\r\n')
		if isvalid(line):
			mylist.append(line,)
	inlist.close()
else :
	print "Enter a list of dates you wish to load, 0 ends the list."
	year=1
	while year>0 :					# loop until user ends list
		line=raw_input()
		if int(line)==0 :			# check for end of loop condition
			break
		if isvalid(line):
			mylist.append(line,)


mylist.sort()				# Sort the list before we load (makes DBCON more efficient later)


# List the sub-directories of every archive directory, then pick out the ones
# whose name contains one of the dates in "mylist".
#   fulllist -- full paths of all sub-directories found, sorted
#   loadlist -- the sub-directories to load, ordered by date then path

loadlist=[]
fulllist=[]
for archdir in archive :
	for entry in os.listdir(archdir) :
		candidate = os.path.join(archdir, entry)
		if os.path.isdir(candidate) :
			fulllist.append(candidate)

fulllist.sort()

for tmpdate in mylist :
	for tmpdir in fulllist :
		# Match on the directory's own name only, not on the path leading to
		# it, and never queue the same directory twice.
		if tmpdate in os.path.basename(tmpdir) and tmpdir not in loadlist :
			loadlist.append(tmpdir)





# Proceed with loading the data

thisdir = 0
nfiles = 0
filelist = []
fitslist = []
for dir in loadlist:						# for each sub-directory...
	thisdir = thisdir + 1
	numdirs = len(dir)
	mylist = [f for f in filter(nodot, os.listdir(dir))	# make a list of files to load
	if os.path.isfile(os.path.join(dir, f))]
	for fitsfile in mylist:					# tidy up the list (remove dotfiles and anything not .fits)
		if isfits(fitsfile):
			fitslist.append(fitsfile)

	for fitsfile in fitslist:				# for each file...
		#index = fitsfile.find('_')
		if fnmatch.fnmatch(fitsfile, '*.fits'):
			descriptor = dir + "/" + fitsfile
			print "Loading " + descriptor
			fitld.datain = descriptor
			outn = 'TMP'
			fitld.outdata = AIPSUVData(outn,'UVDATA',outdisk,thisdir)
			fitld.digicor = -1
			fitld.douvcomp = -1
			fitld.go()

			# reset the filename based on the source name in the SU table
			datafile = AIPSUVData('TMP','UVDATA',outdisk,thisdir)
			sourcelist = datafile.table('SU',1)
			outn = datafile.sources[0]
			if len(outn)>12 :
				outn = outn[0:12]		# catch long source names
			#filelist.append(outn)			# filelist is an array of AIPS outnames loaded
			inname = outn

			# uvfix each file
			uvfix.indata =  AIPSUVData('TMP','UVDATA',indisk,thisdir)
			uvfix.outdata = AIPSUVData(inname,'UVFIX',indisk,thisdir)
			uvfix.go()

			# msort each file, writing back to same file
			msort.indata = AIPSUVData(inname,'UVFIX',indisk,thisdir)
			msort.outdata = AIPSUVData(inname,'UVFIX',indisk,thisdir)
			msort.go()

			# index each file
			indxr.indata = AIPSUVData(inname,'UVFIX',indisk,thisdir)
			indxr.go()

			# remove now-superfluous UVDATA files
			mydatain = AIPSUVData('TMP','UVDATA',indisk,thisdir)
			mydatain.zap()




print "Your AIPS catalog looks like this:"
print AIPSCat(indisk)

print "Your data are now loaded in AIPS under user number " + format(AIPS.userno) + " on disk " + format(outdisk) + "."
print "You should carefully edit them before proceeding with calibration."


