#!/usr/bin/env python
# Typical usage: playmeso.py latest.mdf.pickle
# See comments below for "SUB-TASK 1", "SUB-TASK 2" and "SUB-TASK 3". These specify
# revisions to this script that you must make. When everything works,
# post your script in your password-protected directory and notify the
# instructor that it is ready to be graded.
#
# Layout: the helper functions come first, then the script itself under the
# __main__ guard, so the helpers can be imported without running the script.
#
# Every print(...) below is given ONE pre-formatted string. Written that way
# the line produces the same output whether it is parsed as a Python 2 print
# statement or as a Python 3 function call.
import math
import sys

try:
    import cPickle
except ImportError:  # Python 3 has no cPickle module; pickle replaces it
    import pickle as cPickle

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

# Any measurement below this value is treated as "missing" by this script.
MISSING_BELOW = -989.


# Define a simple function that gives the number of missing values
# in a mesonet station record:
def numberMissing(alist):
    """Return how many items of alist are below MISSING_BELOW.

    alist may be any iterable of values comparable with a float.
    """
    return sum(1 for value in alist if value < MISSING_BELOW)


def mean(alist):
    """Return the arithmetic mean of a list or tuple of numbers, as a float.

    Raises TypeError if alist is neither a list nor a tuple, and
    ZeroDivisionError if it is empty.
    """
    # The original check, "assert alist==type([]) or type(())", could never
    # fail because type(()) is always truthy; this is the test it was after.
    if not isinstance(alist, (list, tuple)):
        raise TypeError("mean() needs a list or a tuple, got %s" % (type(alist),))
    # float() forces true division even for integer input on Python 2.
    return sum(alist) / float(len(alist))


def gustIndexSamples(h):
    """Collect the wind index and WSSD for every station with usable wind data.

    h maps a station key to a record (a dictionary) holding at least the
    sub-keys 'WSSD', 'WSPD' and 'WMAX'.

    Returns (newinda, wssda): two lists of equal length, where each newinda
    entry is (WMAX - WSPD) / WSPD and the wssda entry at the same position is
    that station's WSSD.  A station is skipped when any of the three values
    is missing, or when WSPD is zero (the index would divide by zero).
    """
    newinda = []
    wssda = []
    for key in h:
        record = h[key]
        wssd = record['WSSD']
        wspd = record['WSPD']
        wmax = record['WMAX']
        # Skip this station only.  The original used "break" here, which
        # abandoned every remaining station at the first unusable record.
        if wssd < MISSING_BELOW or wspd < MISSING_BELOW or wmax < MISSING_BELOW:
            continue
        if wspd == 0.:
            continue
        newind = (wmax - wspd) / wspd  # make a new index
        # print("%s %s %s" % (key, newind, wssd))  # uncomment this to see that newind and wssd appear correlated
        newinda.append(newind)
        wssda.append(wssd)
    return newinda, wssda


# SUB-TASK 3: Now study the correlation of newinda with wssda.
# For info on correlation, see
# http://www.tufts.edu/~gdallal/corr.htm
# You need to complete the following function "corr".
def corr(a, b):
    """Placeholder for the correlation of a with b; see SUB-TASK 3.

    Not implemented yet: always returns a fixed message string.
    """
    return "this does not work yet!"


if __name__ == "__main__":
    # try to get a name of the pickle file from the command line:
    try:
        filename = sys.argv[1]
    except IndexError:
        filename = None
        print("filename was not entered from command line")
        filename = read_line("enter filename=>")
    if filename == "":
        filename = "latest.mdf.pickle"  # default

    # try to open that file (binary mode: Python 3 pickle requires it):
    try:
        inf = open(filename, 'rb')
    except IOError:
        print("trouble opening %s" % (filename,))
        sys.exit()
    print("opened %s" % (filename,))

    # try to load the dictionary (notice I do not use try/except here)
    # NOTE: unpickling can run arbitrary code; only load files you trust.
    with inf:
        h = cPickle.load(inf)
    print("loaded an object with type: %s" % (type(h),))

    # Prepare a list of (ta9m,stid) to sort from low to high ta9m:
    allt = []  # initialize empty list
    for key in h:
        bundle = (h[key]['TA9M'], key)  # bundle is a tuple, (a list would also be ok)
        if bundle[0] > MISSING_BELOW:
            allt.append(bundle)
    allt.sort()  # list is sorted by the first element inside the inner tuple
    print("\nfirst tuple: %s" % (allt[0],))
    print("last tuple: %s" % (allt[-1],))
    print("the maximum temperature is %5.1f C, at %s" % allt[-1])
    print("the minimum temperature is %5.1f C, at %s" % allt[0])

    # Now use numberMissing to rank the sites by number of missing values:
    keys = sorted(h)
    allm = []
    for key in keys:
        nmiss = numberMissing(h[key].values())
        allm.append([nmiss, key])
    allm.sort(reverse=True)  # worst offenders first
    with open('offenders.dat', 'w') as outfile:
        # print("\n".join(dir(outfile)))  # uncomment this to see methods available for outfile
        print("\n number of missing measurements at the site (top ten offenders):")
        for nmiss, site in allm[0:10]:
            line = "%s %s" % (site, nmiss)
            outfile.write(line + '\n')
            print(line)

    # reverse the dictionary of dictionaries, making new dictionary z:
    z = {}  # initialize empty dictionary
    skeys = list(h[keys[0]].keys())  # get the "sub-keys"
    for i in skeys:  # use sub-keys as new main-keys
        z[i] = {}
        for j in keys:  # use main-keys as new sub-keys
            z[i][j] = h[j][i]

    # use the new dictionary to find number of missing values:
    print("\n fraction of sites with missing measurements:")
    nsites = len(h)
    with open('missmeas.dat', 'w') as outfile2:
        for key in skeys:
            fracmiss = 1. * numberMissing(z[key].values()) / nsites  # note decimal multiplication, 1.*
            outstring = "%s has %4.1f%% missing" % (key, 100 * fracmiss)
            print(outstring)
            outfile2.write(outstring + '\n')  # notice explicit newline \n

    # SUB-TASK 1: Fix the above to print/write only the keys with >0% missing, sorted alphabetically
    # by the key. (No need to use the "bundle method" for that). But ALSO append the
    # same information to outfile2, sorted by worst offender. (Use bundle for that).

    # do an experiment with zip function:
    listoflists = [[1, 2], [3, 4]]
    # you should see it transposed; note * expands list to separate args
    print("\n zip experiment: %s" % (list(zip(*listoflists)),))

    # now use zip to "transpose" allt:
    nomisst = list(zip(*allt))[0]  # first row is a tuple of valid (not missing) temperatures
    print("\n average temp: %s" % (mean(nomisst),))

    # wind index and WSSD, for the stations that have usable wind data:
    newinda, wssda = gustIndexSamples(h)

    # SUB-TASK 2: Update the dictionary h to include a new sub-key 'NEWI'
    # which contains the value newind. Write a file new.pickle, containing this updated
    # version of h. (easy) But check that your new.pickle works, with pickinsp.py.

    print(" You may need to use sqrt, for example; %s" % (math.sqrt(2.),))

    # now let's use the function corr:
    print("correlation of wssd with newind: %s" % (corr(wssda, newinda),))
    # NOTE(review): wdsda takes WDSD from every station, missing values
    # included, while wssda holds only the stations kept above, so the two
    # lists can differ in length.  Revisit this once corr is implemented.
    wdsda = [h[key]['WDSD'] for key in h]  # stuff WDSD in here
    print("correlation of wdsd with wssd: %s" % (corr(wdsda, wssda),))