def read_weather_data(r):
    '''Read weather data from reader r in fixed-width format.

    The field widths are:
        4,2,2  YYYYMMDD (date)
        2,2,2  DDMMSS   (latitude)
        2,2,2  DDMMSS   (longitude)
        6,6,6  FF.FFF   (temp, deg. C; humidity, %; pressure, kPa)

    r is any iterable of text lines (an open file, a list of strings).
    Lines containing only whitespace are skipped.

    The result is a list with one tuple per record:
        ((YYYY, MM, DD), (DD, MM, SS), (DD, MM, SS), (Temp, Hum, Press))

    Raises ValueError if a field cannot be converted to its target type.'''

    # Each group is a sequence of (width, converter) pairs, in the order
    # the fields appear on the line.
    fields = (((4, int), (2, int), (2, int)),        # date
              ((2, int), (2, int), (2, int)),        # latitude
              ((2, int), (2, int), (2, int)),        # longitude
              ((6, float), (6, float), (6, float)))  # data

    result = []
    for line in r:
        # A blank line (typically a trailing newline at the end of the
        # file) carries no record.
        if not line.strip():
            continue

        start = 0  # column at which the next field begins
        record = []
        for group in fields:
            values = []
            for width, convert in group:
                values.append(convert(line[start:start + width]))
                start += width
            record.append(tuple(values))

        # Store the record as a tuple, as the documented contract states.
        result.append(tuple(record))

    return result
A tuple of tuples is easier to work with because the values have been grouped into logical chunks. To get the longitude, for example, the programmer takes the third element of the record and then reads its three parts, rather than having to count along the record to find the seventh, eighth, and ninth values.
import sys

# NOTE(review): the skip_header defined below rebinds this imported name,
# so the imported version is never used here -- confirm which is intended.
from tsdl import skip_header


def skip_header(r):
    '''Skip the header in reader r, and return the first real piece of
    data.

    The header is one description line (which must be present) followed
    by any number of comment lines starting with '#'.  Returns the first
    line after the header, or an empty string if there was no data.'''

    # Read and discard the description line.
    r.readline()

    # Skip any comment lines that follow it.
    line = r.readline()
    while line and line.startswith('#'):
        line = r.readline()

    # Now line contains the first real piece of data, or an empty
    # string if there was no data.
    return line


def smallest_value_skip(r, default=0):
    '''Read and process reader r to find the smallest value after the
    TSDL header.  Skip missing values, which are indicated with a hyphen.

    Blank lines are skipped as well.  Returns default if there is no
    data after the header, or if every value is missing.

    Raises ValueError if a line is neither a hyphen nor an integer.'''

    smallest = None  # no valid value seen yet

    # The line returned by skip_header is the first data line; it gets
    # the same treatment as every other line, so a missing first value
    # is skipped rather than passed to int().
    line = skip_header(r)
    while line:
        text = line.strip()
        # Only process the line if it has a valid value.
        if text and text != '-':
            value = int(text)
            if smallest is None or value < smallest:
                smallest = value
        # readline() returns '' only at end of file, which ends the loop.
        line = r.readline()

    if smallest is None:
        return default
    return smallest


if __name__ == "__main__":
    # The with-statement closes the file even if processing raises.
    with open(sys.argv[1], "r") as input_file:
        print(smallest_value_skip(input_file))
def get_line(r):
    '''Return the next interesting line from reader r, with surrounding
    whitespace removed, or an empty string if there are no more
    interesting lines.

    Blank lines and comment lines (those starting with 'CMNT') are not
    interesting and are skipped.'''

    line = r.readline()
    # readline() returns '' only at end of file; a blank line is '\n',
    # so blank lines do not end the loop.
    while line:
        text = line.strip()
        if text and not text.startswith('CMNT'):
            return text
        line = r.readline()
    return ''


def read_molecule(r):
    '''Read a single molecule from reader r and return it, or return
    None to signal end of file.

    The molecule is a list whose first element is its name and whose
    remaining elements are (type, x, y, z) tuples of strings, one per
    atom, in file order.

    Raises ValueError if a line does not have the expected number of
    fields, or if the input ends before the molecule's END line.'''

    # If there isn't another line, we're at the end of the file.
    line = get_line(r)
    if not line:
        return None

    # Name of the molecule: "COMPND name"
    _, name = line.split()
    molecule = [name]

    # Other lines are either "END" or "ATOM num type x y z"
    while True:
        line = get_line(r)
        if not line:
            raise ValueError(
                'molecule %s is missing its END line' % name)
        if line.startswith('END'):
            break
        _, _, kind, x, y, z = line.split()
        molecule.append((kind, x, y, z))

    return molecule