""" shapefile.py Provides read and write support for ESRI Shapefiles. author: jlawheadgeospatialpython.com date: 20110927 version: 1.1.4 Compatible with Python versions 2.4-3.x """ from struct import pack, unpack, calcsize, error import os import sys import time import array # # Constants for shape types NULL = 0 POINT = 1 POLYLINE = 3 POLYGON = 5 MULTIPOINT = 8 POINTZ = 11 POLYLINEZ = 13 POLYGONZ = 15 MULTIPOINTZ = 18 POINTM = 21 POLYLINEM = 23 POLYGONM = 25 MULTIPOINTM = 28 MULTIPATCH = 31 PYTHON3 = sys.version_info[0] == 3 def b(v): if PYTHON3: if isinstance(v, str): # For python 3 encode str to bytes. return v.encode('utf-8') elif isinstance(v, bytes): # Already bytes. return v else: # Error. raise Exception('Unknown input type') else: # For python 2 assume str passed in and return str. return v def u(v): if PYTHON3: if isinstance(v, bytes): # For python 3 decode bytes to str. return v.decode('utf-8') elif isinstance(v, str): # Already str. return v else: # Error. raise Exception('Unknown input type') else: # For python 2 assume str passed in and return str. return v def is_string(v): if PYTHON3: return isinstance(v, str) else: return isinstance(v, basestring) class _Array(array.array): """Converts python tuples to lits of the appropritate type. Used to unpack different shapefile header parts.""" def __repr__(self): return str(self.tolist()) class _Shape: def __init__(self, shapeType=None): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are usually point, polyline, or polygons. Every shape type except the "Null" type contains points at some level for example verticies in a polygon. If a shape type has multiple shapes containing points within a single geometry record then those shapes are called parts. Parts are designated by their starting index in geometry record's list of shapes.""" self.shapeType = shapeType self.points = [] class _ShapeRecord: """A shape object of any type.""" def __init__(self, shape=None, record=None): self.shape = shape self.record = record class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" pass class Reader: """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, .dbf) is missing no exception is thrown until you try to call a method that depends on that particular file. The .shx index file is used if available for efficiency but is not required to read the geometry from the .shp file. The "shapefile" argument in the constructor is the name of the file you want to open. You can instantiate a Reader without specifying a shapefile and then specify one later with the load() method. Only the shapefile headers are read upon loading. Content within each file is only accessed when required and as efficiently as possible. Shapefiles are usually not large but they can be. """ def __init__(self, *args, **kwargs): self.shp = None self.shx = None self.dbf = None self.shapeName = "Not specified" self._offsets = [] self.shpLength = None self.numRecords = None self.fields = [] self.__dbfHdrLength = 0 # See if a shapefile name was passed as an argument if len(args) > 0: if type(args[0]) is type("stringTest"): self.load(args[0]) return if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): self.shp = kwargs["shp"] if hasattr(self.shp, "seek"): self.shp.seek(0) if "shx" in kwargs.keys(): if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] if hasattr(self.shx, "seek"): self.shx.seek(0) if "dbf" in kwargs.keys(): if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] if hasattr(self.dbf, "seek"): self.dbf.seek(0) if self.shp or self.dbf: self.load() else: raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") def load(self, shapefile=None): """Opens a shapefile from a filename or file-like object. Normally this method would be called by the constructor with the file object or file name as an argument.""" if shapefile: (shapeName, ext) = os.path.splitext(shapefile) self.shapeName = shapeName try: self.shp = open("%s.shp" % shapeName, "rb") except IOError: raise ShapefileException("Unable to open %s.shp" % shapeName) try: self.shx = open("%s.shx" % shapeName, "rb") except IOError: raise ShapefileException("Unable to open %s.shx" % shapeName) try: self.dbf = open("%s.dbf" % shapeName, "rb") except IOError: raise ShapefileException("Unable to open %s.dbf" % shapeName) if self.shp: self.__shpHeader() if self.dbf: self.__dbfHeader() def __getFileObj(self, f): """Checks to see if the requested shapefile file object is available. If not a ShapefileException is raised.""" if not f: raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") if self.shp and self.shpLength is None: self.load() if self.dbf and len(self.fields) == 0: self.load() return f def __restrictIndex(self, i): """Provides list-like handling of a record index with a clearer error message if the index is out of bounds.""" if self.numRecords: rmax = self.numRecords - 1 if abs(i) > rmax: raise IndexError("Shape or Record index out of range.") if i < 0: i = range(self.numRecords)[i] return i def __shpHeader(self): """Reads the header information from a .shp or .shx file.""" if not self.shp: raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found") shp = self.shp # File length (16-bit word * 2 = bytes) shp.seek(24) self.shpLength = unpack(">i", shp.read(4))[0] * 2 # Shape type shp.seek(32) self.shapeType= unpack("2i", f.read(8)) shapeType = unpack(" -10e38: record.m.append(m) else: record.m.append(None) # Read a single point if shapeType in (1,11,21): record.points = [_Array('d', unpack("<2d", f.read(16)))] # Read a single Z value if shapeType == 11: record.z = unpack("i", shx.read(4))[0] * 2) - 100 numRecords = shxRecordLength // 8 # Jump to the first record. shx.seek(100) for r in range(numRecords): # Offsets are 16-bit words just like the file length self._offsets.append(unpack(">i", shx.read(4))[0] * 2) shx.seek(shx.tell() + 4) if not i == None: return self._offsets[i] def shape(self, i=0): """Returns a shape object for a shape in the the geometry record file.""" shp = self.__getFileObj(self.shp) i = self.__restrictIndex(i) offset = self.__shapeIndex(i) if not offset: # Shx index not available so use the full list. shapes = self.shapes() return shapes[i] shp.seek(offset) return self.__shape() def shapes(self): """Returns all shapes in a shapefile.""" shp = self.__getFileObj(self.shp) shp.seek(100) shapes = [] while shp.tell() < self.shpLength: shapes.append(self.__shape()) return shapes def __dbfHeaderLength(self): """Retrieves the header length of a dbf file header.""" if not self.__dbfHdrLength: if not self.dbf: raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") dbf = self.dbf (self.numRecords, self.__dbfHdrLength) = \ unpack("6i", 9994,0,0,0,0,0)) # File length (Bytes / 2 = 16-bit words) if headerType == 'shp': f.write(pack(">i", self.__shpFileLength())) elif headerType == 'shx': f.write(pack('>i', ((100 + (len(self._shapes) * 8)) // 2))) # Version, Shape type f.write(pack("<2i", 1000, self.shapeType)) # The shapefile's bounding box (lower left, upper right) if self.shapeType != 0: try: f.write(pack("<4d", *self.bbox())) except error: raise ShapefileException("Failed to write shapefile bounding box. Floats required.") else: f.write(pack("<4d", 0,0,0,0)) # Elevation z = self.zbox() # Measure m = self.mbox() try: f.write(pack("<4d", z[0], z[1], m[0], m[1])) except error: raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.") def __dbfHeader(self): """Writes the dbf header and field descriptors.""" f = self.__getFileObj(self.dbf) f.seek(0) version = 3 year, month, day = time.localtime()[:3] year -= 1900 # Remove deletion flag placeholder from fields for field in self.fields: if field[0].startswith("Deletion"): self.fields.remove(field) numRecs = len(self.records) numFields = len(self.fields) headerLength = numFields * 32 + 33 recordLength = sum([int(field[2]) for field in self.fields]) + 1 header = pack('2i", recNum, 0)) recNum += 1 start = f.tell() # Shape Type f.write(pack("i", length)) f.seek(finish) def __shxRecords(self): """Writes the shx records.""" f = self.__getFileObj(self.shx) f.seek(100) for i in range(len(self._shapes)): f.write(pack(">i", self._offsets[i] // 2)) f.write(pack(">i", self._lengths[i])) def __dbfRecords(self): """Writes the dbf records.""" f = self.__getFileObj(self.dbf) for record in self.records: if not self.fields[0][0].startswith("Deletion"): f.write(b(' ')) # deletion flag for (fieldName, fieldType, size, dec), value in zip(self.fields, record): fieldType = fieldType.upper() size = int(size) if fieldType.upper() == "N": value = str(value).rjust(size) elif fieldType == 'L': value = str(value)[0].upper() else: value = str(value)[:size].ljust(size) assert len(value) == size value = b(value) f.write(value) def null(self): """Creates a null shape.""" self._shapes.append(_Shape(NULL)) def point(self, x, y, z=0, m=0): """Creates a point shape.""" pointShape = _Shape(self.shapeType) pointShape.points.append([x, y, z, m]) self._shapes.append(pointShape) def line(self, parts=[], shapeType=POLYLINE): """Creates a line shape. This method is just a convienience method which wraps 'poly()'. """ self.poly(parts, shapeType, []) def poly(self, parts=[], shapeType=POLYGON, partTypes=[]): """Creates a shape that has multiple collections of points (parts) including lines, polygons, and even multipoint shapes. If no shape type is specified it defaults to 'polygon'. If no part types are specified (which they normally won't be) then all parts default to the shape type. """ polyShape = _Shape(shapeType) polyShape.parts = [] polyShape.points = [] for part in parts: polyShape.parts.append(len(polyShape.points)) for point in part: # Ensure point is list if not isinstance(point, list): point = list(point) # Make sure point has z and m values while len(point) < 4: point.append(0) polyShape.points.append(point) if polyShape.shapeType == 31: if not partTypes: for part in parts: partTypes.append(polyShape.shapeType) polyShape.partTypes = partTypes self._shapes.append(polyShape) def field(self, name, fieldType="C", size="50", decimal=0): """Adds a dbf field descriptor to the shapefile.""" self.fields.append((name, fieldType, size, decimal)) def record(self, *recordList, **recordDict): """Creates a dbf attribute record. You can submit either a sequence of field values or keyword arguments of field names and values. Before adding records you must add fields for the record values using the fields() method. If the record values exceed the number of fields the extra ones won't be added. In the case of using keyword arguments to specify field/value pairs only fields matching the already registered fields will be added.""" record = [] fieldCount = len(self.fields) # Compensate for deletion flag if self.fields[0][0].startswith("Deletion"): fieldCount -= 1 if recordList: [record.append(recordList[i]) for i in range(fieldCount)] elif recordDict: for field in self.fields: if field[0] in recordDict: val = recordDict[field[0]] if val: record.append(val) else: record.append("") if record: self.records.append(record) def shape(self, i): return self._shapes[i] def shapes(self): """Return the current list of shapes.""" return self._shapes def saveShp(self, target): """Save an shp file.""" if not hasattr(target, "write"): target = os.path.splitext(target)[0] + '.shp' if not self.shapeType: self.shapeType = self._shapes[0].shapeType self.shp = self.__getFileObj(target) self.__shapefileHeader(self.shp, headerType='shp') self.__shpRecords() def saveShx(self, target): """Save an shx file.""" if not hasattr(target, "write"): target = os.path.splitext(target)[0] + '.shx' if not self.shapeType: self.shapeType = self._shapes[0].shapeType self.shx = self.__getFileObj(target) self.__shapefileHeader(self.shx, headerType='shx') self.__shxRecords() def saveDbf(self, target): """Save a dbf file.""" if not hasattr(target, "write"): target = os.path.splitext(target)[0] + '.dbf' self.dbf = self.__getFileObj(target) self.__dbfHeader() self.__dbfRecords() def save(self, target=None, shp=None, shx=None, dbf=None): """Save the shapefile data to three files or three file-like objects. SHP and DBF files can also be written exclusively using saveShp, saveShx, and saveDbf respectively.""" # TODO: Create a unique filename for target if None. if shp: self.saveShp(shp) if shx: self.saveShx(shx) if dbf: self.saveDbf(dbf) elif target: self.saveShp(target) self.shp.close() self.saveShx(target) self.shx.close() self.saveDbf(target) self.dbf.close() class Editor(Writer): def __init__(self, shapefile=None, shapeType=POINT, autoBalance=1): self.autoBalance = autoBalance if not shapefile: Writer.__init__(self, shapeType) elif is_string(shapefile): base = os.path.splitext(shapefile)[0] if os.path.isfile("%s.shp" % base): r = Reader(base) Writer.__init__(self, r.shapeType) self._shapes = r.shapes() self.fields = r.fields self.records = r.records() def select(self, expr): """Select one or more shapes (to be implemented)""" # TODO: Implement expressions to select shapes. pass def delete(self, shape=None, part=None, point=None): """Deletes the specified part of any shape by specifying a shape number, part number, or point number.""" # shape, part, point if shape and part and point: del self._shapes[shape][part][point] # shape, part elif shape and part and not point: del self._shapes[shape][part] # shape elif shape and not part and not point: del self._shapes[shape] # point elif not shape and not part and point: for s in self._shapes: if s.shapeType == 1: del self._shapes[point] else: for part in s.parts: del s[part][point] # part, point elif not shape and part and point: for s in self._shapes: del s[part][point] # part elif not shape and part and not point: for s in self._shapes: del s[part] def point(self, x=None, y=None, z=None, m=None, shape=None, part=None, point=None, addr=None): """Creates/updates a point shape. The arguments allows you to update a specific point by shape, part, point of any shape type.""" # shape, part, point if shape and part and point: try: self._shapes[shape] except IndexError: self._shapes.append([]) try: self._shapes[shape][part] except IndexError: self._shapes[shape].append([]) try: self._shapes[shape][part][point] except IndexError: self._shapes[shape][part].append([]) p = self._shapes[shape][part][point] if x: p[0] = x if y: p[1] = y if z: p[2] = z if m: p[3] = m self._shapes[shape][part][point] = p # shape, part elif shape and part and not point: try: self._shapes[shape] except IndexError: self._shapes.append([]) try: self._shapes[shape][part] except IndexError: self._shapes[shape].append([]) points = self._shapes[shape][part] for i in range(len(points)): p = points[i] if x: p[0] = x if y: p[1] = y if z: p[2] = z if m: p[3] = m self._shapes[shape][part][i] = p # shape elif shape and not part and not point: try: self._shapes[shape] except IndexError: self._shapes.append([]) # point # part if addr: shape, part, point = addr self._shapes[shape][part][point] = [x, y, z, m] else: Writer.point(self, x, y, z, m) if self.autoBalance: self.balance() def validate(self): """An optional method to try and validate the shapefile as much as possible before writing it (not implemented).""" #TODO: Implement validation method pass def balance(self): """Adds a corresponding empty attribute or null geometry record depending on which type of record was created to make sure all three files are in synch.""" if len(self.records) > len(self._shapes): self.null() elif len(self.records) < len(self._shapes): self.record() def __fieldNorm(self, fieldName): """Normalizes a dbf field name to fit within the spec and the expectations of certain ESRI software.""" if len(fieldName) > 11: fieldName = fieldName[:11] fieldName = fieldName.upper() fieldName.replace(' ', '_') # Begin Testing def test(): import doctest doctest.NORMALIZE_WHITESPACE = 1 doctest.testfile("README.txt", verbose=1) if __name__ == "__main__": """ Doctests are contained in the module 'pyshp_usage.py'. This library was developed using Python 2.3. Python 2.4 and above have some excellent improvements in the built-in testing libraries but for now unit testing is done using what's available in 2.3. """ test()