Python Geocoder

Steps of the program

  • Parsing raw HTML and saving the necessary data
  • Manipulating this data for export to a CSV file to submit to the Census API
  • Submitting data to the Census API, formatting and saving the response
  • Manipulating the response to extract longitude and latitude data
  • Combining necessary data for ArcMap
  • Importing this data through ArcMap and displaying a on a map.

Please keep in mind that this was a programming exercise, not a cartographical one. Had that been the case, the output map would have been styled much more professionally. As it is, it was more proof-of-concept than anything else


In [ ]:
from lxml import html  #for scraping HTML data
import requests # for handling API requests
import csv # for input/output of CSV files
import arcpy #needed for ArcMap
import os

#delcaring all lists as empty
teamName = []
teamAddress = []
teamStreet = []
teamCity = []
teamState = [] #optional for API.  Will be submitted in CSV as blank/comma filled
teamZIP = [] #optional for API.  Will be submitted in CSV as blank/comma filled
teamID = []
combined = []

#Web site for HTML to be scraped
page = requests.get('Ā¬str=true&ha=&search=55358&country=&value=600&tagshow=showall&tag=All+Club+Men')
tree = html.fromstring(page.content)

#for loop to scrape web data.  Looks for each div with a class name = "result"
#the  parses the data with in that div
for result in tree.xpath('//div[@class="result"]'):
    teamName.append(result.xpath('div[@class="storename"]/text()')) #finds team names and places them in the list declared above
    teamAddress.append(result.xpath('div[@class="storeaddress"]/text()'))#finds team address and places them in the list declared above. Use append over extend to keep formatting for CSV       teamZIP.append(result.xpath('div[@class="storepostalcode"]/text()'))

#creates list of numbers to match length of teamName
#needed for first column of Census API CSV template
#adds one to each number in teamID. Python counts from zero, template starts at one
for j, _name in enumerate(teamName):

#For splitting teamCity from teamAddress
for t in teamAddress:
    if (len(t) == 1) or (len(t)==2):
    elif (len(t)== 3):

#strips whitespace and commas from teamCity
teamCity = [q.strip() for q in teamCity]
teamCity = [w.replace(",","") for w in teamCity]

#add correct number of commas for state and ZIP. Lists cannot be empty for template
for j, _name in enumerate(teamName):

#combines data and adds to CSV file.  teamName to be added later as it does not conform to template (match teamID as key)
combined = zip(teamID,teamStreet,teamCity,teamState,teamZIP)
with open('textRugbyZIP.csv', 'wb') as textRugby:
    textRugbyWriter = csv.writer(textRugby)
    for row in combined:

#writes club names with ID for later join inside ArcMap
teamIDheader = [("id","Team Name")]
teamIDKey = zip(teamID,teamName)
teamIDKey = teamIDheader + teamIDKey
with open('teamIDKey.csv', "w+") as idCSV:
    idWriter = csv.writer(idCSV)
    for row in teamIDKey:

#takes created CSV and uploads to Census API.  Using pre-created data at this point.
url = ""
data = {'benchmark':9}
files = {'addressFile': open('textRugbyZIP.csv')}
response =, data=data, files=files, verify=False)
apiData = response.text

#reads response from API into new CSV,
with open('export.csv',"w+") as csvWriting:

withClubsRemoved = [] #after removing entries with no geocode result
extractTeamID = []
extractTeamAddress = []
extractTeamLongLat = []

# removes entries with no match
with open('export.csv', "r+") as removeNoData:
    clubDataTuple = [tuple(line) for line in csv.reader(removeNoData)]
withClubsRemoved = [t for t in clubDataTuple if t[2]=="Match"]
extractTeamID = [t[0] for t in withClubsRemoved]
extractTeamAddress = [t[4] for t in withClubsRemoved]
extractTeamLongLat = [t[5] for t in withClubsRemoved]

#retrives Lat/Long data, separates by comma and adds each seperately
for j in extractTeamLongLat:

clubLong = []
clubLat = []

#splits lat and long
for j in clubLongLat:

#clubsLatLong match to teamIDKey via

forCSV = zip(extractTeamID, extractTeamAddress, clubLong, clubLat)

csvHeader = [("id","Address","Long","Lat")]
readyforCSV = csvHeader+forCSV

#writes to CSV for importing into ArcMap
with open("clubXYData.csv", "wb") as XYData:
    XYDataWriter = csv.writer(XYData)
    for row in readyforCSV:

#Files to import into ArcMap
XYFile = r"C:\Users\ame\Documents\PythonExperiments\clubXYdata.csv"
ClubsIDFile = r"C:\Users\ame\Documents\PythonExperiments\teamIDKey.csv"

#GDB in which to save created tables
out_gdb = r"C:\Users\ame\Documents\PythonExperiments\Rugby.gdb"

#import CSV and convert to tables
arcpy.TableToTable_conversion(XYFile, out_gdb, 'clubXYTable')
arcpy.TableToTable_conversion(ClubsIDFile, out_gdb, 'clubIDTable')

#Join tables together

#add XY data to map
arcpy.MakeXYEventLayer_management ('clubXYTable', 'clubXYTable.Long','clubXYTable.Lat', 'clubsPlotted')

# get the map document
mxd = arcpy.mapping.MapDocument("CURRENT")

# get the data frame
df = arcpy.mapping.ListDataFrames(mxd,"*")[0]

# create a new layer
newlayer = arcpy.mapping.Layer(r'C:\Users\ame\Documents\Shared_Shapefiles\US_States\Contiguous48.shp')

# add the layer to the map at the bottom of the TOC in data frame 0
arcpy.mapping.AddLayer(df, newlayer,"BOTTOM")

#adding team name labels
toLable = arcpy.mapping.ListLayers(mxd, "")[0]
toLable.showLabels = True

#Exporting to PDF


Rugby club map

(c) 2017 Andrew McGuire First written for GEOG 2113 GIS Applications at Itasca Community College