Batch Geocoding¶
This guide shows how to encode large datasets of GPS coordinates to Yoro codes — useful for geocoding CSV files, database records, or GeoJSON features.
CSV Geocoding¶
Encode a CSV file¶
import csv
import yoro
# Read locations.csv and write a copy with three extra Yoro columns.
# Both files are opened with newline='' (the csv module does its own newline
# handling) and an explicit encoding so accented place names are read and
# written the same way on every platform.
with open('locations.csv', newline='', encoding='utf-8') as fin, \
        open('locations_with_yoro.csv', 'w', newline='', encoding='utf-8') as fout:
    reader = csv.DictReader(fin)
    # Keep every input column and append the Yoro columns after them.
    fieldnames = reader.fieldnames + ['yoro_code', 'yoro_precision', 'resolution_m']
    writer = csv.DictWriter(fout, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        lat = float(row['latitude'])
        lon = float(row['longitude'])

        code = yoro.encode(lat, lon, precision=12, domain='CI')
        # Decode the fresh code to read back the precision it carries.
        decoded = yoro.decode(code)
        res = yoro.resolution(decoded['precision'], domain='CI')

        row['yoro_code'] = code
        row['yoro_precision'] = decoded['precision']
        row['resolution_m'] = round(res, 1)
        writer.writerow(row)
Input (locations.csv)¶
name,latitude,longitude
Abidjan Plateau,5.322,-4.017
Bouake Centre,7.694,-5.031
Yamoussoukro,6.827,-5.276
San Pedro Port,4.748,-6.636
Output (locations_with_yoro.csv)¶
name,latitude,longitude,yoro_code,yoro_precision,resolution_m
Abidjan Plateau,5.322,-4.017,CI-PW2MK,12,172.9
Bouake Centre,7.694,-5.031,CI-NX3WB,12,172.9
Yamoussoukro,6.827,-5.276,CI-NW64D,12,172.9
San Pedro Port,4.748,-6.636,CI-KYB8G,12,172.9
Pandas DataFrame¶
import pandas as pd
import yoro
df = pd.read_csv('locations.csv')


def encode_row(row):
    """Encode one row's latitude/longitude columns as a Yoro code."""
    return yoro.encode(row['latitude'], row['longitude'], domain='CI')


# Row-wise encoding: apply(..., axis=1) calls encode_row once per row
df['yoro_code'] = df.apply(encode_row, axis=1)

# Decode each code once, then copy selected fields into their own columns
decoded = df['yoro_code'].apply(yoro.decode)
extractors = {
    'yoro_lat': lambda d: d['lat'],
    'yoro_lon': lambda d: d['lon'],
    'cell_lat_min': lambda d: d['bounds']['lat_min'],
    'cell_lat_max': lambda d: d['bounds']['lat_max'],
}
for column, extract in extractors.items():
    df[column] = decoded.apply(extract)

print(df)
GeoJSON¶
Add Yoro codes to GeoJSON features¶
import json
import yoro
def _lon_lat(geom):
    """Return a (lon, lat) pair for a Point or Polygon geometry, else None.

    For a Polygon the result is the average of the vertices of the first
    ring, which approximates the centroid.
    """
    if not geom:
        # GeoJSON allows "geometry": null for features without a location.
        return None

    if geom['type'] == 'Point':
        # A position may carry a third element (altitude); keep lon/lat only.
        lon, lat = geom['coordinates'][:2]
        return lon, lat

    if geom['type'] == 'Polygon':
        rings = geom['coordinates']
        ring = rings[0] if rings else []
        # A GeoJSON ring repeats its first position at the end; drop that
        # duplicate so the starting vertex is not counted twice.
        if len(ring) > 1 and ring[0] == ring[-1]:
            ring = ring[:-1]
        if not ring:
            return None
        lon = sum(c[0] for c in ring) / len(ring)
        lat = sum(c[1] for c in ring) / len(ring)
        return lon, lat

    # Any other geometry type is skipped.
    return None


# JSON text is UTF-8, so do not rely on the platform default encoding.
with open('parcels.geojson', encoding='utf-8') as f:
    geojson = json.load(f)

for feature in geojson['features']:
    point = _lon_lat(feature.get('geometry'))
    if point is None:
        continue
    lon, lat = point

    code = yoro.encode(lat, lon, domain='CI')
    # "properties" may be null in GeoJSON; replace it with a dict first.
    if feature.get('properties') is None:
        feature['properties'] = {}
    feature['properties']['yoro_code'] = code

with open('parcels_with_yoro.geojson', 'w', encoding='utf-8') as f:
    json.dump(geojson, f, indent=2)
Django Bulk Assignment¶
For existing database records without Yoro codes:
from django.contrib.gis.geos import Point
from yoro.django.services import get_or_create_altius_code
from myapp.models import Producer
# Rows are read and written back in batches of this size
BATCH_SIZE = 500

# Producers that have a location but no Yoro code yet
missing_code = Producer.objects.filter(
    location__isnull=False,
    altius_code__isnull=True,
)

pending = []
for producer in missing_code.iterator(chunk_size=BATCH_SIZE):
    # The point's y is used as latitude and its x as longitude
    point = producer.location
    producer.altius_code = get_or_create_altius_code(point.y, point.x, domain='CI')
    pending.append(producer)

    if len(pending) < BATCH_SIZE:
        continue
    # Batch is full: write it out and start a new one
    Producer.objects.bulk_update(pending, ['altius_code'])
    pending = []

# Write whatever is left in the final, partial batch
if pending:
    Producer.objects.bulk_update(pending, ['altius_code'])
Performance¶
Yoro encoding is pure math — no I/O, no network calls:
import time
import yoro
# Benchmark: time 100,000 consecutive encodes of slightly shifted points
t0 = time.perf_counter()
for step in range(100_000):
    lat = 5.0 + step * 0.00005
    lon = -4.0 + step * 0.00003
    yoro.encode(lat, lon, domain='CI')
elapsed = time.perf_counter() - t0

print(f"100,000 encodes in {elapsed:.2f}s ({100_000/elapsed:.0f} ops/sec)")
# Typical: ~200,000 ops/sec on modern hardware
No need for async or multiprocessing
At ~200K encodes/second, encoding even a million-row dataset takes only about 5 seconds (excluding file or database I/O), so most use cases need no parallelization.