Skip to content

Batch Geocoding

This guide shows how to encode large datasets of GPS coordinates to Yoro codes — useful for geocoding CSV files, database records, or GeoJSON features.

CSV Geocoding

Encode a CSV file

import csv
import yoro

# Read locations.csv, append Yoro columns to every row, and write the result
# to locations_with_yoro.csv. The input must have 'latitude' and 'longitude'
# columns holding values parseable by float().
#
# Both files are opened with newline='' as the csv module requires; without it
# on the input side, quoted fields containing line breaks are read incorrectly.
with open('locations.csv', newline='') as fin, \
        open('locations_with_yoro.csv', 'w', newline='') as fout:
    reader = csv.DictReader(fin)

    # DictReader.fieldnames is None when the input has no header row
    # (e.g. an empty file); fail with a clear message instead of a TypeError.
    if reader.fieldnames is None:
        raise ValueError("locations.csv is empty: expected a header row")

    fieldnames = list(reader.fieldnames) + ['yoro_code', 'yoro_precision', 'resolution_m']
    writer = csv.DictWriter(fout, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        lat = float(row['latitude'])
        lon = float(row['longitude'])
        code = yoro.encode(lat, lon, precision=12, domain='CI')

        # Decode the freshly built code to read back the precision it carries,
        # then look up the resolution for that precision in the same domain.
        decoded = yoro.decode(code)
        res = yoro.resolution(decoded['precision'], domain='CI')

        row['yoro_code'] = code
        row['yoro_precision'] = decoded['precision']
        row['resolution_m'] = round(res, 1)
        writer.writerow(row)

Input (locations.csv)

name,latitude,longitude
Abidjan Plateau,5.322,-4.017
Bouake Centre,7.694,-5.031
Yamoussoukro,6.827,-5.276
San Pedro Port,4.748,-6.636

Output (locations_with_yoro.csv)

name,latitude,longitude,yoro_code,yoro_precision,resolution_m
Abidjan Plateau,5.322,-4.017,CI-PW2MK,12,172.9
Bouake Centre,7.694,-5.031,CI-NX3WB,12,172.9
Yamoussoukro,6.827,-5.276,CI-NW64D,12,172.9
San Pedro Port,4.748,-6.636,CI-KYB8G,12,172.9

Pandas DataFrame

import pandas as pd
import yoro

# Load the locations; the code below reads the 'latitude' and 'longitude' columns.
df = pd.read_csv('locations.csv')

# Row-wise encoding: apply(axis=1) calls yoro.encode once per row in Python,
# so this is a convenient loop rather than a true vectorized operation.
# No precision is passed here, unlike the CSV example which uses precision=12
# (presumably the library default applies -- confirm against the yoro docs).
df['yoro_code'] = df.apply(
    lambda row: yoro.encode(row['latitude'], row['longitude'], domain='CI'),
    axis=1
)

# Add decoded info: decode each code once, then pull individual fields out of
# the decoded result ('lat', 'lon', and the 'bounds' latitude limits).
decoded = df['yoro_code'].apply(yoro.decode)
df['yoro_lat'] = decoded.apply(lambda d: d['lat'])
df['yoro_lon'] = decoded.apply(lambda d: d['lon'])
df['cell_lat_min'] = decoded.apply(lambda d: d['bounds']['lat_min'])
df['cell_lat_max'] = decoded.apply(lambda d: d['bounds']['lat_max'])

print(df)

GeoJSON

Add Yoro codes to GeoJSON features

import json
import yoro

# Add a 'yoro_code' property to every Point and Polygon feature in
# parcels.geojson and write the result to parcels_with_yoro.geojson.
# Features with any other geometry type, no geometry, or an empty polygon
# are left untouched.
with open('parcels.geojson') as f:
    geojson = json.load(f)

for feature in geojson['features']:
    geom = feature.get('geometry')
    if geom is None:
        # GeoJSON allows unlocated features ("geometry": null)
        continue

    if geom['type'] == 'Point':
        # A position is [lon, lat] with an optional third element (altitude)
        lon, lat = geom['coordinates'][:2]
    elif geom['type'] == 'Polygon':
        # Use the mean of the exterior (first) ring's vertices as an
        # approximate centroid
        rings = geom['coordinates']
        ring = rings[0] if rings else []
        # Rings are closed (last position repeats the first); drop the
        # duplicate so the first vertex is not counted twice in the mean
        if len(ring) > 1 and ring[0] == ring[-1]:
            ring = ring[:-1]
        if not ring:
            continue
        lon = sum(c[0] for c in ring) / len(ring)
        lat = sum(c[1] for c in ring) / len(ring)
    else:
        continue

    code = yoro.encode(lat, lon, domain='CI')

    # "properties" may be null (or absent) in valid GeoJSON
    props = feature.get('properties')
    if props is None:
        props = feature['properties'] = {}
    props['yoro_code'] = code

with open('parcels_with_yoro.geojson', 'w') as f:
    json.dump(geojson, f, indent=2)

Django Bulk Assignment

For existing database records without Yoro codes:

from django.contrib.gis.geos import Point
from yoro.django.services import get_or_create_altius_code

from myapp.models import Producer

# Producers that have a location but no code assigned yet
missing_code = Producer.objects.filter(location__isnull=False, altius_code__isnull=True)

# Stream the rows in chunks of 500 and write the new codes back in batches
# of 500, so neither the query result nor the pending updates are held in
# memory all at once.
pending = []
for producer in missing_code.iterator(chunk_size=500):
    point = producer.location
    # The point's y is passed as latitude and x as longitude
    producer.altius_code = get_or_create_altius_code(point.y, point.x, domain='CI')
    pending.append(producer)

    if len(pending) < 500:
        continue

    # Batch is full: persist it and start a new one
    Producer.objects.bulk_update(pending, ['altius_code'])
    pending = []

# Persist the final, partially filled batch
if pending:
    Producer.objects.bulk_update(pending, ['altius_code'])

Performance

Yoro encoding is pure math — no I/O, no network calls:

import time
import yoro

# Benchmark: 100,000 encodes
# The timed region covers the whole loop, so the result includes Python loop
# and argument-arithmetic overhead in addition to yoro.encode itself.
start = time.perf_counter()
for i in range(100_000):
    # Vary the input each iteration: latitude sweeps from 5.0 to just under
    # 10.0 and longitude from -4.0 to just under -1.0.
    yoro.encode(5.0 + i * 0.00005, -4.0 + i * 0.00003, domain='CI')
elapsed = time.perf_counter() - start

# Report total wall-clock time and the derived throughput
print(f"100,000 encodes in {elapsed:.2f}s ({100_000/elapsed:.0f} ops/sec)")
# Typical: ~200,000 ops/sec on modern hardware

No need for async or multiprocessing

At ~200K encodes/second, encoding even a million-row dataset takes only about 5 seconds (excluding file or database I/O). No parallelization is needed for most use cases.