Importing data from CSV files is a common feature in web applications — bulk user imports, product catalogue uploads, configuration data. A production CSV import pipeline needs to do more than just parse rows: it must validate required columns exist, clean and coerce field values, skip or collect errors per row without aborting the entire import, and return a structured report showing what succeeded and what failed. Building this correctly — collecting all errors rather than stopping at the first — is the difference between an import feature that delights users and one that frustrates them.
Validating Column Structure
import csv
from io import StringIO
from typing import NamedTuple
REQUIRED_COLUMNS = {"name", "email", "role"}
OPTIONAL_COLUMNS = {"bio", "phone"}
ALL_COLUMNS = REQUIRED_COLUMNS | OPTIONAL_COLUMNS
def validate_csv_structure(text: str) -> tuple[list, list]:
    """Check that a CSV header contains every required column.

    Returns (fieldnames, errors): *fieldnames* are the raw header names
    exactly as they appear in the file, *errors* is empty when the
    structure is acceptable.  Unknown columns only produce a warning
    entry; they do not invalidate the file.
    """
    header = csv.DictReader(StringIO(text)).fieldnames
    if header is None:
        # No header row at all — nothing further to check.
        return [], ["CSV file is empty or has no header row"]
    # Compare case-insensitively and ignore stray whitespace around names.
    normalized = {name.strip().lower() for name in header}
    problems = []
    absent = REQUIRED_COLUMNS - normalized
    extra = normalized - ALL_COLUMNS
    if absent:
        problems.append(f"Missing required columns: {', '.join(sorted(absent))}")
    if extra:
        problems.append(f"Unknown columns (will be ignored): {', '.join(sorted(extra))}")
    return list(header), problems
csv.DictReader uses the first row as column names — validate that the required column names are present before looping over rows. Return a clear error immediately if the structure is wrong rather than processing thousands of rows with wrong mappings. When row-level validation fails, report every error with its row number, e.g. [{"row": 5, "field": "email", "error": "Invalid email"}, {"row": 12, "field": "role", "error": "Unknown role"}] — the user can fix everything at once and re-import successfully.

Row-Level Validation and Transformation
from pydantic import BaseModel, EmailStr, field_validator
from typing import List
ALLOWED_ROLES = {"user", "editor", "admin"}
class UserImportRow(BaseModel):
    """One cleaned and validated row of a user-import CSV.

    Field validators strip surrounding whitespace and normalise case so
    that e.g. " Admin " imports as role "admin".  Raises
    pydantic.ValidationError (with one entry per bad field) on invalid data.
    """
    name: str
    email: str
    role: str = "user"  # default applies only when the "role" key is absent
    bio: str = ""
    # Fix: "phone" is listed in OPTIONAL_COLUMNS and accepted by
    # validate_csv_structure, but without a model field pydantic's default
    # extra="ignore" silently dropped the value on import.
    phone: str = ""
    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        v = v.strip()
        if len(v) < 2:
            raise ValueError("Name must be at least 2 characters")
        if len(v) > 100:
            raise ValueError("Name must be at most 100 characters")
        return v
    @field_validator("email")
    @classmethod
    def validate_email(cls, v: str) -> str:
        # Deliberately lightweight check: "@" present and a dot in the
        # domain part.  Full RFC validation would need EmailStr/email-validator.
        v = v.strip().lower()
        if "@" not in v or "." not in v.split("@")[-1]:
            raise ValueError("Invalid email address")
        return v
    @field_validator("role")
    @classmethod
    def validate_role(cls, v: str) -> str:
        v = v.strip().lower()
        if v not in ALLOWED_ROLES:
            raise ValueError(f"Role must be one of: {', '.join(sorted(ALLOWED_ROLES))}")
        return v
class ImportReport(BaseModel):
    """Summary returned to the client after a CSV import attempt."""
    total_rows: int  # data rows processed (imported + skipped); header excluded
    imported: int  # rows that passed validation
    skipped: int  # rows rejected by validation
    errors: List[dict]  # per-row records, e.g. {"row": 5, "data": {...}, "error": "..."}
def process_csv_import(text: str) -> ImportReport:
    """Parse, validate and process all rows, collecting per-row errors.

    Structure problems (missing/empty header) abort before any row is
    read.  Row-level problems never abort: every bad row is recorded
    with its physical line number and the rest of the file is still
    processed, so the user can fix everything in one pass.
    """
    from pydantic import ValidationError

    # Validate structure first — never map thousands of rows against a
    # wrong or missing header.
    fieldnames, structure_errors = validate_csv_structure(text)
    if structure_errors:
        return ImportReport(
            total_rows=0, imported=0, skipped=0,
            errors=[{"row": "header", "error": e} for e in structure_errors],
        )
    reader = csv.DictReader(StringIO(text))
    imported = 0
    skipped = 0
    errors = []
    # Data starts on physical line 2 (line 1 is the header).
    for row_num, raw_row in enumerate(reader, start=2):
        # Normalise header keys.  A key of None means the row had more
        # cells than the header (DictReader's restkey); skip those
        # instead of crashing on None.strip().
        cleaned = {
            k.strip().lower(): v for k, v in raw_row.items() if k is not None
        }
        try:
            row = UserImportRow.model_validate(cleaned)
            # TODO: insert into database
            imported += 1
        except ValidationError as e:
            skipped += 1
            # One "field: message" fragment per failing field, so the
            # client can pinpoint every problem in the row at once.
            detail = "; ".join(
                f"{'.'.join(str(loc) for loc in err['loc'])}: {err['msg']}"
                for err in e.errors()
            )
            errors.append({
                "row": row_num,
                "data": dict(raw_row),
                "error": detail,
            })
        except Exception as e:
            # Non-validation failure (e.g. future DB insert) — record it
            # rather than aborting the whole import.
            skipped += 1
            errors.append({
                "row": row_num,
                "data": dict(raw_row),
                "error": str(e),
            })
    return ImportReport(
        total_rows=imported + skipped,
        imported=imported,
        skipped=skipped,
        errors=errors,
    )
Exporting Data to CSV
import csv
from io import StringIO

from fastapi import Depends, FastAPI
from fastapi.responses import StreamingResponse
app = FastAPI()
def generate_csv(rows: list[dict], columns: list[str]) -> str:
    """Serialise *rows* into a CSV string with a header line.

    Output column order follows *columns* exactly; dict keys not listed
    in *columns* are silently dropped.
    """
    buffer = StringIO()
    writer = csv.DictWriter(
        buffer,
        fieldnames=columns,
        extrasaction="ignore",  # ignore keys not in fieldnames
    )
    writer.writeheader()
    for row in rows:
        writer.writerow(row)
    return buffer.getvalue()
def _stream_csv(rows: list[dict], columns: list[str]):
    """Yield CSV text chunk by chunk: header first, then one row at a time.

    Emitting row-by-row keeps memory flat for large exports instead of
    building the whole file in a single string.
    """
    buffer = StringIO()
    writer = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
    writer.writeheader()
    yield buffer.getvalue()
    for row in rows:
        # Reuse one buffer: rewind, clear, write the next row.
        buffer.seek(0)
        buffer.truncate(0)
        writer.writerow(row)
        yield buffer.getvalue()


@app.get("/users/export")
async def export_users(db = Depends(get_db)):
    """Export every user as a downloadable CSV attachment.

    NOTE(review): `get_db` and `User` are not defined in this module —
    assumed to be imported elsewhere in the project; confirm.
    Fix: `Depends` was used here but never imported (see the fastapi
    import at the top of the file); the response also built the entire
    CSV in memory before wrapping it in iter([...]) — it now streams
    row by row via _stream_csv.
    """
    users = db.query(User).all()
    rows = [
        {
            "name": u.name,
            "email": u.email,
            "role": u.role,
            "created_at": u.created_at.strftime("%Y-%m-%d"),
        }
        for u in users
    ]
    return StreamingResponse(
        _stream_csv(rows, ["name", "email", "role", "created_at"]),
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="users.csv"'},
    )
Common Mistakes
Mistake 1 — Stopping at the first error instead of collecting all errors
❌ Wrong — returns after first bad row:
for row in reader:
validated = validate(row)
if validated.errors:
return {"error": validated.errors[0]} # stops at row 1!
✅ Correct — collect all errors, report at the end:
errors = []
for i, row in enumerate(reader, start=2):
try:
process(row)
except Exception as e:
errors.append({"row": i, "error": str(e)})
# report all errors after processing all rows ✓
Mistake 2 — Not stripping whitespace from CSV values
❌ Wrong — " admin " fails role validation:
role = row["role"] # " admin " — whitespace not stripped
if role not in ALLOWED_ROLES: # " admin " ∉ {"user", "admin"} is True — valid row wrongly rejected!
✅ Correct — strip all values on ingestion:
role = row["role"].strip().lower() # "admin" ✓
Mistake 3 — Using csv.reader instead of csv.DictReader for columnar data
❌ Wrong — positional access breaks when column order changes:
reader = csv.reader(f)
for row in reader:
name = row[0] # breaks if CSV column order changes!
✅ Correct — use DictReader for named column access:
reader = csv.DictReader(f)
for row in reader:
name = row["name"] # ✓ robust to column reordering
Quick Reference
| Task | Code |
|---|---|
| Read CSV to dicts | csv.DictReader(StringIO(text)) |
| Get header names | reader.fieldnames |
| Write CSV to string | csv.DictWriter(StringIO(), fieldnames=[...]) |
| Stream CSV download | StreamingResponse(iter([csv_str]), media_type="text/csv") |
| Validate with Pydantic | Model.model_validate(row_dict) |
| Collect all errors | Append to errors list, don’t raise until end |
| Content-Disposition header | 'attachment; filename="export.csv"' |