Why Python Is the Engineer's Automation Language
Python dominates engineering automation for three reasons: it is the lingua franca of data science (pandas, numpy), it has mature libraries for every file format engineers use (PDF, Excel, CAD exports, JSON, XML), and it is fast enough for most batch processing tasks. More importantly, Python scripts are readable by non-programmers on your team — a well-written 50-line script replaces an hour of manual file manipulation and can be handed off without a tutorial.
File Processing with pathlib
pathlib.Path is the modern way to work with filesystem paths in Python and is generally preferred over the older, string-based os.path functions (which remain available). It provides an object-oriented interface and overloads the / operator so that path segments can be joined naturally:
from pathlib import Path
import shutil
from datetime import datetime
# Common pathlib operations
import json

base_dir = Path('/projects/engineering')
# Everything this script writes lives under here; it must be excluded from
# discovery, otherwise a second run would pick up its own earlier copies
# and produce "large_large_*.pdf" files.
processed_root = base_dir / 'processed'

# Iteration — find all PDF files recursively (skipping our own output tree)
pdf_files = [
    found for found in base_dir.rglob('*.pdf')
    if processed_root not in found.parents
]
print(f"Found {len(pdf_files)} PDFs")

# Path components
p = Path('/projects/engineering/reports/2026/report.pdf')
print(p.name)    # report.pdf
print(p.stem)    # report
print(p.suffix)  # .pdf
print(p.parent)  # /projects/engineering/reports/2026  (p.parent is itself a Path)

# Create directories safely
output_dir = processed_root / datetime.now().strftime('%Y-%m-%d')
output_dir.mkdir(parents=True, exist_ok=True)  # Equivalent to mkdir -p

# Copy large files into today's output directory
LARGE_FILE_BYTES = 10 * 1024 * 1024  # 10 MB
for pdf in pdf_files:
    if pdf.stat().st_size > LARGE_FILE_BYTES:
        # NOTE: the destination uses only the file name, so two large PDFs
        # with the same name in different sub-directories overwrite each other.
        dest = output_dir / f"large_{pdf.name}"
        shutil.copy2(pdf, dest)  # copy2 preserves metadata

# Read and write text files
config_file = base_dir / 'config.json'
if config_file.exists():
    config = json.loads(config_file.read_text(encoding='utf-8'))
    config['processed_date'] = datetime.now().isoformat()
    config_file.write_text(json.dumps(config, indent=2), encoding='utf-8')
PDF Manipulation
Merging and Splitting PDFs with pypdf (formerly PyPDF2)
from pypdf import PdfWriter, PdfReader # pypdf is the maintained fork of PyPDF2
from pathlib import Path
def merge_pdfs(input_paths: list[Path], output_path: Path) -> None:
    """Concatenate the pages of several PDFs, in the given order, into one file.

    Args:
        input_paths: PDFs to read; their pages are appended in list order.
        output_path: File that receives the combined document (opened 'wb').
    """
    combined = PdfWriter()
    for source in input_paths:
        # Each source is opened by file name and contributes all of its pages.
        for page in PdfReader(str(source)).pages:
            combined.add_page(page)

    with open(output_path, 'wb') as out_file:
        combined.write(out_file)

    print(f"Merged {len(input_paths)} files into {output_path}")
def extract_pages(input_path: Path, page_numbers: list[int], output_path: Path) -> None:
    """Copy selected pages (0-indexed) of one PDF into a new PDF file.

    Args:
        input_path: PDF to read from.
        page_numbers: Indexes into the source's page list; pages are written
            in the order given here.
        output_path: File that receives the extracted pages (opened 'wb').
    """
    source_pages = PdfReader(str(input_path)).pages
    selection = PdfWriter()
    for index in page_numbers:
        selection.add_page(source_pages[index])

    with open(output_path, 'wb') as out_file:
        selection.write(out_file)
# Example: combine every monthly report for 2026 into a single annual PDF.
reports_dir = Path('/projects/reports/2026')
monthly_reports = list(reports_dir.glob('report_*.pdf'))
monthly_reports.sort()  # merge order follows the sorted paths
annual_report = reports_dir / 'annual_report_2026.pdf'
merge_pdfs(monthly_reports, annual_report)
Generating PDFs with reportlab
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors
def generate_equipment_report(equipment_data: list[dict], output_path: str) -> None:
    """Generate a formatted equipment inspection report PDF.

    The document has a title, a "Generated: <date>" line and one table with a
    header row followed by one row per equipment item.

    Args:
        equipment_data: One dict per table row. Each dict must provide the
            keys 'id', 'name', 'status' and 'last_inspection'.
        output_path: Destination handed to SimpleDocTemplate.

    Raises:
        KeyError: If an item is missing one of the four required keys.
    """
    doc = SimpleDocTemplate(output_path, pagesize=letter,
                            leftMargin=inch, rightMargin=inch,
                            topMargin=inch, bottomMargin=inch)
    styles = getSampleStyleSheet()
    story = []

    # Title
    story.append(Paragraph('Equipment Inspection Report', styles['Title']))
    story.append(Paragraph(f'Generated: {datetime.now().strftime("%Y-%m-%d")}', styles['Normal']))

    # Table: header row first, then one row per item
    table_data = [['Equipment ID', 'Name', 'Status', 'Last Inspection']]
    table_data.extend(
        [item['id'], item['name'], item['status'], item['last_inspection']]
        for item in equipment_data
    )

    everything = ((0, 0), (-1, -1))
    header_row = ((0, 0), (-1, 0))
    body_rows = ((0, 1), (-1, -1))

    table = Table(table_data, colWidths=[1.2*inch, 2.5*inch, 1.3*inch, 1.5*inch])
    table.setStyle(TableStyle([
        ('BACKGROUND', *header_row, colors.HexColor('#0ea5e9')),
        ('TEXTCOLOR', *header_row, colors.white),
        ('FONTNAME', *header_row, 'Helvetica-Bold'),
        ('ROWBACKGROUNDS', *body_rows, [colors.white, colors.HexColor('#f0f9ff')]),
        ('GRID', *everything, 0.5, colors.HexColor('#e0e7ff')),
        # TableStyle has no combined 'PADDING' command; padding is set per
        # side, so all four are listed to get a uniform 6pt inset.
        ('LEFTPADDING', *everything, 6),
        ('RIGHTPADDING', *everything, 6),
        ('TOPPADDING', *everything, 6),
        ('BOTTOMPADDING', *everything, 6),
    ]))
    story.append(table)
    doc.build(story)
Excel and CSV Processing
import pandas as pd
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment
# pandas: powerful data manipulation for CSV/Excel
def process_sensor_data(csv_path: str) -> pd.DataFrame:
    """Read sensor readings from a CSV and return hourly statistics per sensor.

    Rows are kept only when the temperature lies strictly between -40 and 200
    and both 'sensor_id' and 'temperature' are present.

    Args:
        csv_path: CSV file with 'timestamp', 'sensor_id' and 'temperature'
            columns; 'timestamp' is parsed as dates.

    Returns:
        One row per (sensor_id, hour) with the columns 'avg_temp', 'max_temp'
        and 'reading_count'.
    """
    readings = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Clean: discard out-of-range temperatures, then incomplete rows.
    temps = readings['temperature']
    in_range = temps.gt(-40) & temps.lt(200)
    cleaned = readings[in_range].dropna(subset=['sensor_id', 'temperature'])

    # Aggregate: bucket each reading by the hour it falls in.
    cleaned = cleaned.assign(hour=cleaned['timestamp'].dt.floor('h'))
    per_sensor_hour = cleaned.groupby(['sensor_id', 'hour'])
    stats = per_sensor_hour.agg(
        avg_temp=('temperature', 'mean'),
        max_temp=('temperature', 'max'),
        reading_count=('temperature', 'count'),
    )
    return stats.reset_index()
# Export to formatted Excel with openpyxl
def export_to_excel(df: pd.DataFrame, output_path: str) -> None:
    """Write df to an Excel file, then restyle the saved workbook in place.

    The frame is written without its index to a sheet named 'Sensor Summary'.
    The file is then reopened with openpyxl to format the header row and
    widen the columns, and saved back to the same path.

    Args:
        df: Data to export.
        output_path: Target .xlsx path; written twice (raw, then styled).
    """
    df.to_excel(output_path, index=False, sheet_name='Sensor Summary')

    workbook = openpyxl.load_workbook(output_path)
    sheet = workbook.active

    # Header row: bold white text, centred, on a solid blue fill.
    blue_fill = PatternFill(start_color='0EA5E9', end_color='0EA5E9', fill_type='solid')
    for header_cell in sheet[1]:
        header_cell.font = Font(bold=True, color='FFFFFF')
        header_cell.fill = blue_fill
        header_cell.alignment = Alignment(horizontal='center')

    # Column widths: longest rendered value in the column plus 4 of slack.
    for column_cells in sheet.columns:
        text_lengths = [len(str(c.value or '')) for c in column_cells]
        col_letter = column_cells[0].column_letter
        sheet.column_dimensions[col_letter].width = max(text_lengths) + 4

    workbook.save(output_path)
    print(f"Report saved to {output_path}")
REST API Automation
import httpx
import asyncio
from typing import Any
# Synchronous requests for simple scripts
import requests
def fetch_all_pages(base_url: str, api_key: str) -> list[dict]:
    """Fetch all pages of a cursor-paginated API and return the combined items.

    Each request asks for up to 100 items. The response JSON must contain an
    'items' list; a truthy 'next_cursor' value requests another page, and its
    absence (or a falsy value) ends the loop.

    Args:
        base_url: Endpoint to query.
        api_key: Sent with every request in the 'X-API-Key' header.

    Returns:
        The concatenation of every page's 'items', in the order received.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        KeyError: If a page's JSON has no 'items' key.
    """
    results: list[dict] = []
    cursor = None

    # One Session for the whole pagination loop, so the underlying connection
    # is reused from page to page rather than re-established per request.
    with requests.Session() as session:
        session.headers.update({'X-API-Key': api_key})
        while True:
            params = {'limit': 100}
            if cursor:
                params['cursor'] = cursor

            response = session.get(base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            results.extend(data['items'])
            cursor = data.get('next_cursor')
            if not cursor:
                break

    return results
# Async httpx for concurrent API calls (much faster for bulk operations)
async def fetch_equipment_status(equipment_ids: list[str], api_base: str) -> list[dict]:
    """Fetch status for many equipment IDs concurrently.

    One GET to '{api_base}/equipment/{id}/status' is issued per ID, all on a
    shared AsyncClient with a 10-second timeout. A failure for one ID never
    aborts the others.

    Args:
        equipment_ids: IDs to query.
        api_base: Base URL that the per-equipment path is appended to.

    Returns:
        One dict per ID, in the same order as equipment_ids. On success this
        is {'id': <id>, **<response JSON>}. On any failure (transport error,
        HTTP error status, or a body that is not a JSON object) it is
        {'id': <id>, 'error': <message>}.
    """

    async def _fetch_one(client: httpx.AsyncClient, eq_id: str) -> dict:
        # The whole request/validate/decode sequence sits inside the try so an
        # HTTP error status or undecodable body becomes a per-ID error entry
        # instead of escaping and discarding every other result.
        try:
            response = await client.get(f'{api_base}/equipment/{eq_id}/status')
            response.raise_for_status()
            return {'id': eq_id, **response.json()}
        except Exception as exc:  # per-item boundary: report, don't propagate
            return {'id': eq_id, 'error': str(exc)}

    async with httpx.AsyncClient(timeout=10.0) as client:
        results = await asyncio.gather(
            *(_fetch_one(client, eq_id) for eq_id in equipment_ids)
        )
    return list(results)
# Usage: run the coroutine to completion from synchronous code.
# `equipment_ids` is expected to be supplied by the caller; it is not
# assigned anywhere in the code shown here.
status_request = fetch_equipment_status(equipment_ids, 'https://api.example.com')
statuses = asyncio.run(status_request)
CLI Tools with argparse
#!/usr/bin/env python3
"""
merge-reports.py — Merge PDF inspection reports into a single annual report.
Usage: python merge-reports.py --input-dir /reports/2026 --output annual_2026.pdf --verbose
"""
import argparse
from pathlib import Path
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the report-merging tool.

    Args:
        argv: Argument strings to parse. When None (the default), argparse
            reads the process's command line, so existing calls with no
            argument behave as before. Passing a list allows the parser to
            be driven programmatically.

    Returns:
        A namespace with 'input_dir' and 'output' (both Path), 'pattern'
        (str) and the boolean flags 'verbose' and 'dry_run'.
    """
    parser = argparse.ArgumentParser(
        description='Merge PDF inspection reports into a single document.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--input-dir', type=Path, required=True,
                        help='Directory containing PDF reports')
    parser.add_argument('--output', type=Path, required=True,
                        help='Output PDF file path')
    parser.add_argument('--pattern', default='report_*.pdf',
                        help='Glob pattern for report files (default: report_*.pdf)')
    parser.add_argument('--verbose', action='store_true',
                        help='Print each file as it is processed')
    parser.add_argument('--dry-run', action='store_true',
                        help='List files that would be merged without merging')
    return parser.parse_args(argv)
def main() -> None:
    """Entry point: find matching PDFs in the input directory and merge them.

    Exits with status 1 and a message on stderr when the input directory is
    missing (or is not a directory) or when no file matches the pattern.
    With --dry-run the matching files are listed and nothing is written.
    """
    args = parse_args()

    # is_dir() rather than exists(): a regular file at this path would pass
    # exists() and then simply yield no matches.
    if not args.input_dir.is_dir():
        # SystemExit with a string prints it to stderr and exits with status 1.
        raise SystemExit(f"Error: {args.input_dir} does not exist or is not a directory")

    # Skip the output file itself: if it sits in the input directory and
    # matches the pattern, a re-run would otherwise merge the previous result
    # into the new one.
    output_resolved = args.output.resolve()
    pdfs = sorted(
        candidate for candidate in args.input_dir.glob(args.pattern)
        if candidate.resolve() != output_resolved
    )
    if not pdfs:
        raise SystemExit(f"No PDFs matching '{args.pattern}' in {args.input_dir}")

    print(f"Found {len(pdfs)} PDFs to merge:")
    if args.verbose or args.dry_run:
        for pdf in pdfs:
            print(f" {pdf}")

    if args.dry_run:
        print("Dry run — no files written.")
        return

    merge_pdfs(pdfs, args.output)
    print(f"Done: {args.output} ({args.output.stat().st_size // 1024} KB)")


if __name__ == '__main__':
    main()
Task Scheduling
import schedule
import time
import logging
from datetime import datetime
# Root logger: emit INFO and above, each line prefixed with a timestamp.
logging.basicConfig(
    format='%(asctime)s %(message)s',
    level=logging.INFO,
)
def generate_daily_report():
    """Fetch all equipment records and export them to a dated Excel report.

    The report is written to 'reports/equipment_<YYYY-MM-DD>.xlsx'. Any
    failure is logged with its traceback and swallowed, so the function
    itself never raises.
    """
    logging.info("Generating daily equipment report...")
    try:
        # NOTE: '...' is a placeholder; supply a real API key here.
        data = fetch_all_pages('https://api.example.com/equipment', api_key='...')
        df = pd.DataFrame(data)

        report_path = f"reports/equipment_{datetime.now():%Y-%m-%d}.xlsx"
        # Make sure the target directory exists; otherwise the export fails
        # on every run until someone creates it by hand.
        Path(report_path).parent.mkdir(parents=True, exist_ok=True)
        export_to_excel(df, report_path)

        logging.info("Daily report generated successfully")
    except Exception as e:
        # Deliberately broad: this is the top-level boundary of the job.
        # logging.exception keeps the traceback that logging.error dropped.
        logging.exception("Report generation failed: %s", e)
def sync_data():
    """Log that a sync is starting; the sync logic itself is still a placeholder."""
    start_message = "Syncing data from external API..."
    logging.info(start_message)
    # ... sync logic
def generate_weekly_summary():
    """Placeholder weekly job, defined so the Monday schedule below can be registered."""
    logging.info("Generating weekly summary...")
    # ... weekly summary logic


# Schedule jobs
schedule.every().day.at('06:00').do(generate_daily_report)  # 6 AM daily
schedule.every(30).minutes.do(sync_data)  # Every 30 minutes
schedule.every().monday.at('08:00').do(generate_weekly_summary)  # Mondays, 8 AM

print("Scheduler running. Press Ctrl+C to stop.")
try:
    while True:
        schedule.run_pending()
        time.sleep(60)  # Check every minute
except KeyboardInterrupt:
    # The banner advertises Ctrl+C, so exit quietly instead of with a traceback.
    print("Scheduler stopped.")