{"id":"1u02o8a873","title":"Senior Software Engineer, ML Infrastructure","posted_at":"2026-03-26T20:20:10.000Z","apply_url":"https://jobs.ashbyhq.com/decagon/63cd7634-f67f-4051-be45-515ed4f6a074/application","locations":["San Francisco, CA"],"employment_type":"full_time","workplace_type":"on_site","seniority_level":"senior","description_language":"en","source_name":"ashby","source_url":"https://jobs.ashbyhq.com/decagon/63cd7634-f67f-4051-be45-515ed4f6a074","salary":{"min":200000,"max":400000,"currency":"USD","period":"year","display":"$200,000–$400,000"},"job_summary":"Decagon is a conversational AI platform that enables enterprises to deploy AI agents for personalized customer experiences. The Senior Software Engineer will own the ML infrastructure, building distributed training systems and inference architectures to support the company's model lifecycle.","job_description":{"responsibilities":["Design and build distributed training platforms for LLM and multimodal fine-tuning at scale","Integrate state-of-the-art training algorithms into production pipelines","Own inference architecture and multi-provider routing, including failover and optimization","Lead initiatives to improve latency and cost efficiency across the training and serving stack","Build evaluation and experimentation infrastructure for rapid iteration","Drive technical direction, mentor engineers, and establish best practices for ML infrastructure"],"minimum_qualifications":["6+ years building ML infrastructure or production systems at scale","Deep experience with distributed training, including multi-node GPU clusters, fault tolerance, and optimization","Strong understanding of LLM inference, latency optimization, provider tradeoffs, and serving architecture","Proven track record leading complex, multi-quarter technical projects"],"preferred_qualifications":[]},"visa_sponsorship":null,"experience_years_min":6,"job_address":null,"job_city":"San Francisco","job_state":"CA","job_country":"US","location_lat":37.789482400000004,"location_lng":-122.3975976,"keywords":["distributed training","experimentation","infrastructure","relationships","architecture","Collaborate","distributed","fine-tuning","experiences","competitive","production","algorithms","multimodal","pipelines","ML models","scalable","Platform","customer","research","leading","latency","routing","design","Mentor","vision","deploy","agents","Square","teams","local","email","GPU","art","ML","AI"],"company":{"name":"Decagon","logo_url":"https://img.logo.dev/decagon.ai?token=pk_fWx5G5QrQMm-0Ud8BW3mBg&size=64&format=png","description":"Decagon provides a conversational AI platform that enables enterprises to deploy AI agents for customer support interactions across voice, chat, email, and SMS channels.","website_url":"https://decagon.ai","linkedin_url":"https://www.linkedin.com/company/decagon-ai","glassdoor_url":null,"x_url":"https://x.com/DecagonAI","instagram_url":null,"youtube_url":"https://www.youtube.com/channel/UCr4I4kQ253q-h91KOkJckQQ","github_url":null,"huggingface_url":null,"tiktok_url":null,"crunchbase_url":"https://www.crunchbase.com/organization/decagon-485e","facebook_url":null,"employee_count_range":"501-1000","employee_count":null,"founded_year":2023,"headquarters":{"address":"2261 Market Street, Suite 5378, San Francisco, CA 94114, United States","city":"San Francisco, CA","country":"US","lat":37.7879363,"lng":-122.4075201},"industry":"other","company_type":"startup","total_funding_usd":481000000,"locations":["Atlanta","London","London, UK","London, United Kingdom","New York, NY","San Francisco, CA","Toronto, Canada"]}}