<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Viewport directives are written comma-separated, as in the documented syntax. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <title>Sebastian Raschka</title>
  <meta name="author" content="Sebastian Raschka">
  <meta name="description" content="I'm an LLM Research Engineer with over a decade of experience in artificial intelligence. My work bridges academia and industry, with roles including senior staff at an AI company and a statistics professor. My expertise lies in LLM research and the development of high-performance AI systems, with a deep focus on practical, code-driven implementations.">
  <link rel="canonical" href="https://sebastianraschka.com/">

  <!--
    Open Graph metadata. Each property is declared exactly once and only when it
    has a value: empty or repeated declarations give link-preview scrapers
    conflicting data, and surrounding newlines inside content values end up in
    the preview text.
  -->
  <meta property="og:title" content="Sebastian Raschka">
  <meta property="og:description" content="I'm an LLM Research Engineer with over a decade of experience in artificial intelligence. My work bridges academia and industry, with roles including senior staff at an AI company and a statistics professor. My expertise lies in LLM research and the development of high-performance AI systems, with a deep focus on practical, code-driven implementations.">
  <meta property="og:url" content="https://sebastianraschka.com/">
  <meta property="og:site_name" content="Sebastian Raschka, PhD">
  <meta property="og:locale" content="en_US">
  <meta property="og:type" content="article">
  <meta property="og:image" content="https://sebastianraschka.com/images/logos/hero.jpg">

  <!-- Twitter/X card metadata. -->
  <meta name="twitter:card" content="summary_large_image">
  <meta name="twitter:site" content="@rasbt">
  <meta name="twitter:creator" content="@rasbt">
  <meta name="twitter:title" content="Sebastian Raschka">
  <meta name="twitter:description" content="I'm an LLM Research Engineer with over a decade of experience in artificial intelligence. My work bridges academia and industry, with roles including senior staff at an AI company and a statistics professor. My expertise lies in LLM research and the development of high-performance AI systems, with a deep focus on practical, code-driven implementations.">
  <meta name="twitter:image" content="https://sebastianraschka.com/images/logos/hero.jpg">

  <!-- Stylesheets: site styles first, then the icon font used by the "fa" classes. -->
  <link rel="stylesheet" href="/css/combined_direct_no_sass.css">
  <link rel="stylesheet" href="/css/fork-awesome.min.css">

  <!-- Favicons, web app manifest, and browser UI colors. -->
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicons/favicon-16x16.png">
  <link rel="manifest" href="/site.webmanifest">
  <link rel="mask-icon" href="/images/favicons/safari-pinned-tab.svg" color="#5bbad5">
  <meta name="msapplication-TileColor" content="#ffc40d">
  <meta name="theme-color" content="#ffffff">
</head>
<body>
<img src="/images/logos/ahead-of-ai-icon.png" alt="Ahead of AI logo" style="display: none;">
<!-- Site header: name/home link, social profile icons, and the primary navigation. -->
<header class="site-header">
  <div class="site-title" style="text-decoration: none; margin-top: 2em;">
    <a href="/"><span style="color:black">Sebastian</span> <span style="color:#c5050c">Raschka</span> </a>
    <!-- The alt text of a linked image is the link's accessible name, so it names the destination. -->
    <a href="https://x.com/rasbt"><img src="/images/logos/twitter-bw.jpg" height="20" style="padding-left:20px;" alt="X (Twitter) profile"></a>
    <!--<a href="https://threads.net/@sebastianraschka"><img src="/images/logos/threads-logo-alt-small.png" height="20" style="padding-left:5px;" alt="Threads icon"></a>-->
    <a href="https://www.linkedin.com/in/sebastianraschka/"><img src="/images/logos/linkedin-bw.jpg" height="20" style="padding-left:5px;" alt="LinkedIn profile"></a>
    <a href="https://github.com/rasbt"><img src="/images/logos/github-bw.jpg" height="20" style="padding-left:5px;" alt="GitHub profile"></a>
  </div>
  <!-- <div style="width:100%;height:50;float:left;margin-bottom:10px;">
  <a href="https://twitter.com/rasbt"><img src="/images/logos/twitter-bw.jpg" height="20"></a>-->
  <!-- <a href="https://www.buymeacoffee.com/rasbt"><img src="/images/logos/coffee-bw.jpg" height="20"></a>-->
  <!-- <a href="https://mastodon.social/@SebRaschka"><img src="/images/logos/mastodon-bw.jpg" height="20"></a>-->
  <!-- </div>-->
  <div class="wrapper">
    <nav class="site-nav">
      <!--
        Hamburger icon. The link contains only an SVG, so aria-label supplies its
        accessible name and the SVG itself is hidden from assistive technology.
      -->
      <a href="#" class="menu-icon" aria-label="Menu">
        <svg viewBox="0 0 18 15" aria-hidden="true" focusable="false">
          <path fill="#424242" d="M18,1.484c0,0.82-0.665,1.484-1.484,1.484H1.484C0.665,2.969,0,2.304,0,1.484l0,0C0,0.665,0.665,0,1.484,0 h15.031C17.335,0,18,0.665,18,1.484L18,1.484z"/>
          <path fill="#424242" d="M18,7.516C18,8.335,17.335,9,16.516,9H1.484C0.665,9,0,8.335,0,7.516l0,0c0-0.82,0.665-1.484,1.484-1.484 h15.031C17.335,6.031,18,6.696,18,7.516L18,7.516z"/>
          <path fill="#424242" d="M18,13.516C18,14.335,17.335,15,16.516,15H1.484C0.665,15,0,14.335,0,13.516l0,0 c0-0.82,0.665-1.484,1.484-1.484h15.031C17.335,12.031,18,12.696,18,13.516L18,13.516z"/>
        </svg>
      </a>
      <div class="trigger">
        <!--<script type="text/javascript">
        var total_images = 2;
        var random_number = Math.floor((Math.random()*total_images));
        var random_img = new Array();
        random_img[0] = '<a href="https://twitter.com/rasbt"><img src="/images/logos/twitter-1.jpg" height="20"></a>';
        random_img[1] = '<a href="https://linkedin.com/in/sebastianraschka"><img src="/images/logos/linkedin-1.jpg" height="20"></a>';
        document.write(random_img[random_number]);
        </script>-->
        <span style="padding-left:0px;margin-left:0px;"></span>
        <a class="page-link" href="https://magazine.sebastianraschka.com"><span style="color:#c5050c;"><img src="/images/logos/ahead-of-ai-icon.png" alt="Ahead of AI Logo" height="20"> Blog</span></a>
        <!--<a class="page-link" href="/blog/index.html">Blog</a>-->
        <a class="page-link" href="/books">Books</a>
        <!--<a class="page-link" href="/newsletter">AI Newsletter</a>-->
        <a class="page-link" href="/teaching">Courses</a>
        <a class="page-link" href="https://github.com/rasbt/LLMs-from-scratch">LLMs From Scratch</a>
        <!--<a class="page-link" href="/publications">Research</a>-->
        <a class="page-link" href="/elsewhere">Talks</a>
        <a class="page-link" href="/contact">Contact</a>
        <a class="page-link" href="/resources">More</a>
      </div>
    </nav>
  </div>
</header>
<div class="page-content">
<div class="wrapper">
<!-- Intro: a two-column flex row (bio text + round portrait) that wraps when space is tight. -->
<div style="display: flex; align-items: center; justify-content: center; flex-wrap: wrap; max-width: 800px; margin: 40px auto; gap: 30px;">

  <!-- Text column: grows to fill the row, never narrower than 300px. -->
  <div style="flex: 1; min-width: 300px; text-align: left;">
    <h1 style="font-size: 1.8em; font-weight: 600; color: #222; margin-bottom: 20px;">
      Hello, I'm Sebastian Raschka, PhD
    </h1>
    <p style="font-size: 1.1em; line-height: 1.8; color: #555; margin-bottom: 20px;">
      I'm an LLM Research Engineer with over a decade of experience in artificial intelligence. My work bridges academia and industry, with roles including senior staff at an AI company and a statistics professor.
    </p>
    <p style="font-size: 1.1em; line-height: 1.8; color: #555; margin-bottom: 20px;">
      My expertise lies in LLM research and the development of high-performance AI systems, with a deep focus on practical, code-driven implementations.
      (For my most up-to-date CV details, please visit my <a href="https://www.linkedin.com/in/sebastianraschka">LinkedIn profile</a>.)
    </p>
  </div>

  <!-- Portrait column: fixed 200px basis; WebP is offered first, with JPEG as the fallback. -->
  <div style="flex: 0 0 200px; text-align: center;">
    <picture>
      <source type="image/webp" srcset="/images/logos/photo-2021-08-25_compressed.webp">
      <source type="image/jpeg" srcset="/images/logos/photo-2021-08-25_compressed.jpg">
      <!-- object-fit/object-position crop the photo into the 200x200 circle. -->
      <img src="/images/logos/photo-2021-08-25_compressed.jpg" alt="Sebastian Raschka Profile Picture" style="width: 200px; height: 200px; object-fit: cover; object-position: 100% 10%; border-radius: 50%; box-shadow: 0 4px 10px rgba(0,0,0,0.1);">
    </picture>
  </div>

</div>
<!-- Subscribe section: RSS feed link and e-mail subscription link, centered on one row. -->
<div class="rss" style="display: flex; align-items: center; justify-content: center; width: 96%; margin: 20px auto; max-width: 800px;">
  <a href="/rss_feed.xml" style="display: inline-flex; align-items: center; margin-right: 20px;">
    <!-- Icon-font glyph is decorative (the visible text names the link), so hide it from assistive technology. -->
    <i class="fa fa-rss fa-1x" style="padding-right: 5px;" aria-hidden="true"></i>
    <span>RSS Feed (Blog and Notes)</span>
  </a>
  <a class="page-link" href="https://magazine.sebastianraschka.com/subscribe" style="display: inline-flex; align-items: center;">
    <img src="/images/logos/ahead-of-ai-icon.png" alt="Ahead of AI Logo" height="20" style="padding-right: 5px;">
    <span>Subscribe via Email (AI Magazine)</span>
  </a>
</div>
<!-- Vertical spacer between the subscribe row and the post list. -->
<div style="height: 50px;"></div>
<!--
  Blog entries section: a heading, a link to the full archive, and one
  .post-entry card per recent post (thumbnail, title link, date, description).
  Thumbnails are lazy-loaded and decoded asynchronously because they sit below
  the intro; dates carry a machine-readable datetime value.
-->
<div style="max-width: 800px; margin: 30px auto;">
  <blockquote>
    <!-- Content about Sebastian -->
  </blockquote>
  <div class="post">
    <header class="post-header">
      <h1 class="post-title">Recent Notes and Blog Entries</h1>
    </header>
    <div style="margin-bottom: 45px; text-align: center;">
      See <a href="/blog/">Blog and Notes Archive</a> for all entries.
    </div>
    <article class="post-content posts-by-year">
      <div class="post-entry">
        <div class="post-hero-image">
          <img src="/images/blog/2025/from-gpt-2-to-gpt-oss/hero.jpg" alt="From GPT-2 to gpt-oss: Analyzing the Architectural Advances" loading="lazy" decoding="async">
        </div>
        <div class="post-details">
          <h2 class="post-title">
            <a href="/blog/2025/from-gpt-2-to-gpt-oss.html">From GPT-2 to gpt-oss: Analyzing the Architectural Advances</a>
          </h2>
          <p class="post-date"><time datetime="2025-08-09">Aug 9, 2025</time></p>
          <p class="post-description">OpenAI just released their new open-weight LLMs this week: gpt-oss-120b and gpt-oss-20b, their first open-weight models since GPT-2 in 2019. And yes, thanks to some clever optimizations, they can run locally. I spent the past few days reading through the code and technical reports to summarize the most interesting details.</p>
        </div>
      </div>
      <div class="post-entry">
        <div class="post-hero-image">
          <img src="/images/blog/2025/the-big-llm-architecture-comparison/hero.jpg" alt="The Big LLM Architecture Comparison" loading="lazy" decoding="async">
        </div>
        <div class="post-details">
          <h2 class="post-title">
            <a href="/blog/2025/the-big-llm-architecture-comparison.html">The Big LLM Architecture Comparison</a>
          </h2>
          <p class="post-date"><time datetime="2025-07-19">Jul 19, 2025</time></p>
          <p class="post-description">It has been seven years since the original GPT architecture was developed. At first glance, looking back at GPT-2 (2019) and forward to DeepSeek-V3 and Llama 4 (2024-2025), one might be surprised at how structurally similar these models still are. Comparing LLMs to determine the key ingredients that contribute to their good (or not-so-good) performance is notoriously challenging: datasets, training techniques, and hyperparameters vary widely and are often not well documented. However, I think that there is still a lot of value in examining the structural changes of the architectures themselves to see what LLM developers are up to in 2025.</p>
        </div>
      </div>
      <div class="post-entry">
        <div class="post-hero-image">
          <img src="/images/blog/2025/llm-research-papers-the-2025-list-january-to-june/hero.jpeg" alt="LLM Research Papers: The 2025 List (January to June)" loading="lazy" decoding="async">
        </div>
        <div class="post-details">
          <h2 class="post-title">
            <a href="/blog/2025/llm-research-papers-the-2025-list-january-to-june.html">LLM Research Papers: The 2025 List (January to June)</a>
          </h2>
          <p class="post-date"><time datetime="2025-07-01">Jul 1, 2025</time></p>
          <p class="post-description">The latest in LLM research with a hand-curated, topic-organized list of over 200 research papers from 2025.</p>
        </div>
      </div>
      <div class="post-entry">
        <div class="post-hero-image">
          <img src="/images/blog/2025/coding-the-kv-cache-in-llms/hero.jpg" alt="Understanding and Coding the KV Cache in LLMs from Scratch" loading="lazy" decoding="async">
        </div>
        <div class="post-details">
          <h2 class="post-title">
            <a href="/blog/2025/coding-the-kv-cache-in-llms.html">Understanding and Coding the KV Cache in LLMs from Scratch</a>
          </h2>
          <p class="post-date"><time datetime="2025-06-17">Jun 17, 2025</time></p>
          <p class="post-description">KV caches are one of the most critical techniques for efficient inference in LLMs in production. KV caches are an important component for compute-efficient LLM inference in production. This article explains how they work conceptually and in code with a from-scratch, human-readable implementation.</p>
        </div>
      </div>
      <div class="post-entry">
        <div class="post-hero-image">
          <img src="/images/blog/2025/coding-llms-from-the-ground-up-a-complete-course/hero.jpg" alt="Coding LLMs from the Ground Up: A Complete Course" loading="lazy" decoding="async">
        </div>
        <div class="post-details">
          <h2 class="post-title">
            <a href="/blog/2025/coding-llms-from-the-ground-up-a-complete-course.html">Coding LLMs from the Ground Up: A Complete Course</a>
          </h2>
          <p class="post-date"><time datetime="2025-05-10">May 10, 2025</time></p>
          <p class="post-description">Why build an LLM from scratch? It's probably the best and most efficient way to learn how LLMs really work. Plus, many readers have told me they had a lot of fun doing it.</p>
        </div>
      </div>
    </article>
  </div>
</div>
<!-- Styles for the recent-posts list (rule order is significant for the cascade). -->
<style>
  /* One post card: thumbnail and text side by side, separated from the next card by a rule. */
  .post-entry {
    display: flex;
    align-items: flex-start;
    margin-bottom: 20px;
    border-bottom: 1px solid #ddd;
    padding-bottom: 20px;
  }

  /* Thumbnail column: fixed 120px basis, no grow, no shrink. */
  .post-hero-image {
    flex: 0 0 120px;
    margin-right: 20px;
  }

  /* Thumbnail scales to the column width and keeps its aspect ratio. */
  .post-hero-image img {
    width: 100%;
    height: auto;
  }

  /* Text column takes the remaining width. */
  .post-details {
    flex: 1;
  }

  /* Base title styling shared by every element carrying .post-title. */
  .post-title {
    margin: 0;
    font-size: 1.5em;
    color: #333;
  }

  .post-title a {
    color: #333;
    text-decoration: none;
  }

  .post-title a:hover {
    text-decoration: underline;
    color: #0056b3;
  }

  /* Date line; the negative vertical margin pulls it closer to its neighbours. */
  .post-date {
    color: #666;
    font-size: 0.9em;
    margin: -4px 0;
  }

  .post-description {
    margin: 0;
  }

  /* Section heading is centered. */
  h1.post-title {
    text-align: center;
  }

  /*
    Per-entry titles: "all: unset" first clears every property on the h2 and its
    link, then the declarations after it re-apply the few that are wanted. These
    selectors are more specific than .post-title, so 1.0em wins over 1.5em.
  */
  h2.post-title, h2.post-title a {
    all: unset;
    display: block;
    font-size: 1.0em;
    color: #333;
    text-decoration: none;
  }

  h2.post-title a:hover {
    color: #0056b3;
    text-decoration: underline;
  }

  /* Never show links with this class (presumably the ones AnchorJS injects into headings; confirm). */
  a.anchorjs-link {
    display: none !important;
  }
</style>
</div>
</div>
<!--
  Site footer: icon-only social links, copyright line, and the Google Analytics
  snippet. Each link contains nothing but an icon-font glyph, so aria-label
  supplies the accessible name and the glyph is hidden from assistive technology.
-->
<footer class="site-footer">
  <div class="wrapper">
    <div class="footer-col-wrapper">
      <div class="footer-col social-col">
        <a href="https://magazine.sebastianraschka.com" aria-label="Ahead of AI blog"><span><i class="fa fa-rss fa-2x" aria-hidden="true"></i></span> </a>
        <a href="/contact" aria-label="Contact"><span><i class="fa fa-envelope fa-2x" aria-hidden="true"></i></span> </a>
        <!-- Same profile URL as the header icon. -->
        <a href="https://x.com/rasbt" aria-label="X (Twitter)"> <span><i class="fa fa-twitter fa-2x" aria-hidden="true"></i></span> </a>
        <a href="https://youtube.com/c/SebastianRaschka" aria-label="YouTube"><span><i class="fa fa-youtube fa-2x" aria-hidden="true"></i></span> </a>
        <a href="https://github.com/rasbt" aria-label="GitHub"><span><i class="fa fa-github fa-2x" aria-hidden="true"></i> </span></a>
        <!-- Query string: user id plus interface language; the ampersand is escaped for HTML. -->
        <a href="https://scholar.google.com/citations?user=X4RCC0IAAAAJ&amp;hl=en" aria-label="Google Scholar"><span><i class="fa fa-google fa-2x" aria-hidden="true"></i> </span></a>
        <a href="https://linkedin.com/in/sebastianraschka" aria-label="LinkedIn"><span><i class="fa fa-linkedin fa-2x" aria-hidden="true"></i> </span></a>
      </div>
      <div class="footer-col copyright-col">
        <p>© 2013-2025 Sebastian Raschka</p>
      </div>
    </div>
  </div>
  <!-- Google tag (gtag.js) -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-BYQXBRPK81"></script>
  <script>
    // Queue calls on window.dataLayer; the async gtag.js script above consumes them.
    window.dataLayer = window.dataLayer || [];
    function gtag(){dataLayer.push(arguments);}
    gtag('js', new Date());
    gtag('config', 'G-BYQXBRPK81');
  </script>
</footer>
<!--
  Heading anchors: load the anchor library synchronously, then register every
  h2 to h6 on the page. The inline script relies on the global "anchors" object,
  which is expected to come from /js/anchor.min.js, so the order of these two
  script elements must be kept.
-->
<script src="/js/anchor.min.js"></script>
<script>
  var selector = 'h2, h3, h4, h5, h6';
  /*
  anchors.options = {
  icon: '#',
  visible: 'always',
  placement: 'left',
  class: 'bb-anchor'
  }
  */
  anchors.add(selector);
</script>
</body>
</html>