<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed for the blog at juyoung.site. The Atom namespace is declared
     on the root solely so the channel can carry an atom:link self-reference. -->
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <!-- Channel metadata: feed title, the site page the feed belongs to,
         a one-sentence summary, and the content language. -->
    <title>Juyoung Suk</title>
    <link>https://juyoung.site/blog/</link>
    <description>Research notes and technical writing on foundation models, evaluation, and training systems.</description>
    <language>en</language>
    <!-- Same timestamp as the pubDate of the only item below. -->
    <lastBuildDate>Tue, 05 May 2026 00:00:00 GMT</lastBuildDate>
    <!-- rel="self" gives the URL of this feed document itself, as distinct
         from the channel <link> above, which points at the blog index. -->
    <atom:link href="https://juyoung.site/rss.xml" rel="self" type="application/rss+xml"/>
    <!-- One <item> per post. -->
    <item>
      <title>Fused Linear Cross-Entropy</title>
      <link>https://juyoung.site/blog/fused-lce/</link>
      <!-- isPermaLink="true": the guid is the post URL, identical to <link>. -->
      <guid isPermaLink="true">https://juyoung.site/blog/fused-lce/</guid>
      <pubDate>Tue, 05 May 2026 00:00:00 GMT</pubDate>
      <description>Why fusing the LM head projection with cross-entropy is the single biggest memory win for training LLMs at long context.</description>
      <!-- Topic tags: one <category> element per tag. -->
      <category>training</category>
      <category>kernels</category>
      <category>memory</category>
    </item>
  </channel>
</rss>
