<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sitemap (sitemaps.org 0.9 protocol) listing the canonical page URLs of
  ashraf-bhuiyan.com. Each <loc> must hold exactly one absolute URL with no
  embedded whitespace or line breaks: whitespace inside text content is passed
  to the consumer as data, so never let a line wrap fall inside a <loc>.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml"
        xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
        xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
  <!-- Top-level pages -->
  <url>
    <loc>https://ashraf-bhuiyan.com/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/about/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/</loc>
  </url>
  <!-- Blog posts: numbered series (01-15) -->
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/01-naive-inference/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/02-async-streaming/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/03-paged-attention/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/04-continuous-batching/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/05-async-scheduling/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/06-chunked-prefill/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/07-prefix-caching/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/08-speculative-decoding/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/09-tensor-parallelism/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/10-data-parallelism/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/11-expert-parallelism/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/12-kv-cpu-offloading/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/13-disaggregated-serving/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/14-quantization/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/15-full-architecture/</loc>
  </url>
  <!-- Blog posts: embed-* series -->
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/embed-01-fundamentals/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/embed-02-pooling/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/embed-03-serving/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/embed-04-rerankers/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/embed-05-optimization/</loc>
  </url>
  <!-- Blog posts: lora-* series -->
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-01-fundamentals/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-02-qlora/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-03-serving/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-04-multi-lora/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-05-kernels/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/lora-06-production/</loc>
  </url>
  <!-- Blog posts: mm-* series -->
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-01-vlm-architecture/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-02-serving-vlms/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-03-multi-image-video/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-04-audio-models/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-05-internals/</loc>
  </url>
  <url>
    <loc>https://ashraf-bhuiyan.com/blog/mm-06-optimization/</loc>
  </url>
</urlset>