diff --git a/backend/src/chitai/services/book.py b/backend/src/chitai/services/book.py index 9fe0f52..d8cde00 100644 --- a/backend/src/chitai/services/book.py +++ b/backend/src/chitai/services/book.py @@ -44,11 +44,13 @@ from chitai.schemas.book import BooksCreateFromFiles from chitai.services.filesystem_library import BookPathGenerator from chitai.services.metadata_extractor import Extractor as MetadataExtractor from chitai.services.utils import ( + calculate_koreader_hash, cleanup_empty_parent_directories, delete_file, move_dir_contents, move_file, save_image, + StreamingHasher, ) @@ -172,18 +174,18 @@ class BookService(SQLAlchemyAsyncRepositoryService[Book]): file_metadata = [] for file in files: - stats = await aios.stat(file) file_size = stats.st_size content_type, _ = mimetypes.guess_type(file) + file_hash = await calculate_koreader_hash(file) filename = path_gen.generate_filename(data, Path(file.name)) - + file_metadata.append( FileMetadata( path=str(filename), size=file_size, - hash="stub-hash", # TODO: implement file hashing to catch duplicates + hash=file_hash, content_type=content_type, ) ) @@ -540,10 +542,13 @@ class BookService(SQLAlchemyAsyncRepositoryService[Book]): await file.seek(0) path = parent / filename path.parent.mkdir(parents=True, exist_ok=True) + + hasher = StreamingHasher() async with aiofiles.open(path, "wb") as dest: # Read spooled file and save it to the local filesystem while chunk := await file.read(CHUNK_SIZE): await dest.write(chunk) + hasher.update(chunk) stats = await aios.stat(path) file_size = stats.st_size @@ -552,7 +557,7 @@ class BookService(SQLAlchemyAsyncRepositoryService[Book]): FileMetadata( path=str(filename), size=file_size, - hash="stub-hash", # TODO: implement file hashing to catch duplicates + hash=hasher.hexdigest(), content_type=file.content_type, ) ) diff --git a/backend/src/chitai/services/utils.py b/backend/src/chitai/services/utils.py index d7486ec..6d714ee 100644 --- a/backend/src/chitai/services/utils.py +++ 
# src/chitai/services/utils.py — KOReader partial-MD5 hash utilities.
# NOTE(review): `aiofiles` is imported at the top of utils.py (per this
# change's import block); it is referenced, not re-imported, here.
import hashlib
from pathlib import Path

##################################
# KOReader file hash utilities   #
##################################

# KOReader identifies a document by a "partial MD5": instead of hashing the
# whole file it hashes 1024-byte samples taken at offsets 1024 << (2*i) for
# i = -1 .. 10, computed with LuaJIT's 32-bit bit.lshift.  LuaJIT masks the
# shift amount to 5 bits, so i = -1 (shift -2 -> 30) overflows 32 bits and
# produces offset 0.
# Offsets: 0, 1024, 4096, 16384, 65536, 262144, 1048576, ...
KO_STEP = 1024
KO_SAMPLE_SIZE = 1024
KO_INDICES = range(-1, 11)  # -1 to 10 inclusive


def _lshift32(val: int, shift: int) -> int:
    """
    32-bit left shift matching LuaJIT's bit.lshift behavior.

    LuaJIT masks the shift amount to 5 bits (0-31) and performs 32-bit
    arithmetic.  This causes negative shifts to wrap: shift=-2 becomes
    shift=30, and 1024 << 30 overflows 32 bits to produce 0.
    """
    val &= 0xFFFFFFFF
    shift &= 0x1F
    return (val << shift) & 0xFFFFFFFF


def _get_koreader_offsets() -> list[int]:
    """Return all KOReader sampling offsets, in ascending file order."""
    return [_lshift32(KO_STEP, 2 * i) for i in KO_INDICES]


def _partial_md5_from_chunk(
    chunk: bytes,
    hasher: "hashlib._Hash",
    offsets: list[int],
    chunk_start: int,
) -> None:
    """
    Update a partial-MD5 hasher with the sampled bytes found in this chunk.

    Each sample window [offset, offset + KO_SAMPLE_SIZE) is intersected with
    the chunk's span [chunk_start, chunk_start + len(chunk)), so a sample
    that straddles a chunk boundary is fed to the hasher piecewise across
    successive calls (the previous implementation dropped the tail of a
    straddling sample, producing wrong hashes for chunk sizes that are not
    multiples of 1024).  Because the offsets are ascending and the sample
    windows are disjoint, sequential chunks feed bytes to the hasher in
    file order, as MD5 requires.

    Args:
        chunk: The current chunk of file data.
        hasher: The MD5 hasher to update.
        offsets: Ascending byte offsets to sample from the file.
        chunk_start: The starting byte position of this chunk in the file.
    """
    chunk_end = chunk_start + len(chunk)
    for offset in offsets:
        # Overlap of the sample window with this chunk, in file coordinates.
        lo = max(offset, chunk_start)
        hi = min(offset + KO_SAMPLE_SIZE, chunk_end)
        if lo < hi:
            hasher.update(chunk[lo - chunk_start : hi - chunk_start])


async def calculate_koreader_hash(file_path: Path) -> str:
    """
    Calculate the KOReader-compatible partial MD5 hash of a file.

    KOReader samples 1024 bytes at offsets 1024 << (2*i) for i from -1 to
    10 (0, 1024, 4096, 16384, ... — see module constants above) rather than
    hashing the entire file.  This implementation seeks directly to each
    offset and reads at most KO_SAMPLE_SIZE bytes, so the total I/O is
    ~12 KiB regardless of file size — the previous version streamed the
    whole file in 256 KiB chunks, which defeats the point of a partial
    hash on large ebooks.

    Args:
        file_path: Path to the file to hash.

    Returns:
        The hexadecimal MD5 hash string.
    """
    hasher = hashlib.md5()
    # aiofiles comes from the module-level imports of utils.py.
    async with aiofiles.open(file_path, "rb") as f:
        for offset in _get_koreader_offsets():
            await f.seek(offset)
            sample = await f.read(KO_SAMPLE_SIZE)
            if not sample:
                # Offsets ascend, so every later read would also be empty.
                break
            hasher.update(sample)
    return hasher.hexdigest()


class StreamingHasher:
    """
    Incrementally compute the KOReader partial MD5 while streaming data.

    Feed the file's bytes to update() in file order — any chunk sizes,
    including sizes that are not multiples of 1024 — and read the result
    from hexdigest().  This allows hash calculation during file writes
    without re-reading the file afterwards.
    """

    def __init__(self) -> None:
        self.hasher = hashlib.md5()
        self.offsets = _get_koreader_offsets()
        self.position = 0  # total bytes consumed so far

    def update(self, chunk: bytes) -> None:
        """Feed the next sequential chunk of file data into the hash."""
        _partial_md5_from_chunk(chunk, self.hasher, self.offsets, self.position)
        self.position += len(chunk)

    def hexdigest(self) -> str:
        """Return the final hash as a hexadecimal string."""
        return self.hasher.hexdigest()


##################################
# Filesystem related utilities   #
##################################