Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions crates/core/src/document/epub/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,52 @@ mod tests {
);
}

/// Regression test for https://github.com/baskerville/plato/issues/426.
///
/// EPUB files from royallib.com nest block elements inside inline `<span>`
/// ancestors. Because the engine's `has_blocks` check inspects direct
/// children only, the outermost `<span>` looks inline-only;
/// `gather_inline_material` then recurses into it and the nested
/// `<div>`/`<p>` bodies are silently dropped, so only the chapter title
/// renders.
///
/// Spine index 39 is `OPS/ch1-38.xhtml`, whose body begins with:
///
/// ```text
/// <span><span><span id="id90">
///   <div class="title6"><p>"Коса" жизни</p></div>
///   <p>Георгий Гамов озаглавил …</p>
///   …
/// ```
///
/// The first body paragraph acts as the canary: when block-in-inline
/// promotion is broken, the `<p>` nodes are swallowed and their text never
/// shows up in any `DrawCommand`.
#[test]
fn royallib_block_in_inline_renders_body_paragraphs() {
    // Font directory is supplied by the test environment.
    let fonts_root = std::env::var("TEST_ROOT_DIR")
        .map(PathBuf::from)
        .expect("TEST_ROOT_DIR must be set for epub tests");
    let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("src/document/tests/fixtures/royallib-block-in-inline.epub");

    let mut doc = EpubDocumentFile::new(&fixture).expect("failed to open royallib epub");
    doc.engine.layout(600, 800, 12.0, 265);
    doc.engine.set_margin_width(3);
    doc.engine.load_fonts_from(fonts_root);

    let display_list = doc.build_display_list(39, 0);

    // Concatenate the text of every text-bearing draw command, page by page.
    let mut rendered_text = String::new();
    for page in display_list.iter() {
        for cmd in page.iter() {
            if let DrawCommand::Text(tc) | DrawCommand::ExtraText(tc) = cmd {
                rendered_text.push_str(tc.text.as_str());
            }
        }
    }

    // Both words of the first body paragraph must have been rendered.
    let body_rendered = ["Георгий", "Гамов"]
        .iter()
        .all(|&word| rendered_text.contains(word));
    assert!(
        body_rendered,
        "body paragraph text not rendered — block-in-inline content was silently dropped",
    );
}

#[test]
fn all_spine_chapters_produce_content() {
let mut doc = setup_epub();
Expand Down
128 changes: 81 additions & 47 deletions crates/core/src/document/html/dom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,57 +19,61 @@ pub struct ElementData {
pub name: String,
pub qualified_name: Option<String>,
pub attributes: Attributes,
/// Set when an otherwise inline element contains block-level descendants
/// (invalid block-in-inline markup); it is then laid out as a block.
pub force_block: bool,
}

impl ElementData {
fn is_block(&self) -> bool {
matches!(
self.name.as_str(),
"address"
| "article"
| "aside"
| "blockquote"
| "body"
| "head"
| "details"
| "dialog"
| "dd"
| "div"
| "dl"
| "dt"
| "fieldset"
| "figcaption"
| "figure"
| "footer"
| "form"
| "h1"
| "h2"
| "h3"
| "h4"
| "h5"
| "h6"
| "header"
| "hgroup"
| "hr"
| "html"
| "li"
| "main"
| "nav"
| "ol"
| "p"
| "pre"
| "section"
| "table"
| "thead"
| "colgroup"
| "tbody"
| "tfoot"
| "tr"
| "caption"
| "td"
| "th"
| "ul"
)
self.force_block
|| matches!(
self.name.as_str(),
"address"
| "article"
| "aside"
| "blockquote"
| "body"
| "head"
| "details"
| "dialog"
| "dd"
| "div"
| "dl"
| "dt"
| "fieldset"
| "figcaption"
| "figure"
| "footer"
| "form"
| "h1"
| "h2"
| "h3"
| "h4"
| "h5"
| "h6"
| "header"
| "hgroup"
| "hr"
| "html"
| "li"
| "main"
| "nav"
| "ol"
| "p"
| "pre"
| "section"
| "table"
| "thead"
| "colgroup"
| "tbody"
| "tfoot"
| "tr"
| "caption"
| "td"
| "th"
| "ul"
)
}
}

Expand Down Expand Up @@ -106,6 +110,7 @@ pub fn element(name: &str, offset: usize, attributes: Attributes) -> NodeData {
name: name[colon.map(|index| index + 1).unwrap_or(0)..].to_string(),
qualified_name: colon.map(|_| name.to_string()),
attributes,
force_block: false,
})
}

Expand Down Expand Up @@ -305,7 +310,36 @@ impl XmlTree {
}
}

/// Marks every inline element that has a block-level descendant as a block.
///
/// Block-in-inline nesting such as `<span><div>…</div></span>` is invalid
/// HTML but common in EPUB converter output. Left alone, such an element
/// would be flattened into a single inline run by `gather_inline_material`
/// and its block content silently dropped. This must run before
/// `wrap_lost_inlines` so the promoted elements are not wrapped as lost
/// inlines.
fn promote_blockish_inlines(&mut self) {
    // First pass (read-only): record the ids of the elements to promote, so
    // the tree is not mutated while it is being traversed.
    let mut promoted: Vec<NodeId> = Vec::new();
    for node in self.root().descendants() {
        if !matches!(node.data(), NodeData::Element(..)) {
            continue;
        }
        if !node.is_inline() {
            continue;
        }
        if node.descendants().any(|inner| inner.is_block()) {
            promoted.push(node.id);
        }
    }

    // Second pass: flag each recorded element as a forced block.
    for node_id in promoted {
        if let NodeData::Element(element) = &mut self.node_mut(node_id).data {
            element.force_block = true;
        }
    }
}

pub fn wrap_lost_inlines(&mut self) {
self.promote_blockish_inlines();

let mut ids = Vec::new();
let mut known_ids = FxHashSet::default();

Expand Down
Binary file not shown.
Loading