" . \PHP_EOL
. $block['close'];
}
/**
* Sanitizes and cleans up code content by removing HTML artifacts.
*
* Trims the input, rewrites or strips a few HTML fragments with regular
* expressions, balances the remaining tags with force_balance_tags() and
* finally runs two str_replace() passes meant to drop the code block markers.
*
* @param string $code_content The raw code content.
*
* @return string Cleaned code content, or an empty string when the trimmed input is empty.
*/
public function clean_code_content( string $code_content ): string {
// Clean up the code content.
$code_content = \trim( $code_content );
// Bail if the code content is empty.
if ( $code_content === '' ) {
return '';
}
// Remove HTML tags that might be part of the styling but not the code.
// Every closing paragraph tag becomes a line break.
$code_content = \preg_replace( '/<\/p>/', \PHP_EOL, $code_content );
// NOTE(review): as written, the next pattern matches a line break followed by any run of "]" and a ">".
// It presumably was meant to strip an opening tag and lost part of its text - confirm the intended pattern.
$code_content = \preg_replace( '/
]*>/', '', $code_content );
// Remove a single opening or closing span tag at the very start and at the very end of the content.
// Span tags elsewhere in the content are not touched by these two patterns.
$code_content = \preg_replace( '/^<\/?span[^>]*>/', '', $code_content );
$code_content = \preg_replace( '/<\/?span[^>]*>$/', '', $code_content );
// Final cleanup - Balance the tags.
$code_content = \force_balance_tags( $code_content );
// Remove the markers for the code block.
// NOTE(review): both calls search for an empty string, so they do not appear to remove anything.
// Presumably the marker literals are missing here - confirm the intended marker text.
$code_content = \str_replace( '', '', $code_content );
$code_content = \str_replace( '', '', $code_content );
return $code_content;
}
/**
* Extracts an image from a paragraph element.
*
* Walks the span elements inside the paragraph and builds an image block for
* the first image found in the first span that contains one. Width and height
* are read from the image's inline style and fall back to its width/height
* attributes.
*
* @param DOMElement|null $paragraph The paragraph element.
*
* @return string The image block if found, an empty string otherwise.
*/
public function extract_image_from_paragraph( ?DOMElement $paragraph ): string {
if ( $paragraph === null ) {
return '';
}
// First check for direct img elements.
// NOTE(review): this result is never read; $imgs is reassigned inside the loop below before any use.
$imgs = $paragraph->getElementsByTagName( 'img' );
// Check for spans that contain images.
$spans = $paragraph->getElementsByTagName( 'span' );
// A paragraph without any span never yields an image block.
if ( $spans->length === 0 ) {
return '';
}
foreach ( $spans as $span ) {
$imgs = $span->getElementsByTagName( 'img' );
if ( $imgs->length > 0 ) {
// Only the first image of this span is used; the method returns from inside this branch.
$img = $imgs->item( 0 );
// Get the original image source and alt.
$src = $img->getAttribute( 'src' );
$alt = $img->getAttribute( 'alt' );
// Check for style attributes that might indicate image dimensions.
$style = $img->getAttribute( 'style' );
$width = null;
$height = null;
// Parse dimensions from style attribute.
// Only the numeric part is kept; the optional unit is captured but not used.
// NOTE(review): the patterns are not anchored, so "max-width:" / "min-height:" declarations match as well.
if ( \preg_match( '/width:\s*([0-9]+(?:\.[0-9]+)?)(px|%|em|rem|vw|vh)?/', $style, $width_matches ) ) {
$width = $width_matches[1];
}
if ( \preg_match( '/height:\s*([0-9]+(?:\.[0-9]+)?)(px|%|em|rem|vw|vh)?/', $style, $height_matches ) ) {
$height = $height_matches[1];
}
// Also check for width/height attributes.
// The attribute is only consulted when the style did not provide a truthy value.
if ( ! $width ) {
$width = $img->getAttribute( 'width' );
}
if ( ! $height ) {
$height = $img->getAttribute( 'height' );
}
// Build block attributes.
$json_attrs = [
'id' => 0,
'sizeSlug' => 'large',
];
// Dimensions are only added when they are numeric, and are stored as non-negative integers.
if ( $width && \is_numeric( $width ) ) {
$json_attrs['width'] = \absint( $width );
}
if ( $height && \is_numeric( $height ) ) {
$json_attrs['height'] = \absint( $height );
}
$image_block = $this->create_block_comments( 'image', $json_attrs );
// Create the image block.
// NOTE(review): the three literals between the opening and closing comments are empty and $src / $alt
// are not referenced after being read; presumably the image markup is missing here - confirm.
return $image_block['open'] . \PHP_EOL
. '' . \PHP_EOL
. '' . \PHP_EOL
. '' . \PHP_EOL
. $image_block['close'];
}
}
// None of the spans contained an image.
return '';
}
/**
* Creates a separator block.
*
* Wraps a fixed body line in the opening and closing block comments that
* create_block_comments() returns for the 'separator' block type.
*
* @return string The separator block.
*/
public function create_separator_block(): string {
$block = $this->create_block_comments( 'separator' );
// NOTE(review): the body literal holds only a tab and a line break; presumably the separator
// markup is missing here - confirm the intended markup.
return $block['open'] . \PHP_EOL . "\t
" . \PHP_EOL . $block['close'];
}
/**
* Creates a paragraph with a link.
*
* @param DOMElement|null $anchor The node element interpreted as an anchor.
*
* @return string The paragraph block with a link.
*/
public function create_paragraph_with_link( ?DOMElement $anchor ): string {
if ( $anchor === null ) {
return '';
}
$href = $anchor->getAttribute( 'href' );
$text = $anchor->textContent;
$block = $this->create_block_comments( 'paragraph' );
return $block['open'] . \PHP_EOL
. '
';
++$header_rows_processed;
// Remove the row from all_row_data to avoid duplication.
unset( $all_row_data[ $row_index ] );
// If we've processed all header rows, stop.
if ( $header_rows_processed >= $header_row_count ) {
break;
}
}
}
$block .= '';
}
// Add tbody with remaining rows.
$block .= '';
// If we have no body rows, add an empty row.
if ( empty( $all_row_data ) ) {
$block .= '
';
$block .= \str_repeat( '
', $max_columns );
$block .= '
';
}
else {
// Add all remaining rows to tbody (reset array keys first).
$all_row_data = \array_values( $all_row_data );
foreach ( $all_row_data as $row ) {
$block .= '
';
$block .= '';
return $block;
}
/**
 * Creates a heading block from a heading element.
 *
 * The heading level is taken from the element's tag name and the content from
 * DOM_Processor::process_inline_elements(). An inline "text-align" declaration
 * is moved out of the style attribute into the block's "align" attribute and a
 * matching "has-text-align-*" class.
 *
 * @param DOMElement|null    $heading       The heading element.
 * @param DOM_Processor|null $dom_processor The DOM processor instance.
 *
 * @return string The heading block, or an empty string when either argument is null.
 */
public function create_heading_block( ?DOMElement $heading, ?DOM_Processor $dom_processor ): string {
	if ( $heading === null || $dom_processor === null ) {
		return '';
	}

	// The level is the single character that follows the leading "h" of the tag name.
	$level = \substr( $heading->nodeName, 1, 1 );

	// Process inline formatting elements instead of using plain text.
	$content = $dom_processor->process_inline_elements( $heading );

	// Check for text alignment in style attribute.
	$style       = $dom_processor->get_style_attribute( $heading, false );
	$block_attrs = [ 'level' => \absint( $level ) ];
	$alignment   = null;

	// Process alignment from inline style.
	if ( \preg_match( '/text-align:\s*(center|right|left)/i', $style, $matches ) ) {
		// The pattern is case-insensitive; lower-case the value so the block
		// attribute and the CSS class are the same for "CENTER" and "center".
		$alignment            = \strtolower( $matches[1] );
		$block_attrs['align'] = $alignment;

		// Remove the text-align property from the style attribute.
		$style = \preg_replace( '/text-align:\s*(center|right|left)(;|$)/i', '', $style );
		$style = \trim( $style, " \t\n\r\0\x0B;" );
	}

	// Both values end up inside double-quoted HTML attributes, so they are
	// escaped for attribute context to keep a stray quote from ending the attribute.
	$id_attr    = $heading->getAttribute( 'id' );
	$id_html    = ! empty( $id_attr ) ? ' id="' . \esc_attr( $id_attr ) . '"' : '';
	$style_attr = ! empty( $style ) ? ' style="' . \esc_attr( $style ) . '"' : '';

	// Start with required wp-block-heading class.
	$classes = 'wp-block-heading';

	// Add alignment class if needed.
	if ( $alignment ) {
		$classes .= ' has-text-align-' . \esc_attr( $alignment );
	}

	// Get block comments.
	$block = $this->create_block_comments( 'heading', $block_attrs );

	// Opening comment, the heading element itself, closing comment.
	return $block['open'] . \PHP_EOL
		. "\t<h" . $level . ' class="' . $classes . '"' . $id_html . $style_attr . '>'
		. \wp_kses_post( $content )
		. '</h' . $level . '>' . \PHP_EOL
		. $block['close'];
}
/**
 * Creates a list block from a list element (ul or ol).
 *
 * The list keeps the tag name of the source element. Only direct "li"
 * children are rendered, each through process_list_item(); every other child
 * node is ignored.
 *
 * @param DOMElement|null    $list_element  The list element.
 * @param DOM_Processor|null $dom_processor The DOM processor instance.
 *
 * @return string The list block, or an empty string when either argument is null.
 */
public function create_list_block( ?DOMElement $list_element, ?DOM_Processor $dom_processor ): string {
	if ( $list_element === null || $dom_processor === null ) {
		return '';
	}

	// The same tag name is used for the opening and the closing tag.
	$tag_name = $list_element->nodeName;

	// Ordered lists carry an "ordered" block attribute; unordered lists carry none.
	$attrs = ( $tag_name === 'ol' ) ? [ 'ordered' => true ] : [];

	// Get block comments.
	$block = $this->create_block_comments( 'list', $attrs );

	// Start the list block.
	$output  = $block['open'] . \PHP_EOL;
	$output .= '<' . $tag_name . ' class="wp-block-list">';

	// Process all list items.
	foreach ( $list_element->childNodes as $child ) {
		if ( $child->nodeType !== \XML_ELEMENT_NODE || $child->nodeName !== 'li' ) {
			continue;
		}

		$output .= $this->process_list_item( $child, $dom_processor );
	}

	// Close the list block.
	$output .= '</' . $tag_name . '>' . \PHP_EOL;
	$output .= $block['close'];

	return $output;
}
/**
* Process a list item and its content, including any nested lists.
*
* Direct child nodes are split into two groups: nested ul/ol elements, which
* are rendered after the item's own content by calling this method again for
* each of their "li" children, and everything else, which is concatenated
* into the item's content.
*
* NOTE(review): several wrapper literals in this method are empty or hold only
* a line break; presumably the list markup and block comments are missing
* here - confirm the intended literals.
*
* @param DOMElement|null $list_item The list item element.
* @param DOM_Processor|null $dom_processor The DOM processor instance.
*
* @return string The processed list item HTML with Gutenberg comments, or an empty string when either argument is null.
*/
public function process_list_item( ?DOMElement $list_item, ?DOM_Processor $dom_processor ): string {
if ( $list_item === null || empty( $list_item ) || $dom_processor === null ) {
return '';
}
// Start the list item block.
$output = '' . \PHP_EOL;
$output .= '
';
// Extract nested lists.
$nested_lists = [];
$content = '';
// First pass: separate content and nested lists.
foreach ( $list_item->childNodes as $node ) {
// Capture nested lists.
if (
$node->nodeType === \XML_ELEMENT_NODE
&& ( $node->nodeName === 'ul' || $node->nodeName === 'ol' )
) {
$nested_lists[] = $node;
}
// Process content nodes (text or other elements).
// Text nodes are trimmed before being appended, so whitespace next to a neighbouring element is discarded.
elseif ( $node->nodeType === \XML_TEXT_NODE ) {
$text = \trim( $node->textContent );
// NOTE(review): empty() also treats the string "0" as empty, so a text node holding only "0" is skipped.
if ( ! empty( $text ) ) {
$content .= $text;
}
}
// Any other element contributes whatever get_node_html() returns for it.
elseif ( $node->nodeType === \XML_ELEMENT_NODE ) {
$content .= $dom_processor->get_node_html( $node );
}
}
// Add the content.
$output .= $content;
// Process nested lists.
foreach ( $nested_lists as $nested_list ) {
// Nested ul and ol elements share this code path; the wrapper does not depend on the nested tag name.
$output .= '' . \PHP_EOL;
$output .= '
';
// Process nested list items.
foreach ( $nested_list->childNodes as $nested_child ) {
// Only direct "li" element children are rendered.
if ( $nested_child->nodeType !== \XML_ELEMENT_NODE || $nested_child->nodeName !== 'li' ) {
continue;
}
// Recursively process nested list items.
$output .= $this->process_list_item( $nested_child, $dom_processor );
}
$output .= '
' . \PHP_EOL;
$output .= '';
}
// Close the list item.
$output .= '
' . \PHP_EOL;
$output .= '';
return $output;
}
/**
* Creates a bookmark paragraph that can be shared via link.
*
* @param DOMElement|null $paragraph The paragraph element.
*
* @return string The bookmark paragraph block if found, null otherwise.
*/
public function create_bookmark_paragraph( ?DOMElement $paragraph ): string {
if ( $paragraph === null ) {
return '';
}
// Check if this matches our bookmark structure.
$spans = $paragraph->getElementsByTagName( 'span' );
$anchors = $paragraph->getElementsByTagName( 'a' );
// Must have exactly one span followed by an anchor with ID.
if ( $spans->length === 0 || $anchors->length === 0 ) {
return '';
}
$span = $spans->item( 0 );
$anchor = $anchors->item( 0 );
// The anchor should have an ID and be empty.
$anchor_id = $anchor->getAttribute( 'id' );
// Ensure ID is present and anchor has no text content.
// Prevent conflict with footnotes.
if ( $anchor_id === '' || \trim( $anchor->textContent ) !== '' ) {
return '';
}
// Get the span text content.
$span_text = \trim( $span->textContent );
if ( $span_text === '' ) {
return '';
}
// Create a paragraph block with a link and an ID for the bookmark.
$block = $this->create_block_comments( 'paragraph' );
return $block['open'] . \PHP_EOL
. '