Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WJ-99] [WJ-323] Add code and HTML blocks in output #14

Merged
merged 16 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ keywords = ["wikidot", "wikijump", "ftml", "parsing", "html"]
categories = ["parser-implementations"]
exclude = [".gitignore", ".editorconfig"]

version = "1.23.0"
version = "1.24.0"
authors = ["Emmie Smith <[email protected]>"]
edition = "2021"

Expand Down
4 changes: 4 additions & 0 deletions misc/ast-test-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
}
}
],
"html-blocks": [
],
"code-blocks": [
],
"table-of-contents": [
],
"footnotes": [
Expand Down
14 changes: 10 additions & 4 deletions src/includes/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@

//! This module provides functions to parse strings into [`IncludeRef`]s

// Private wrapper module for the pest-derived include parser.
//
// The inner `#![allow(...)]` below covers everything declared in this module,
// which presumably is why the derive lives in a module of its own rather than
// at the top level of the file.
mod parser {
// pest's derive generates code that triggers clippy's `empty_docs` lint,
// so the lint is silenced for this module.
#![allow(clippy::empty_docs)]

/// Parser generated by pest from the grammar in `includes/grammar.pest`.
#[derive(Parser, Debug)]
#[grammar = "includes/grammar.pest"]
pub struct IncludeParser;
}

use self::parser::*;
use super::IncludeRef;
use crate::data::{PageRef, PageRefParseError};
use crate::settings::WikitextSettings;
Expand All @@ -28,10 +38,6 @@ use pest::Parser;
use std::borrow::Cow;
use std::collections::HashMap;

#[derive(Parser, Debug)]
#[grammar = "includes/grammar.pest"]
struct IncludeParser;

/// Parses a single include block in the text.
///
/// # Arguments
Expand Down
3 changes: 3 additions & 0 deletions src/parsing/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ pub enum ParseErrorKind {
/// Bibliography contains an element other than a definition list.
BibliographyContainsNonDefinitionList,

/// Code block has a name which is not unique.
CodeNonUniqueName,

/// There is no rule for the block name specified.
NoSuchBlock,

Expand Down
18 changes: 16 additions & 2 deletions src/parsing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ use crate::next_index::{NextIndex, TableOfContentsIndex};
use crate::settings::WikitextSettings;
use crate::tokenizer::Tokenization;
use crate::tree::{
AttributeMap, BibliographyList, Element, LinkLabel, LinkLocation, LinkType, ListItem,
ListType, SyntaxTree,
AttributeMap, BibliographyList, CodeBlock, Element, LinkLabel, LinkLocation,
LinkType, ListItem, ListType, SyntaxTree,
};
use std::borrow::Cow;

Expand All @@ -86,6 +86,8 @@ where
// Run parsing, get raw results
let UnstructuredParseResult {
result,
html_blocks,
code_blocks,
table_of_contents_depths,
footnotes,
has_footnote_block,
Expand Down Expand Up @@ -132,6 +134,7 @@ where
SyntaxTree::from_element_result(
elements,
errors,
(html_blocks, code_blocks),
table_of_contents,
footnotes,
bibliographies,
Expand All @@ -155,6 +158,7 @@ where
SyntaxTree::from_element_result(
elements,
errors,
(html_blocks, code_blocks),
table_of_contents,
footnotes,
bibliographies,
Expand All @@ -180,13 +184,17 @@ where
let result = gather_paragraphs(&mut parser, RULE_PAGE, NO_CLOSE_CONDITION);

// Build and return
let html_blocks = parser.remove_html_blocks();
let code_blocks = parser.remove_code_blocks();
let table_of_contents_depths = parser.remove_table_of_contents();
let footnotes = parser.remove_footnotes();
let has_footnote_block = parser.has_footnote_block();
let bibliographies = parser.remove_bibliographies();

UnstructuredParseResult {
result,
html_blocks,
code_blocks,
table_of_contents_depths,
footnotes,
has_footnote_block,
Expand Down Expand Up @@ -249,6 +257,12 @@ pub struct UnstructuredParseResult<'r, 't> {
/// The returned result from parsing.
pub result: ParseResult<'r, 't, Vec<Element<'t>>>,

/// The list of HTML blocks to emit from this page.
pub html_blocks: Vec<Cow<'t, str>>,

/// The list of code blocks to emit from this page.
pub code_blocks: Vec<CodeBlock<'t>>,

/// The "depths" list for table of content entries.
///
/// Each value is a zero-indexed depth of how
Expand Down
60 changes: 59 additions & 1 deletion src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ use super::RULE_PAGE;
use crate::data::PageInfo;
use crate::render::text::TextRender;
use crate::tokenizer::Tokenization;
use crate::tree::{AcceptsPartial, Bibliography, BibliographyList, HeadingLevel};
use crate::tree::{
AcceptsPartial, Bibliography, BibliographyList, CodeBlock, HeadingLevel,
};
use std::borrow::Cow;
use std::cell::RefCell;
use std::rc::Rc;
use std::{mem, ptr};
Expand Down Expand Up @@ -58,6 +61,12 @@ pub struct Parser<'r, 't> {
// here preserved across parser child instances.
table_of_contents: Rc<RefCell<Vec<(usize, String)>>>,

// HTML blocks with data to expose
html_blocks: Rc<RefCell<Vec<Cow<'t, str>>>>,

// Code blocks with data to expose
code_blocks: Rc<RefCell<Vec<CodeBlock<'t>>>>,

// Footnotes
//
// Schema: Vec<List of elements in a footnote>
Expand Down Expand Up @@ -102,6 +111,8 @@ impl<'r, 't> Parser<'r, 't> {
rule: RULE_PAGE,
depth: 0,
table_of_contents: make_shared_vec(),
html_blocks: make_shared_vec(),
code_blocks: make_shared_vec(),
footnotes: make_shared_vec(),
bibliographies: Rc::new(RefCell::new(BibliographyList::new())),
accepts_partial: AcceptsPartial::None,
Expand Down Expand Up @@ -221,6 +232,16 @@ impl<'r, 't> Parser<'r, 't> {
self.table_of_contents.borrow_mut().push((level, name));
}

/// Takes the collected HTML blocks out of the parser.
///
/// The shared list is replaced with an empty one, and the previous
/// contents are returned to the caller.
#[cold]
pub fn remove_html_blocks(&mut self) -> Vec<Cow<'t, str>> {
    let mut blocks = self.html_blocks.borrow_mut();
    mem::take(&mut *blocks)
}

/// Takes the collected code blocks out of the parser.
///
/// The shared list is replaced with an empty one, and the previous
/// contents are returned to the caller.
#[cold]
pub fn remove_code_blocks(&mut self) -> Vec<CodeBlock<'t>> {
    let mut blocks = self.code_blocks.borrow_mut();
    mem::take(&mut *blocks)
}

#[cold]
pub fn remove_table_of_contents(&mut self) -> Vec<(usize, String)> {
mem::take(&mut self.table_of_contents.borrow_mut())
Expand All @@ -236,6 +257,34 @@ impl<'r, 't> Parser<'r, 't> {
mem::take(&mut self.footnotes.borrow_mut())
}

// Blocks
/// Appends an HTML block to the list of HTML blocks collected by the parser.
pub fn push_html_block(&mut self, new_block: Cow<'t, str>) {
    let mut blocks = self.html_blocks.borrow_mut();
    blocks.push(new_block);
}

/// Appends a code block to the list of code blocks collected by the parser.
///
/// # Errors
///
/// Returns [`NonUniqueNameError`] if `new_block` has a name and an already
/// collected block carries the same name. In that case nothing is added.
/// Blocks without a name are always accepted.
pub fn push_code_block(
    &mut self,
    new_block: CodeBlock<'t>,
) -> Result<(), NonUniqueNameError> {
    // The shared borrow is confined to this block so that it is released
    // before the mutable borrow used for the push below.
    let name_taken = {
        let existing = self.code_blocks.borrow();
        match &new_block.name {
            Some(new_name) => existing
                .iter()
                .any(|block| block.name.as_ref() == Some(new_name)),
            None => false,
        }
    };

    if name_taken {
        return Err(NonUniqueNameError);
    }

    self.code_blocks.borrow_mut().push(new_block);
    Ok(())
}

// Bibliography
pub fn push_bibliography(&mut self, bibliography: Bibliography<'t>) -> usize {
let mut guard = self.bibliographies.borrow_mut();
Expand All @@ -252,10 +301,16 @@ impl<'r, 't> Parser<'r, 't> {
// Special for [[include]], appending a SyntaxTree
pub fn append_shared_items(
&mut self,
html_blocks: &mut Vec<Cow<'t, str>>,
code_blocks: &mut Vec<CodeBlock<'t>>,
table_of_contents: &mut Vec<(usize, String)>,
footnotes: &mut Vec<Vec<Element<'t>>>,
bibliographies: &mut BibliographyList<'t>,
) {
self.html_blocks.borrow_mut().append(html_blocks);

self.code_blocks.borrow_mut().append(code_blocks);

self.table_of_contents
.borrow_mut()
.append(table_of_contents);
Expand Down Expand Up @@ -516,6 +571,9 @@ impl<'r, 't> Parser<'r, 't> {
}
}

/// Error returned when a named code block cannot be added because another
/// code block with the same name has already been collected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct NonUniqueNameError;

impl std::fmt::Display for NonUniqueNameError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("code block name is not unique")
    }
}

// Implementing the standard error trait lets callers box this value or
// propagate it with `?` into `Box<dyn Error>`-style results.
impl std::error::Error for NonUniqueNameError {}

#[inline]
fn make_shared_vec<T>() -> Rc<RefCell<Vec<T>>> {
Rc::new(RefCell::new(Vec::new()))
Expand Down
20 changes: 19 additions & 1 deletion src/parsing/rule/impls/block/blocks/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
*/

use super::prelude::*;
use crate::tree::CodeBlock;
use wikidot_normalize::normalize;

pub const BLOCK_CODE: BlockRule = BlockRule {
name: "block-code",
Expand All @@ -42,13 +44,29 @@ fn parse_fn<'r, 't>(
assert_block_name(&BLOCK_CODE, name);

let mut arguments = parser.get_head_map(&BLOCK_CODE, in_head)?;
let language = arguments.get("type");

let mut language = arguments.get("type");
if let Some(ref mut language) = language {
language.to_mut().make_ascii_lowercase();
}

let mut name = arguments.get("name");
if let Some(ref mut name) = name {
normalize(name.to_mut());
}

let code = parser.get_body_text(&BLOCK_CODE)?;
let element = Element::Code {
contents: cow!(code),
language,
};
let added_result = parser.push_code_block(CodeBlock {
contents: cow!(code),
name,
});
if added_result.is_err() {
return Err(parser.make_err(ParseErrorKind::CodeNonUniqueName));
}

ok!(element)
}
1 change: 1 addition & 0 deletions src/parsing/rule/impls/block/blocks/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ fn parse_fn<'r, 't>(
let element = Element::Html {
contents: cow!(html),
};
parser.push_html_block(cow!(html));

ok!(element)
}
8 changes: 8 additions & 0 deletions src/parsing/rule/impls/block/blocks/include_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use super::prelude::*;
use crate::data::PageRef;
use crate::parsing::UnstructuredParseResult;

// TODO: Consider removing this rule, since we want to move to components anyway.

/// Block rule for include (elements).
///
/// This takes the resultant `SyntaxTree` from another page and
Expand Down Expand Up @@ -60,6 +62,8 @@ fn parse_fn<'r, 't>(
// Get page to be included
let UnstructuredParseResult {
result,
mut html_blocks,
mut code_blocks,
mut table_of_contents_depths,
mut footnotes,
has_footnote_block,
Expand All @@ -80,6 +84,8 @@ fn parse_fn<'r, 't>(

// Update parser state, build, and return
parser.append_shared_items(
&mut html_blocks,
&mut code_blocks,
&mut table_of_contents_depths,
&mut footnotes,
&mut bibliographies,
Expand Down Expand Up @@ -112,6 +118,8 @@ fn include_page<'r, 't>(
vec![],
false,
)),
html_blocks: vec![],
code_blocks: vec![],
table_of_contents_depths: vec![],
footnotes: vec![],
has_footnote_block: false,
Expand Down
4 changes: 2 additions & 2 deletions src/parsing/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
mod test;

mod lexer {
// Since pest makes enums automatically that clippy doesn't like
#![allow(clippy::upper_case_acronyms)]
// Since pest generates some code that clippy doesn't like
#![allow(clippy::upper_case_acronyms, clippy::empty_docs)]

// The actual parser definition, which we will re-export
#[derive(Parser, Debug)]
Expand Down
1 change: 1 addition & 0 deletions src/render/html/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ fn html() {
let result = SyntaxTree::from_element_result(
vec![],
vec![],
(vec![], vec![]),
vec![],
vec![],
BibliographyList::new(),
Expand Down
1 change: 1 addition & 0 deletions src/render/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ fn null() {
let result = SyntaxTree::from_element_result(
vec![],
vec![],
(vec![], vec![]),
vec![],
vec![],
BibliographyList::new(),
Expand Down
2 changes: 2 additions & 0 deletions src/test/prop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ fn arb_tree() -> impl Strategy<Value = SyntaxTree<'static>> {
.prop_map(|(elements, table_of_contents, footnotes, wikitext_len)| {
SyntaxTree {
elements,
html_blocks: Vec::new(),
code_blocks: Vec::new(), // these two are derived fields
table_of_contents,
footnotes,
bibliographies: BibliographyList::new(), // not bothering right now
Expand Down
39 changes: 39 additions & 0 deletions src/tree/code.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* code.rs
*
* ftml - Library to parse Wikidot text
* Copyright (C) 2019-2024 Wikijump Team
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

//! Structure to represent a code block.

use super::clone::{option_string_to_owned, string_to_owned};
use std::borrow::Cow;

/// A code block collected during parsing.
///
/// The parser gathers these in a list that is handed to the `SyntaxTree`
/// together with the list of HTML blocks.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq)]
pub struct CodeBlock<'t> {
/// The text contents of the code block.
pub contents: Cow<'t, str>,
/// The name of the code block, if one was specified.
///
/// The `[[code]]` block rule normalizes this value before storing it, and
/// the parser rejects a block whose name matches one it already holds.
pub name: Option<Cow<'t, str>>,
}

impl<'t> CodeBlock<'t> {
    /// Builds a `CodeBlock<'static>` from this one.
    ///
    /// Each field is converted with the matching helper from the sibling
    /// `clone` module.
    pub fn to_owned(&self) -> CodeBlock<'static> {
        let CodeBlock { contents, name } = self;

        CodeBlock {
            contents: string_to_owned(contents),
            name: option_string_to_owned(name),
        }
    }
}
Loading
Loading