# Files
# the-sourdough-framework/website/modify_build.rb
# 2023-12-01 04:17:01 +01:00
#
# 586 lines
# 19 KiB
# Ruby

require 'pry'
require 'nokogiri'
# This class goes through the generated default LaTeX HTML and performs
# several optimisations on the HTML. Nokogiri is used to facilitate the
# modifications.
# Post-processes the HTML pages found in the build directory: every page is
# read, passed through a fixed sequence of text/Nokogiri transformations and
# written back in place. A sitemap.txt listing all pages is generated last.
#
# The multi-line HTML templates are kept flush-left inside %Q{} on purpose so
# that the emitted markup does not pick up source indentation.
class ModifyBuild
  HOST = "https://www.the-sourdough-framework.com".freeze

  # File names that are treated as the cover (landing) page.
  COVER_PAGE_FILES = ["book.html", "index.html"].freeze

  # File names of the list of figures / tables / flowcharts pages.
  LIST_PAGE_FILES = ["listfigurename.html", "listtablename.html", "listoflocname.html"].freeze

  # Maps a page file name to its Open Graph preview image. "index.html" is
  # also used as the fallback entry for pages that are not listed.
  OPEN_GRAPH_IMAGES = {
    "Baking.html" => "og_image_baking.png",
    "Breadtypes.html" => "og_image_bread_types.png",
    "Flourtypes.html" => "og_image_flour_types.png",
    "index.html" => "og_image_general.png",
    "Howsourdoughworks.html" => "og_image_how_sourdough_works.png",
    "Makingasourdoughstarter.html" => "og_image_making_a_sourdough_starter.png",
    "Nonwheatsourdough.html" => "og_image_non_wheat_sourdough.png",
    "Sourdoughstartertypes.html" => "og_image_sourdough_starter_types.png",
    "Storingbread.html" => "og_image_storing_bread.png",
    "Thehistoryofsourdough.html" => "og_image_the_history_of_sourdough.png",
    "Wheatsourdough.html" => "og_image_troubleshooting.png",
  }.freeze

  # Convenience entry point: runs the build on a fresh instance.
  def self.build
    new.build
  end

  # Recreates the build directory, rewrites all HTML pages in it and then
  # writes the sitemap.
  def build
    build_latex_html
    create_sitemap
  end

  private

  # Writes "<build_dir>/sitemap.txt" with one absolute URL per HTML page,
  # sorted by path. Only the file name of each page is used, e.g.
  # "static_website_html/Acknowledgements.html" => "Acknowledgements.html".
  def create_sitemap
    urls = list_of_files_to_modify.sort.map do |path|
      "#{HOST}/#{File.basename(path)}\n"
    end
    File.open("#{build_dir}/sitemap.txt", 'w:UTF-8') { |file| file.write(urls.join) }
  end

  # Wipes and recreates the build directory, copies the generated pages and
  # the assets into it and rewrites every HTML page in place.
  def build_latex_html
    system("rm -rf #{build_dir}/")
    system("mkdir #{build_dir}/")
    copy_source_to_local_dir_for_modification
    copy_assets_into_folder
    list_of_files_to_modify.each do |filename|
      modify_file(filename)
    end
  end

  # Applies all transformations to a single page and overwrites the file.
  # The order of the steps matters: later steps rely on markup (for example
  # ".menu-items" and ".titlepage") that earlier steps introduce.
  def modify_file(filename)
    page_name = extract_file_from_path(filename)
    text = File.read(filename, encoding: "UTF-8")
    text = fix_double_slashes(text)
    text = fix_navigation_bar(text)
    text = fix_titles(text)
    text = fix_menu(text)
    text = fix_cover_page(text) if is_cover_page?(filename)
    text = add_home_link_to_menu(text)
    text = fix_anchor_hyperlinks_menu(text)
    text = add_favicon(text)
    text = add_meta_tags(text, filename)
    text = remove_section_table_of_contents(text)
    text = mark_menu_as_selected_if_on_page(text, page_name)
    text = add_canonical_for_duplicates(text, page_name)
    text = include_javascript(text)
    text = add_text_to_coverpage(text, page_name)
    text = fix_js_dependency_link(text)
    text = fix_list_of_tables_figures_duplicates(text)
    text = add_anchors_to_headers(text)
    text = fix_menus_list_figures_tables(text) if is_list_figures_tables?(filename)
    text = fix_list_of_figures_tables_display(text) if is_list_figures_tables?(filename)
    File.open(filename, "w:UTF-8") { |file| file.puts text }
  end

  # True when the path or bare file name refers to one of the cover pages.
  # The file name is compared exactly, so a page such as "Cookbook.html" is
  # not mistaken for "book.html".
  def is_cover_page?(filename)
    COVER_PAGE_FILES.include?(File.basename(filename))
  end

  # True when the path or bare file name refers to one of the list of
  # figures / tables / flowcharts pages (exact file name match).
  def is_list_figures_tables?(filename)
    LIST_PAGE_FILES.include?(File.basename(filename))
  end

  # All HTML pages directly inside the build directory.
  def list_of_files_to_modify
    Dir.glob("#{build_dir}/*.html")
  end

  # Copies the generated website from ../book into the local build
  # directory, running "make website" first when that output is missing.
  def copy_source_to_local_dir_for_modification
    system("cd ../book/ && make website") unless source_website_output_exists?
    system("cp -R ../book/#{build_dir}/* #{build_dir}")
  end

  # Copies everything below ./assets into the build directory.
  def copy_assets_into_folder
    system("cp -R ./assets/* #{build_dir}")
  end

  # Whether ../book already contains a generated website directory.
  def source_website_output_exists?
    File.directory?("../book/#{build_dir}/")
  end

  # Name of the directory that holds the website (relative path).
  def build_dir
    'static_website_html'
  end

  # Collapses "//" into "/" everywhere except directly after a colon, so the
  # scheme separator of absolute URLs ("https://...") stays intact.
  def fix_double_slashes(text)
    text.gsub(%r{(?<!:)//}, "/")
  end

  # Rewrites every chapter/section/subsection menu entry so that it consists
  # of a single link. When the entry starts with a chapter number, number
  # and link text are wrapped into separate spans inside that link.
  def fix_navigation_bar(text)
    doc = build_doc(text)
    entries = [doc.search('.chapterToc'), doc.search('.sectionToc'), doc.search('.subsectionToc')].flatten
    entries.each do |entry|
      number_node = entry.children[0]
      link_node = entry.children[1]
      # Entries that do not have both a leading node and a link stay as is.
      next if number_node.nil? || link_node.nil?

      chapter_number = number_node.text.strip.gsub(/[[:space:]]/, '')
      link_text = link_node.content
      # remove unneeded text and merge into single a tag
      number_node.remove
      unless chapter_number.empty?
        link_node.inner_html = %Q{
<span class="chapter_number">#{chapter_number}</span>
<span class="link_text">#{link_text}</span>
}
      end
      entry.inner_html = link_node.to_s
    end
    doc.to_html
  end

  # By default the titles look boring. This changes the titles of all the
  # pages and adds the book name as a suffix.
  #
  # Raises ArgumentError when the document has no <title> element.
  def fix_titles(text)
    doc = build_doc(text)
    title_node = doc.css("title")[0]
    raise ArgumentError, "No title found in HTML document" if title_node.nil?
    title_node.content = build_title(title_node.content)
    doc.to_html
  end

  # "3 Making a sourdough starter"
  # Should return "Making a sourdough starter - The Sourdough Framework"
  #
  # A title that is empty, or that consists only of the leading number,
  # yields just the book name.
  def build_title(title)
    # No title, happens on index page in LaTeX build
    return title_appendix if title.empty?

    # Starts with number: drop the first whitespace separated token
    use_title = title[0].to_i > 0 ? title.split(" ").drop(1).join(" ") : title
    return title_appendix if use_title.empty?

    "#{use_title} - #{title_appendix}"
  end

  # Book name appended to every page title.
  def title_appendix
    "The Sourdough Framework"
  end

  # By default the menu is not made for mobile devices. This adds mobile
  # capabilities to the menu: the existing children of nav.TOC are moved
  # into a ".menu-items" wrapper that follows the mobile header markup.
  # Pages without nav.TOC are returned unchanged.
  def fix_menu(text)
    doc = build_doc(text)
    nav = doc.css("nav.TOC")[0]
    # page has no nav
    return text unless nav
    menu_items_html = doc.css("nav.TOC > *").to_html
    nav.add_class("menu")
    nav_content = %Q{
#{menu_mobile_nav}
<div class="menu-items">#{menu_items_html}</div>
}
    nav.inner_html = nav_content
    doc.to_html
  end

  # Markup for the logo link and the checkbox/label pair used as the mobile
  # menu toggle.
  def menu_mobile_nav
    %Q{
<a href="/" class="logo">
🍞 The Sourdough Framework
</a>
<input type="checkbox" id="toggle-menu">
<label class="hamb toggle-menu-label" for="toggle-menu"><span class="hamb-line"></span></label>
}
  end

  # The cover page should have some additional content and allow the user to
  # click the book cover in order to start reading. The body is replaced by
  # the menu followed by the linked cover image; the text of the first <i>
  # element in the body is shown as the version.
  #
  # Raises ArgumentError when the body contains no <i> element.
  def fix_cover_page(text)
    doc = build_doc(text)
    body = doc.css("body")[0]
    version_node = doc.css("body i")[0]
    raise ArgumentError, "No version found on cover page" if version_node.nil?
    version = version_node.text
    menu = doc.css("body > nav")[0]
    content = %Q{
<main class="titlepage">
<a href="Thehistoryofsourdough.html">
<img src="cover-page.jpg" />
<div class="version"><p>#{version}</p></div>
</a>
</main>
}
    body.inner_html = "#{menu} #{content}"
    doc.to_html
  end

  # Users are lost and can't easily access the root page of the book. This
  # adds a home menu item in front of the menu and a few fixed entries
  # behind it. Pages without ".menu-items" are returned unchanged.
  def add_home_link_to_menu(text)
    doc = build_doc(text)
    menu = doc.css(".menu-items")[0]
    return text if menu.nil?
    home_html = %Q{<span class="chapterToc home-link"><a href="/">Home</a></span>}
    # Normally the flowcharts link should be automatically added, but there
    # seems to be a problem in the generation. See:
    # https://github.com/hendricius/the-sourdough-framework/pull/188 for more
    # details
    appendix_html = %Q{
<span class="chapterToc flowcharts-menu">
<a href="listoflocname.html">
<span class="link_text">List of Flowcharts</span>
</a>
</span>
<span class="chapterToc">
<a href="https://breadco.de/kofi">
<span class="chapter_number">⭐️</span>
<span class="link_text">Donate</span>
</a>
</span>
<span class="chapterToc">
<a href="https://breadco.de/hardcover-book">
<span class="chapter_number">📚</span>
<span class="link_text">Hardcover Book</span>
</a>
</span>
}
    menu.inner_html = "#{home_html} #{menu.inner_html} #{appendix_html}"
    doc.to_html
  end

  # Some of the links in the menu have an anchor. This makes clicking through
  # the menu frustrating as the browser jumps a lot on each request. Only do
  # this for the top level menu entries though. Links without an href or
  # without a "#" are left alone.
  def fix_anchor_hyperlinks_menu(text)
    doc = build_doc(text)
    doc.css(".menu-items > span.chapterToc > a").each do |link|
      href = link["href"]
      next if href.nil? || !href.include?("#")
      # Keep only the part in front of the first "#".
      link["href"] = href.split("#", 2)[0]
    end
    doc.to_html
  end

  # Appends the given markup to the end of the document's <head>.
  def append_to_head(doc, html)
    head = doc.css("head")[0]
    head.inner_html = "#{head.inner_html} #{html}"
  end

  # Adds the favicon link to the page head.
  def add_favicon(text)
    doc = build_doc(text)
    append_to_head(doc, %Q{<link rel="shortcut icon" type="image/x-icon" href="favicon.ico" />})
    doc.to_html
  end

  # Adds Open Graph and description meta tags to the page head. The page
  # title is read from the existing <title>; for index.html the og:url
  # points at the site root instead of the file.
  def add_meta_tags(text, filename)
    doc = build_doc(text)
    title = doc.css("head")[0].css("title")[0].text
    cleaned_filename = extract_file_from_path(filename)
    # Exception for index.html when we are on the root page
    use_filename = cleaned_filename == "index.html" ? "" : cleaned_filename
    description = extract_description(text, filename)
    og_image = og_image_for_chapter(cleaned_filename)
    meta_html = %Q{
<meta property="og:locale" content="en_US">
<meta property="og:site_name" content="The Sourdough Framework">
<meta property="og:title" content="#{title}">
<meta property="og:type" content="article">
<meta property="og:url" content="#{HOST}/#{use_filename}">
<meta property="og:description" content="#{description}">
<meta name="description" content="#{description}">
<meta property="og:image" content="#{HOST}/#{og_image}" />
}
    append_to_head(doc, meta_html)
    doc.to_html
  end

  # Takes a name like "static_website_html/book.html" and returns "book.html".
  # A name without "/" is returned as is.
  #
  # Raises ArgumentError when the path has more than two segments.
  def extract_file_from_path(filename)
    result = filename.split("/")
    return filename if result.length == 1
    raise ArgumentError, "The filename #{filename} is odd. Don't know how to handle it" if result.length > 2
    result[1]
  end

  # Description used in the meta tags: the custom text configured for the
  # page if there is one, otherwise the stripped text of the first paragraph
  # inside ".main-content", or "" when no such paragraph exists.
  def extract_description(text, filename)
    custom = custom_titles_per_filename(clean_filename(filename))
    return custom.strip if custom

    el = build_doc(text).css(".main-content p:first-of-type")[0]
    return "" if el.nil?
    el.text.strip
  end

  # static_website_html/Acknowledgements.html => "Acknowledgements.html"
  # Same rules as extract_file_from_path, so bare file names work as well.
  def clean_filename(filename)
    extract_file_from_path(filename)
  end

  # Hand-written description for the given file name, or nil when the page
  # has none.
  def custom_titles_per_filename(filename)
    index_text = "The Sourdough Framework goes beyond just recipes and provides a solid knowledge foundation, covering the science of sourdough, the basics of bread making, and advanced techniques for achieving the perfect sourdough bread at home."
    data = {
      "book.html" => index_text,
      "index.html" => index_text
    }
    data[filename]
  end

  # Removes the first ".sectionTOCS" element; pages without one are
  # returned unchanged.
  def remove_section_table_of_contents(text)
    doc = build_doc(text)
    el = doc.css(".sectionTOCS")[0]
    return text unless el
    el.remove
    doc.to_html
  end

  # Open Graph image for the page, falling back to the "index.html" image.
  def og_image_for_chapter(chapter_name)
    open_graph_images_map[chapter_name] || open_graph_images_map["index.html"]
  end

  # File name => Open Graph image file name.
  def open_graph_images_map
    OPEN_GRAPH_IMAGES
  end

  # NOTE(review): the unconditional return right after parsing means this
  # method currently only re-serialises the document; none of the selection
  # logic below it is ever executed. Confirm whether that is intentional
  # before removing either the return or the unreachable code.
  def mark_menu_as_selected_if_on_page(text, filename)
    doc = build_doc(text)
    return doc.to_html
    selected = doc.css(".menu-items .chapterToc > a").find do |el|
      el["href"] == ""
    end
    # Special case for index page
    #if ["index.html", "book.html"].include?(filename)
    #  doc.css(".menu-items .chapterToc.home-link")[0].add_class("selected")
    #  return doc.to_html
    #end
    # Special case for the flowcharts page which is added by us to the menu.
    # This needs to be done for future manually added pages too
    if "listoflocname.html" == filename
      doc.css(".menu-items .chapterToc.flowcharts-menu")[0].add_class("selected")
      return doc.to_html
    end
    return doc.to_html unless selected
    # Fix that when the menu is selected the href is empty. This way users can
    # click the menu and the page will reload.
    selected["href"] = filename
    selected.parent.add_class("selected")
    doc.to_html
  end

  # Adds a canonical link pointing at the site root to the cover pages.
  # book.html is a duplicate of index.html; the file is still needed though
  # for proper display. Other pages are returned unchanged.
  def add_canonical_for_duplicates(text, filename)
    return text unless COVER_PAGE_FILES.include?(filename)
    doc = build_doc(text)
    canonical_html = %Q{
<link rel="canonical" href="#{HOST}" />
}
    append_to_head(doc, canonical_html)
    doc.to_html
  end

  # Adds the script.js include to the page head.
  def include_javascript(text)
    doc = build_doc(text)
    js_tag_html = %Q{
<script type="text/javascript" src="script.js"></script>
}
    append_to_head(doc, js_tag_html)
    doc.to_html
  end

  # Prepends the introduction text to the ".titlepage" element of the cover
  # pages and marks that element as main content. Other pages are returned
  # unchanged.
  #
  # Raises ArgumentError when a cover page has no ".titlepage" element.
  def add_text_to_coverpage(text, filename)
    return text unless is_cover_page?(filename)
    doc = build_doc(text)
    content = doc.css(".titlepage")[0]
    raise ArgumentError, ".titlepage not found in HTML" if content.nil?
    content.add_class("main-content")
    content.inner_html = "#{build_cover_page_content} #{content.inner_html}"
    doc.to_html
  end

  # Static introduction markup shown on the cover page.
  def build_cover_page_content
    %Q{
<h2 class="chapterHead home-title">
🍞 The Sourdough Framework
</h2>
<p class="noindent">
The Sourdough Framework goes beyond just recipes and provides you a solid
knowledge foundation, covering the science of sourdough, the basics of
bread making, and advanced techniques for achieving the perfect sourdough bread at home.
</p>
<div class="videoWrapper">
<iframe width="560" height="349" src="https://www.youtube-nocookie.com/embed/l0GwG74otX4?controls=0" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>
</div>
<p class="noindent">
Creating this book has been a labor of love. My
main goal has always been to spread the joy of baking and empower bread
enthusiasts like yourself. To ensure that the book remains accessible
to everyone, I have decided to make it available as a free digital download.
</p>
<img alt="One of my best Sourdough Breads" class="home-bread" src="bread.jpg" />
<p class="noindent">
However, producing and maintaining resources like this requires
considerable time, effort, and financial investment. If you find value
in "The Sourdough Framework" and appreciate the effort that went into
creating it, I kindly request your support <a href="https://breadco.de/book">
through a donation</a> or by
<a href="https://www.breadco.de/hardcover-book">considering the purchase of
the hardcover version of this book.</a>
</p>
<p class="noindent">
Your generous contribution will not only help me cover the costs associated
with this project but will also enable me to continue creating more valuable
content in the future.
</p>
<p class="noindent">
If you feel inspired to contribute, please consider making a donation of
any amount through <a href="https://breadco.de/book">my donation page</a>.
Your support will go a long way in ensuring
that this knowledge can reach even more bread enthusiasts worldwide.
</p>
<p class="noindent">
Remember, your donation is entirely voluntary and any amount you
contribute is deeply appreciated. If you are unable to make a donation at
this time, please know that your readership and support in spreading the
word about "The Sourdough Framework" are invaluable contributions as well.
</p>
<p class="noindent">
Thank you for being a part of this journey, and I hope that
"The Sourdough Framework" enriches your bread-making adventures.
Together, we can continue to share the love of baking and cultivate a
community passionate about the art of sourdough.
</p>
<p class="noindent">
You can either browse through this page or download the full book directly:
</p>
<p class="noindent">
PDF: <a href="https://www.the-bread-code.io/book.pdf">https://www.the-bread-code.io/book.pdf</a><br>
PDF (no serif): <a href="https://www.the-bread-code.io/book-sans-serif.pdf">https://www.the-bread-code.io/book-sans-serif.pdf</a>
</p>
<p class="noindent">
EPUB: <a href="https://www.the-bread-code.io/book.epub">https://www.the-bread-code.io/book.epub</a><br>
EPUB in Black & White, size optimized for screen readers : <a href="https://www.the-bread-code.io/bw-book.epub">https://www.the-bread-code.io/bw-book.epub</a><br>
</p>
<p class="noindent">
The full source code of the book can be found here:
<a href="https://www.github.com/hendricius/the-sourdough-framework">https://www.github.com/hendricius/the-sourdough-framework</a>
</p>
<p class="noindent">
There's also a hardcover version of the book available featuring an even more awesome design. You can read more information here:
<a href="https://www.breadco.de/hardcover-book">https://www.breadco.de/hardcover-book</a>
</p>
<p class="noindent">
Thank you and may the gluten be strong with you,<br>
Hendrik
</p>
}
  end

  # For some reason the list of figures and tables is displayed twice in the
  # menu. Fix this by dropping the ".likechapterToc" entries from the menu.
  def fix_list_of_tables_figures_duplicates(text)
    doc = build_doc(text)
    doc.css(".menu-items > .likechapterToc").each(&:remove)
    doc.to_html
  end

  # The list of tables for some reason expands the menu on other pages? Fix
  # this by removing section, subsection, list-of-tables, list-of-figures
  # entries and line breaks that sit directly inside the menu.
  def fix_menus_list_figures_tables(text)
    doc = build_doc(text)
    doc.css(".menu-items > .subsectionToc, .menu-items > .sectionToc").each(&:remove)
    doc.css(".menu-items > .lotToc").each(&:remove)
    doc.css(".menu-items > .lofToc").each(&:remove)
    doc.css(".menu-items > br").each(&:remove)
    doc.to_html
  end

  # For some reason the links are not properly displayed and have odd color.
  # This repairs the html and css by dropping the "TOC" class from elements
  # inside the main content.
  def fix_list_of_figures_tables_display(text)
    doc = build_doc(text)
    doc.css(".main-content .TOC").remove_class("TOC")
    doc.to_html
  end

  # Repairs jsdelivr CDN URLs whose scheme separator is a single slash
  # ("https:/cdn.jsdelivr.net") instead of "//".
  def fix_js_dependency_link(text)
    text.gsub("https:/cdn.jsdelivr.net", "https://cdn.jsdelivr.net")
  end

  # Parses the given HTML text into a Nokogiri document.
  def build_doc(text)
    Nokogiri::HTML(text)
  end

  # Appends a permalink to every section and subsection header that has an
  # id. Headers without an id are skipped.
  def add_anchors_to_headers(text)
    doc = build_doc(text)
    doc.css(".sectionHead, .subsectionHead").each do |header|
      anchor = header["id"]
      # No anchor for whatever reason
      next unless anchor
      copy_link = %Q{<a href="##{anchor}" class="permalink">🔗</a>}
      header.inner_html = "#{header.inner_html}#{copy_link}"
    end
    doc.to_html
  end
end
# Top-level statement: the full build starts as soon as this file is run or
# loaded.
ModifyBuild.build