require 'pry'
require 'nokogiri'

# Raised by ModifyBuild#validate_file when a generated HTML page does not
# reference the stylesheets the rest of the pipeline expects.
class InvalidWebsiteFormat < StandardError; end

# This class goes through the generated default LaTeX HTML and performs
# several optimisations on the HTML. Nokogiri is used to facilitate the
# modifications.
#
# Every transformation takes the page as an HTML string and returns the
# (possibly modified) HTML string, so the steps can be chained in
# #modify_file.
#
# NOTE(review): several %Q{} templates below are empty or contain only plain
# text, and some locals that would feed them (e.g. in add_meta_tags, fix_menu,
# create_menu_groups) are computed but never interpolated. This looks like
# markup was lost from the templates - confirm against the project repository
# before relying on them.
class ModifyBuild
  HOST = "https://www.the-sourdough-framework.com".freeze

  # Pages that are added to the menu by this script (not by the LaTeX
  # build), mapped to the CSS class of their menu entry.
  # This needs to be extended for future manually added pages too.
  MANUAL_MENU_CLASSES = {
    "listoflocname.html" => "flowcharts-menu",
    "listtablename.html" => "listtables-menu",
    "listfigurename.html" => "listfigures-menu"
  }.freeze

  # Convenience entry point: builds a fresh instance and runs the build.
  def self.build
    new.build
  end

  # Rewrites all generated HTML pages and then writes the sitemap.
  # Raises InvalidWebsiteFormat when a page fails validation.
  def build
    build_latex_html
    create_sitemap
  end

  private

  # Writes "<build_dir>/sitemap.txt" with one absolute URL per HTML page,
  # sorted by file path.
  def create_sitemap
    # "static_website_html/Acknowledgements.html"
    # Only take the html part
    content = list_of_files_to_modify.sort.map do |fn|
      "#{HOST}/#{File.basename(fn)}\n"
    end.join
    File.open("#{build_dir}/sitemap.txt", 'w:UTF-8') { |file| file.write(content) }
  end

  # Recreates the build directory, copies the generated site and the assets
  # into it and post-processes every HTML file in place.
  def build_latex_html
    system("rm -rf #{build_dir}/")
    system("mkdir #{build_dir}/")
    copy_source_to_local_dir_for_modification
    copy_assets_into_folder
    list_of_files_to_modify.each { |filename| modify_file(filename) }
  end

  # Reads one HTML file, validates it, runs all transformations in a fixed
  # order and writes the result back to the same path.
  # The order matters: later steps rely on classes/markup added by earlier
  # ones, so do not reorder without checking the individual steps.
  def modify_file(filename)
    orig_text = File.read(filename, encoding: "UTF-8")
    validate_file(orig_text)
    text = fix_double_slashes(orig_text)
    text = fix_navigation_bar(text)
    text = fix_titles(text)
    text = fix_menu(text)
    text = fix_cover_page(text) if is_cover_page?(filename)
    text = add_header_banner(text)
    text = add_home_link_to_menu(text)
    text = fix_anchor_hyperlinks_menu(text)
    text = add_favicon(text)
    text = add_meta_tags(text, filename)
    text = remove_section_table_of_contents(text)
    text = add_canonical_for_duplicates(text, extract_file_from_path(filename))
    text = include_javascript(text)
    text = add_text_to_coverpage(text, extract_file_from_path(filename))
    text = fix_js_dependency_link(text)
    text = fix_list_of_tables_figures_duplicates(text)
    text = add_anchors_to_headers(text)
    text = create_menu_groups(text)
    text = fix_top_links(text)
    text = fix_flowchart_background(text)
    text = remove_empty_menu_links(text)
    text = fix_bottom_cross_links(text)
    text = insert_mobile_header_graphic(text)
    text = fix_https_links(text)
    text = add_anchors_to_glossary_items(text) if is_glossary_page?(filename)
    text = mark_menu_as_selected_if_on_page(text, extract_file_from_path(filename))
    text = fix_menus_list_figures_tables(text) if is_list_figures_tables?(filename)
    text = fix_list_of_figures_tables_display(text) if is_list_figures_tables?(filename)
    File.open(filename, "w:UTF-8") { |file| file.puts text }
  end

  # True when the path contains "book.html" or "index.html".
  def is_cover_page?(filename)
    ["book.html", "index.html"].any? { |name| filename.include?(name) }
  end

  # True when the path contains "Glossary.html".
  def is_glossary_page?(filename)
    filename.include?("Glossary.html")
  end

  # True for the list-of-figures/tables/flowcharts pages and the
  # bibliography page.
  def is_list_figures_tables?(filename)
    ["listfigurename.html", "listtablename.html", "listoflocname.html", "bibname.html"].any? do |name|
      filename.include?(name)
    end
  end

  # All top-level HTML files inside the build directory.
  def list_of_files_to_modify
    Dir.glob("#{build_dir}/*.html")
  end

  # Builds the book website when its output directory does not exist yet and
  # copies the output into the local build directory.
  def copy_source_to_local_dir_for_modification
    system("cd ../book/ && make website") unless source_website_output_exists?
    system("cp -R ../book/#{build_dir}/* #{build_dir}")
  end

  def copy_assets_into_folder
    system("cp -R ./assets/* #{build_dir}")
  end

  def source_website_output_exists?
    File.directory?("../book/#{build_dir}/")
  end

  def build_dir
    'static_website_html'
  end

  # Collapses every "//" into "/". This also turns "https://" into
  # "https:/"; fix_js_dependency_link and fix_https_links restore those
  # later in the pipeline.
  # NOTE(review): plain "http://" links are collapsed too and no visible step
  # restores them - confirm none occur in the generated pages.
  def fix_double_slashes(text)
    text.gsub("//", "/")
  end

  # Sometimes for whatever reason the make4ht input produces files that are
  # improperly formatted. This validator will go through the files and do a
  # couple of basic checks to see if the files are in the format we expect. If
  # not an exception is caused.
  #
  # Returns true when both book.css and style.css are linked as stylesheets,
  # raises InvalidWebsiteFormat otherwise.
  def validate_file(text)
    doc = build_doc(text)
    stylesheets = doc.css("link[rel='stylesheet']").map { |link| link["href"] }
    has_all_styles = %w(book.css style.css).all? do |required_stylesheet|
      stylesheets.include?(required_stylesheet)
    end
    raise InvalidWebsiteFormat, "No style tag style.css found in the website" unless has_all_styles

    true
  end

  # Merges the chapter number (first child of each TOC entry) and the link
  # (second child) into a single link node.
  def fix_navigation_bar(text)
    doc = build_doc(text)
    elements = [doc.search('.chapterToc'), doc.search('.sectionToc'), doc.search('.subsectionToc')].flatten
    elements.each do |n|
      chapter_number_or_nothing = n.children[0].text.strip.gsub(/[[:space:]]/, '')
      hyperlink_node = n.children[1]
      next if hyperlink_node.nil?

      # remove unneeded text and merge into single a tag
      n.children[0].remove
      # Entries without a chapter number keep their link untouched.
      unless chapter_number_or_nothing == ""
        link_text = hyperlink_node.content
        hyperlink_node.inner_html = %Q{ #{chapter_number_or_nothing} #{link_text} }
      end
      n.inner_html = hyperlink_node.to_s
    end
    doc.to_html
  end

  # Rebuilds the content of ".menu-items" from the groups returned by
  # build_groups. Pages without a ".menu-items" node are returned unchanged.
  #
  # NOTE(review): as written, `out` is assembled per group but the group
  # template is empty, so the menu content ends up empty - confirm the
  # template markup.
  def create_menu_groups(text)
    doc = build_doc(text)
    menu_el = doc.css(".menu-items")[0]
    return text unless menu_el

    groups = build_groups(doc.css(".menu-items > span"))
    html = ""
    groups.each do |group|
      out = ""
      group.each do |g|
        item_html = g.to_html
        next if item_html.empty?

        out += %Q{
#{item_html}
}
      end
      html += %Q{}
    end
    menu_el.inner_html = html
    doc.to_html
  end

  # Splits the flat list of menu items into groups. A new group starts
  # whenever the following item's class attribute contains "chapterToc";
  # the remaining items form the last group.
  # Returns an Array of Arrays of nodes (empty for an empty input).
  def build_groups(menu_items)
    menu_items.slice_when do |_current, next_item|
      # `to_s` keeps items without a class attribute from raising.
      next_item["class"].to_s.include?("chapterToc")
    end.to_a
  end

  # By default the titles look boring. This changes the titles of all the
  # pages and adds the book name as appendix
  #
  # Raises ArgumentError when the document has no <title> node.
  def fix_titles(text)
    doc = build_doc(text)
    title_node = doc.css("title")[0]
    raise ArgumentError, "No title found in HTML document" if title_node.nil?

    title_node.content = build_title(title_node.content)
    doc.to_html
  end

  # "3 Making a sourdough starter"
  # Should return "Making a sourdough starter - The Sourdough Framework"
  def build_title(title)
    # No title, happens on index page in LaTeX build
    return title_appendix if title.empty?

    # Starts with number: drop the first whitespace-separated word.
    use_title = title[0].to_i > 0 ? title.split(" ").drop(1).join(" ") : title
    "#{use_title} - #{title_appendix}"
  end

  def title_appendix
    "The Sourdough Framework"
  end

  # By default the menu is not made for mobile devices. This adds mobile
  # capabilities to the menu
  #
  # Pages without a "nav.TOC" element are returned unchanged.
  def fix_menu(text)
    doc = build_doc(text)
    nav = doc.css("nav.TOC")[0]
    # page has no nav
    return text unless nav

    menu_items_html = doc.css("nav.TOC > *").to_html
    nav.add_class("menu")
    nav_content = %Q{ #{menu_mobile_nav} }
    nav.inner_html = nav_content
    doc.to_html
  end

  def menu_mobile_nav
    %Q{ }
  end

  # The cover page should have some additional content and allow the user to
  # click the book cover in order to start reading.
  #
  # Replaces the body with the existing nav followed by the cover template;
  # the version text is read from the first <i> inside the body.
  def fix_cover_page(text)
    doc = build_doc(text)
    body = doc.css("body")[0]
    version = doc.css("body i")[0].text
    menu = doc.css("body > nav")[0]
    content = %Q{

#{version}

}
    body.inner_html = "#{menu} #{content}"
    doc.to_html
  end

  # Users are lost and can't easily access the root page of the book. This
  # adds a home menu item.
  #
  # Pages without a ".menu-items" node are returned unchanged.
  def add_home_link_to_menu(text)
    # Remove duplicate menu entries first before building clean menu
    doc = build_doc(remove_duplicate_entries_menu(text))
    menu = doc.css(".menu-items")[0]
    return text if menu.nil?

    home_html = %Q{🍞 The Sourdough Framework}
    # Normally the flowcharts link should be automatically added, but there
    # seems to be a problem in the generation. See:
    # https://github.com/hendricius/the-sourdough-framework/pull/188 for more
    # details
    appendix_html = %Q{ List of Flowcharts List of Tables List of Figures Bibliography ⬇️ Book .PDF ⬇️ Book .EPUB ⬇️ Short TL;DR .PDF 📚 Hardcover Book ⚙️ Source code ⭐️ Support me }
    menu.inner_html = "#{home_html} #{menu.inner_html} #{appendix_html}"
    doc.to_html
  end

  # Adds a header banner to each page
  #
  # Pages that lack either ".main-content" or ".menu" are only re-serialised.
  def add_header_banner(text)
    doc = build_doc(text)
    body = doc.css("body")[0]
    footnotes = doc.css(".footnotes")[0]
    main = doc.css(".main-content")[0]
    menu = doc.css(".menu")[0]
    return doc.to_html if main.nil? || menu.nil?

    body.inner_html = %Q{
#{build_header_html}
#{menu.to_html}
#{main.inner_html} #{footnotes ? footnotes.to_html : ''}
}
    doc.to_html
  end

  def build_header_html
    %Q{
}
  end

  # Some of the menu links are added in the wrong order. Remove them since we
  # later on add them in the structure that we want.
  def remove_duplicate_entries_menu(text)
    doc = build_doc(text)
    remove = ["List of Tables", "List of Figures"]
    doc.css(".menu-items .chapterToc > a")
       .select { |el| remove.include?(el.text) }
       .each(&:remove)
    doc.to_html
  end

  # Some of the links in the menu have an anchor. This makes clicking through
  # the menu frustrating as the browser jumps a lot on each request. Only do
  # this for the top level menu entries though.
  def fix_anchor_hyperlinks_menu(text)
    doc = build_doc(text)
    doc.css(".menu-items > span.chapterToc > a").each do |el|
      link = el["href"]
      # Anchors without an href have nothing to strip.
      next unless link

      splitted = link.split("#")
      next if splitted.length == 1

      el["href"] = splitted[0]
    end
    doc.to_html
  end

  # Appends the favicon template to <head>.
  def add_favicon(text)
    doc = build_doc(text)
    head = doc.css("head")[0]
    fav_html = %Q{}
    head.inner_html = "#{head.inner_html} #{fav_html}"
    doc.to_html
  end

  # Appends the meta tag template to <head>. `filename` is the path of the
  # page inside the build directory.
  def add_meta_tags(text, filename)
    doc = build_doc(text)
    head = doc.css("head")[0]
    title = head.css("title")[0].text
    cleaned_filename = extract_file_from_path(filename)
    # Exception for index.html when we are on the root page
    use_filename = cleaned_filename == "index.html" ? "" : cleaned_filename
    description = extract_description(text, filename)
    og_image = og_image_for_chapter(cleaned_filename)
    meta_html = %Q{ }
    head.inner_html = "#{head.inner_html} #{meta_html}"
    doc.to_html
  end

  # Takes a name like "static_website_html/book.html" and returns "book.html"
  #
  # A name without any "/" is returned as is; more than one "/" raises
  # ArgumentError.
  def extract_file_from_path(filename)
    result = filename.split("/")
    return filename if result.length == 1
    raise ArgumentError, "The filename #{filename} is odd. Don't know how to handle it" if result.length > 2

    result[1]
  end

  # Returns the description for a page: the custom text for the cover pages,
  # otherwise the stripped text of the first paragraph in ".main-content",
  # or "" when there is none.
  def extract_description(text, filename)
    custom = custom_titles_per_filename(clean_filename(filename))
    return custom.strip if custom

    el = build_doc(text).css(".main-content p:first-of-type")[0]
    return "" if el.nil?

    el.text.strip
  end

  # static_website_html/Acknowledgements.html => "Acknowledgements.html"
  def clean_filename(filename)
    filename.split("/")[1]
  end

  # Custom description text for the cover pages; nil for every other file.
  def custom_titles_per_filename(filename)
    index_text = "The Sourdough Framework goes beyond just recipes and provides a solid knowledge foundation, covering the science of sourdough, the basics of bread making, and advanced techniques for achieving the perfect sourdough bread at home."
    data = {
      "book.html" => index_text,
      "index.html" => index_text
    }
    data[filename]
  end

  # Removes the first ".sectionTOCS" element, if any.
  def remove_section_table_of_contents(text)
    doc = build_doc(text)
    el = doc.css(".sectionTOCS")[0]
    return text unless el

    el.remove
    doc.to_html
  end

  # Open graph image for a chapter file; falls back to the index image.
  def og_image_for_chapter(chapter_name)
    open_graph_images_map[chapter_name] || open_graph_images_map["index.html"]
  end

  def open_graph_images_map
    {
      "Baking.html" => "og_image_baking.png",
      "Breadtypes.html" => "og_image_bread_types.png",
      "Flourtypes.html" => "og_image_flour_types.png",
      "index.html" => "og_image_general.png",
      "Howsourdoughworks.html" => "og_image_how_sourdough_works.png",
      "Makingasourdoughstarter.html" => "og_image_making_a_sourdough_starter.png",
      "Nonwheatsourdough.html" => "og_image_non_wheat_sourdough.png",
      "Sourdoughstartertypes.html" => "og_image_sourdough_starter_types.png",
      "Storingbread.html" => "og_image_storing_bread.png",
      "Thehistoryofsourdough.html" => "og_image_the_history_of_sourdough.png",
      "Wheatsourdough.html" => "og_image_wheat_sourdough.png",
      "Troubleshooting.html" => "og_image_troubleshooting.png",
      "Mixins.html" => "og_image_mixins.png",
    }
  end

  # Adds the "selected" class to the menu group of the current page.
  # `filename` is the bare file name (e.g. "Baking.html").
  def mark_menu_as_selected_if_on_page(text, filename)
    doc = build_doc(text)

    # Special case for index page
    #if ["index.html", "book.html"].include?(filename)
    #  doc.css(".menu-items .chapterToc.home-link")[0].add_class("selected")
    #  return doc.to_html
    #end

    # Special case for the pages which are added by us to the menu.
    manual_class = MANUAL_MENU_CLASSES[filename]
    if manual_class
      entry = doc.css(".menu-items .chapterToc.#{manual_class}")[0]
      # Safe navigation: a missing entry or group leaves the page unmarked
      # instead of aborting the build.
      entry&.ancestors(".menu-group")&.first&.add_class("selected")
      return doc.to_html
    end

    selected = doc.css(".menu-items .chapterToc > a").find { |el| el["href"] == "" }
    return doc.to_html unless selected

    # Fix that when the menu is selected the href is empty. This way users can
    # click the menu and the page will reload.
    selected["href"] = filename
    selected.ancestors(".menu-group").first&.add_class("selected")
    doc.to_html
  end

  # Appends the canonical template to <head> of the cover pages.
  def add_canonical_for_duplicates(text, filename)
    # Only applies to book.html which is a duplicate for index.html. The file
    # is still needed though for proper display.
    canonical_pages = ["book.html", "index.html"]
    return text unless canonical_pages.include?(filename)

    doc = build_doc(text)
    head = doc.css("head")[0]
    canonical_html = %Q{ }
    head.inner_html = "#{head.inner_html} #{canonical_html}"
    doc.to_html
  end

  # Appends the javascript template to <head>.
  def include_javascript(text)
    doc = build_doc(text)
    head = doc.css("head")[0]
    js_tag_html = %Q{ }
    head.inner_html = "#{head.inner_html} #{js_tag_html}"
    doc.to_html
  end

  # Prepends the cover page text to ".main-content" on cover pages.
  def add_text_to_coverpage(text, filename)
    return text unless is_cover_page?(filename)

    doc = build_doc(text)
    content = doc.css(".main-content")[0]
    content.inner_html = "#{build_cover_page_content} #{content.inner_html}"
    doc.to_html
  end

  # Gives every glossary term ("dt.description") an id of the form
  # "term-<slug>" derived from the text of its first <span>, and appends the
  # copy-link template to that span.
  def add_anchors_to_glossary_items(text)
    doc = build_doc(text)
    doc.css("dt.description").each do |el|
      term = el.css("span")[0]
      item_name = term&.text
      # No anchor for whatever reason
      next unless item_name

      anchor = item_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      copy_link = %Q{}
      el.set_attribute("id", "term-#{anchor}")
      term.inner_html = "#{term.inner_html}#{copy_link}"
    end
    doc.to_html
  end

  def build_cover_page_content
    %Q{

🍞 The Sourdough Framework

The Sourdough Framework goes beyond just recipes and provides you a solid knowledge foundation, covering the science of sourdough, the basics of bread making, and advanced techniques for achieving the perfect sourdough bread at home.

Creating this book has been a labor of love. My main goal has always been to spread the joy of baking and empower bread enthusiasts like yourself. To ensure that the book remains accessible to everyone, I have decided to make it available free of charge.

One of my best Sourdough Breads

⭐ Support this project

Producing and maintaining resources like this requires considerable time, effort, and financial investment. If you find value in "The Sourdough Framework" and appreciate the effort that went into creating it, I kindly request your support through a donation or by considering the purchase of the hardcover version of this book.

Your generous contribution will not only help me cover the costs associated with this project but will also enable me to continue creating more valuable content in the future. Your donation is entirely voluntary and any amount you contribute is deeply appreciated. If you are unable to make a donation at this time, please know that your readership and support in spreading the word about "The Sourdough Framework" are invaluable contributions as well.

Thank you for being a part of this journey, and I hope that "The Sourdough Framework" enriches your bread-making adventures. Together, we can continue to share the love of baking and cultivate a community passionate about the art of sourdough.

⬇️ Versions

You can either browse through this website or download the full book to read it on your preferred device:

Thank you and may the gluten be strong with you,
Hendrik

}
  end

  # For some reason the list of figures and tables is displayed twice in the
  # menu. Fix this.
  def fix_list_of_tables_figures_duplicates(text)
    doc = build_doc(text)
    doc.css(".menu-items > .likechapterToc").each(&:remove)
    doc.to_html
  end

  # The list of tables for some reason expands the menu on other pages? Fix
  # this.
  def fix_menus_list_figures_tables(text)
    doc = build_doc(text)
    doc.css(".menu-group .subsectionToc, .menu-group .sectionToc").each do |node|
      # Entries that are not wrapped in a ".menu-entry" are left alone.
      node.ancestors(".menu-entry").first&.remove
    end
    doc.to_html
  end

  # For some reason the links are not properly displayed and have odd color.
  # This repairs the html and css.
  def fix_list_of_figures_tables_display(text)
    doc = build_doc(text)
    doc.css(".main-content .TOC").remove_class("TOC")
    doc.to_html
  end

  # For some reason the dependency is missing a // in the url.
  def fix_js_dependency_link(text)
    text.gsub("https:/cdn.jsdelivr.net", "https://cdn.jsdelivr.net")
  end

  # Parses an HTML string into a Nokogiri document.
  def build_doc(text)
    Nokogiri::HTML(text)
  end

  # Appends the copy-link template to every section and subsection heading
  # that carries an id.
  def add_anchors_to_headers(text)
    doc = build_doc(text)
    doc.css(".sectionHead, .subsectionHead").each do |el|
      # `el["id"]` is nil for a missing attribute, so the guard below works.
      anchor = el["id"]
      # No anchor for whatever reason
      next unless anchor

      copy_link = %Q{}
      el.inner_html = "#{el.inner_html}#{copy_link}"
    end
    doc.to_html
  end

  # For some reason some of the links are broken in the conversion process.
  # They have https:/www and are missing a slash.
  def fix_https_links(text)
    text.gsub(/https:\/(?!\/)/, 'https://')
  end

  # Removes the first ".crosslinks-top" element, if any.
  def fix_top_links(text)
    doc = build_doc(text)
    el = doc.css(".crosslinks-top")[0]
    el.remove if el
    doc.to_html
  end

  # Removes menu groups whose first chapter entry is empty (or one space).
  def remove_empty_menu_links(text)
    doc = build_doc(text)
    doc.css(".menu-group").each do |m|
      element = m.css("span.chapterToc")[0]
      next unless element

      m.remove if ["", " "].include?(element.inner_html)
    end
    doc.to_html
  end

  # Inserts the mobile header graphic template right after the menu.
  # Pages without a ".TOC.menu" element are only re-serialised.
  def insert_mobile_header_graphic(text)
    doc = build_doc(text)
    menu = doc.css(".TOC.menu")[0]
    menu&.after('
')
    doc.to_html
  end

  # Marks the parent of every image whose src contains ".svg" with the
  # "flowchart-image-wrapper" class. Images without a src are skipped.
  def fix_flowchart_background(text)
    doc = build_doc(text)
    doc.css("img").each do |img|
      src = img.attr("src")
      next unless src && src.include?(".svg")

      img.parent.add_class("flowchart-image-wrapper")
    end
    doc.to_html
  end

  # Rebuilds the content of ".crosslinks-bottom" from its "prev" and "next"
  # links. Pages without that container are only re-serialised.
  def fix_bottom_cross_links(text)
    doc = build_doc(text)
    link_cont = doc.css(".crosslinks-bottom")[0]
    return doc.to_html unless link_cont

    links = doc.css(".crosslinks-bottom a")
    prev_link = links.find { |l| l.inner_html == "prev" }
    next_link = links.find { |l| l.inner_html == "next" }
    prev_html = prev_link ? "" : ''
    next_html = next_link ? "" : ''
    link_cont.inner_html = %Q{ #{prev_html} #{next_html} }
    doc.to_html
  end
end

ModifyBuild.build