X-Git-Url: https://git.stg.codes/stg.git/blobdiff_plain/0c9c28efcd43f53ac54aa60b2dfefa69c70dbadf..6b6d9b29e9e9e91f79507a8bf193fb30de311dcc:/doc/help/xslt/epub/bin/lib/docbook.rb diff --git a/doc/help/xslt/epub/bin/lib/docbook.rb b/doc/help/xslt/epub/bin/lib/docbook.rb new file mode 100755 index 00000000..14110d60 --- /dev/null +++ b/doc/help/xslt/epub/bin/lib/docbook.rb @@ -0,0 +1,227 @@ +require 'fileutils' +require 'rexml/parsers/pullparser' + +module DocBook + + class Epub + CHECKER = "epubcheck" + STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl")) + CALLOUT_PATH = File.join('images', 'callouts') + CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH)) + CALLOUT_LIMIT = 15 + CALLOUT_EXT = ".png" + XSLT_PROCESSOR = "xsltproc" + OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}" + MIMETYPE = "application/epub+zip" + META_DIR = "META-INF" + OEBPS_DIR = "OEBPS" + ZIPPER = "zip" + + attr_reader :output_dir + + def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[]) + @docbook_file = docbook_file + @output_dir = output_dir + @meta_dir = File.join(@output_dir, META_DIR) + @oebps_dir = File.join(@output_dir, OEBPS_DIR) + @css_file = css_file ? File.expand_path(css_file) : css_file + @embedded_fonts = embedded_fonts + @to_delete = [] + + if customization_layer + @stylesheet = File.expand_path(customization_layer) + else + @stylesheet = STYLESHEET + end + + unless File.exist?(@docbook_file) + raise ArgumentError.new("File #{@docbook_file} does not exist") + end + end + + def render_to_file(output_file, verbose=false) + render_to_epub(output_file, verbose) + bundle_epub(output_file, verbose) + cleanup_files(@to_delete) + end + + def self.invalid?(file) + # Obnoxiously, we can't just check for a non-zero output... + cmd = %Q(#{CHECKER} "#{file}") + output = `#{cmd} 2>&1` + + if $?.to_i == 0 + return false + else + STDERR.puts output if $DEBUG + return output + end + end + + private + def render_to_epub(output_file, verbose) + @collapsed_docbook_file = collapse_docbook() + + chunk_quietly = "--stringparam chunk.quietly " + (verbose ? '0' : '1') + callout_path = "--stringparam callout.graphics.path #{CALLOUT_PATH}/" + callout_limit = "--stringparam callout.graphics.number.limit #{CALLOUT_LIMIT}" + callout_ext = "--stringparam callout.graphics.extension #{CALLOUT_EXT}" + html_stylesheet = "--stringparam html.stylesheet #{File.basename(@css_file)}" if @css_file + base = "--stringparam base.dir #{OEBPS_DIR}/" + unless @embedded_fonts.empty? + embedded_fonts = @embedded_fonts.map {|f| File.basename(f)}.join(',') + font = "--stringparam epub.embedded.fonts \"#{embedded_fonts}\"" + end + meta = "--stringparam epub.metainf.dir #{META_DIR}/" + oebps = "--stringparam epub.oebps.dir #{OEBPS_DIR}/" + options = [chunk_quietly, + callout_path, + callout_limit, + callout_ext, + base, + font, + meta, + oebps, + html_stylesheet, + ].join(" ") + # Double-quote stylesheet & file to help Windows cmd.exe + db2epub_cmd = %Q(cd "#{@output_dir}" && #{XSLT_PROCESSOR} #{options} "#{@stylesheet}" "#{@collapsed_docbook_file}") + STDERR.puts db2epub_cmd if $DEBUG + success = system(db2epub_cmd) + raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success + @to_delete << Dir["#{@meta_dir}/*"] + @to_delete << Dir["#{@oebps_dir}/*"] + end + + def bundle_epub(output_file, verbose) + + quiet = verbose ? "" : "-q" + mimetype_filename = write_mimetype() + meta = File.basename(@meta_dir) + oebps = File.basename(@oebps_dir) + images = copy_images() + csses = copy_csses() + fonts = copy_fonts() + callouts = copy_callouts() + # zip -X -r ../book.epub mimetype META-INF OEBPS + # Double-quote stylesheet & file to help Windows cmd.exe + zip_cmd = %Q(cd "#{@output_dir}" && #{ZIPPER} #{quiet} -X -r "#{File.expand_path(output_file)}" "#{mimetype_filename}" "#{meta}" "#{oebps}") + puts zip_cmd if $DEBUG + success = system(zip_cmd) + raise "Could not bundle into .epub file to #{output_file}" unless success + end + + # Input must be collapsed because REXML couldn't find figures in files that + # were XIncluded or added by ENTITY + # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747 + def collapse_docbook + # Double-quote stylesheet & file to help Windows cmd.exe + collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)), + '.collapsed.' + File.basename(@docbook_file)) + entity_collapse_command = %Q(xmllint --loaddtd --noent -o "#{collapsed_file}" "#{@docbook_file}") + entity_success = system(entity_collapse_command) + raise "Could not collapse named entites in #{@docbook_file}" unless entity_success + + xinclude_collapse_command = %Q(xmllint --xinclude -o "#{collapsed_file}" "#{collapsed_file}") + xinclude_success = system(xinclude_collapse_command) + raise "Could not collapse XIncludes in #{@docbook_file}" unless xinclude_success + + @to_delete << collapsed_file + return collapsed_file + end + + def copy_callouts + new_callout_images = [] + if has_callouts? + calloutglob = "#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}" + Dir.glob(calloutglob).each {|img| + img_new_filename = File.join(@oebps_dir, CALLOUT_PATH, File.basename(img)) + + # TODO: What to rescue for these two? + FileUtils.mkdir_p(File.dirname(img_new_filename)) + FileUtils.cp(img, img_new_filename) + @to_delete << img_new_filename + new_callout_images << img + } + end + return new_callout_images + end + + def copy_fonts + new_fonts = [] + @embedded_fonts.each {|font_file| + font_new_filename = File.join(@oebps_dir, File.basename(font_file)) + FileUtils.cp(font_file, font_new_filename) + new_fonts << font_file + } + return new_fonts + end + + def copy_csses + if @css_file + css_new_filename = File.join(@oebps_dir, File.basename(@css_file)) + FileUtils.cp(@css_file, css_new_filename) + end + end + + def copy_images + image_references = get_image_refs() + new_images = [] + image_references.each {|img| + # TODO: It'd be cooler if we had a filetype lookup rather than just + # extension + if img =~ /\.(svg|png|gif|jpe?g|xml)/i + img_new_filename = File.join(@oebps_dir, img) + img_full = File.join(File.expand_path(File.dirname(@docbook_file)), img) + + # TODO: What to rescue for these two? + FileUtils.mkdir_p(File.dirname(img_new_filename)) + puts(img_full + ": " + img_new_filename) if $DEBUG + FileUtils.cp(img_full, img_new_filename) + @to_delete << img_new_filename + new_images << img_full + end + } + return new_images + end + + def write_mimetype + mimetype_filename = File.join(@output_dir, "mimetype") + File.open(mimetype_filename, "w") {|f| f.print MIMETYPE} + @to_delete << mimetype_filename + return File.basename(mimetype_filename) + end + + def cleanup_files(file_list) + file_list.flatten.each {|f| + # Yikes + FileUtils.rm_r(f, :force => true ) + } + end + + # Returns an Array of all of the (image) @filerefs in a document + def get_image_refs + parser = REXML::Parsers::PullParser.new(File.new(@collapsed_docbook_file)) + image_refs = [] + while parser.has_next? + el = parser.pull + if el.start_element? and (el[0] == "imagedata" or el[0] == "graphic") + image_refs << el[1]['fileref'] + end + end + return image_refs.uniq + end + + # Returns true if the document has code callouts + def has_callouts? + parser = REXML::Parsers::PullParser.new(File.new(@collapsed_docbook_file)) + while parser.has_next? + el = parser.pull + if el.start_element? and (el[0] == "calloutlist" or el[0] == "co") + return true + end + end + return false + end + end +end