Also avoids compiling too many PDFs at the same time (configurable). Also skips useless build tasks (when another build task for the same document is already pending).
242 lines
8.9 KiB
Python
242 lines
8.9 KiB
Python
import os
|
|
import uuid
|
|
from flask import current_app
|
|
from web_utils.task import ProcessTask
|
|
from web_utils.business_exception import BusinessException
|
|
import shutil
|
|
from unicodedata import normalize
|
|
from urllib.parse import quote as url_encode
|
|
from urllib.parse import unquote as url_decode
|
|
import string
|
|
import json
|
|
|
|
def os_path_separators():
    """Return the path separator characters that must be neutralised in names.

    The forward slash and the backslash are always included, whatever the
    host platform is; the platform's own ``os.path.sep`` and
    ``os.path.altsep`` are added when they are set and not already present,
    so every separator is listed exactly once.

    Returns:
        A new list of single-character strings (safe for the caller to mutate).
    """
    seps = ['/', '\\']
    for sep in (os.path.sep, os.path.altsep):
        # altsep is None on POSIX; sep usually duplicates one of the defaults.
        if sep and sep not in seps:
            seps.append(sep)
    return seps
|
|
|
|
def sanitize_name(initial_name, slashReplacement = '_', removeUnknownCharacters = True):
    """Turn a user-supplied name into a string usable as a path component.

    Steps, in order:
      1. Unicode is decomposed (NFKD) and every non-ASCII code point dropped.
      2. Every path separator reported by ``os_path_separators()`` is replaced
         by ``slashReplacement``.
      3. When ``removeUnknownCharacters`` is true, only ASCII letters, digits,
         ``-``, ``_``, ``.`` and the characters of ``slashReplacement`` are kept.

    Args:
        initial_name: The raw name.
        slashReplacement: Text substituted for each path separator.
        removeUnknownCharacters: Whether to strip characters outside the
            allowed set described above.

    Returns:
        The sanitized name.

    Raises:
        BusinessException: If the result is empty, is exactly ``.`` or
            contains ``..`` (all of which would alter the directory layout
            when used as a path component).
    """
    # Sort out unicode characters
    name = normalize('NFKD', initial_name).encode('ascii', 'ignore').decode('ascii')

    # Replace path separators
    for sep in os_path_separators():
        name = name.replace(sep, slashReplacement)

    # Ensure only valid characters
    if removeUnknownCharacters:
        valid_chars = frozenset("-_." + string.ascii_letters + string.digits + slashReplacement)
        name = "".join(ch for ch in name if ch in valid_chars)

    # "." would silently collapse one directory level, ".." would climb up.
    if not name or name == '.' or '..' in name:
        raise BusinessException("Invalid name: " + initial_name)

    return name
|
|
|
|
class Document:
    """A documentation project cloned from a git repository and built on disk.

    Each document lives in its own directory,
    ``<document root>/<encoded origin>/<sanitized doc name>/<sanitized branch>``,
    which contains the git checkout (``repo``), the generated API key
    (``apikey``), an optional ``settings.json`` and the build outputs.

    Attributes set by the constructor:
        origin / encoded_origin: decoded and filesystem-safe forms of the origin.
        doc_name, branch: as given by the caller.
        doc_path: absolute directory of the document.
        valid: True when ``doc_path`` holds a git checkout.
        settings: build settings (defaults overridden by settings.json).
    """

    def __init__(self, origin, doc_name, branch = 'master', allow_invalid = False):
        """Locate the document on disk and load its settings.

        Args:
            origin: Origin of the repository, either plain or already encoded
                by ``encode_origin`` (detected by the presence of ``!``).
            doc_name: Name of the document.
            branch: Branch the document tracks.
            allow_invalid: When true, a missing checkout yields an instance
                with ``valid == False`` instead of an exception.

        Raises:
            BusinessException: If the checkout is missing and
                ``allow_invalid`` is false, or if the path is invalid.
        """
        self.origin = Document.decode_origin(origin) if '!' in origin else origin
        self.encoded_origin = Document.encode_origin(self.origin)
        self.doc_name = doc_name
        self.branch = branch

        # Init default values. They are set before the validity check so that
        # invalid documents (see allow_invalid) still expose `settings` and
        # `doc_path` instead of raising AttributeError in get_url() & co.
        self.settings = {
            'multiversion': False,
            'default_version': '', # Only used if multiversion is True
            'build_pdf': False, # for multiversion, this can be set to an array of strings indicating each branch or tag for which we want to build the PDF
        }

        doc_path = Document.make_doc_path(self.origin, doc_name, branch)
        self.doc_path = doc_path
        # NOTE(review): looks like a leftover debug trace; kept so that the
        # process output is unchanged.
        print(doc_path)

        if not os.path.isdir(doc_path + "/repo/.git"):
            if allow_invalid:
                self.valid = False
                return
            raise BusinessException("This document does not exist: "+self.origin+'/'+doc_name+"@"+branch)

        self.valid = True

        # Read settings.json (if it exists); only known keys are taken over.
        json_settings_filename = self.doc_path + '/settings.json'
        if os.path.exists(json_settings_filename):
            with open(json_settings_filename, encoding='utf-8') as f:
                json_settings = json.load(f)
            for key in self.settings:
                if key in json_settings:
                    self.settings[key] = json_settings[key]

    def _build_commands(self):
        """Assemble the commands of a build, without running anything.

        Returns:
            A tuple ``(cmd, is_intensive_task)`` where ``cmd`` is the ordered
            list of argument lists to execute from the ``repo`` directory and
            ``is_intensive_task`` is True when at least one PDF is built.
        """
        import shlex  # local import: only the shell snippets below need it

        doc_path = self.doc_path
        build_pdf = self.settings['build_pdf']
        is_intensive_task = False

        # update source files from git
        cmd = [['git', 'pull']]

        if self.settings['multiversion']:
            # also fetch all branches and tags, so that sphinx-multiversion knows what versions exist and can pull them
            cmd.append(['git', 'fetch', '--all'])
            cmd.append(['bash', '-c', 'for BRANCH in $(git branch -a | grep remotes | grep -v HEAD | grep -v master); do git branch --track "${BRANCH#remotes/origin/}" "${BRANCH}" || git branch -f "${BRANCH#remotes/origin/}" -t "${BRANCH}"; done'])

            # build the HTML version
            cmd.append(['make', 'html_versions', 'BUILDDIR=../build'])

            # In multiversion mode PDFs are only built for the explicitly
            # listed branches/tags.
            if isinstance(build_pdf, list):
                tmp_source = doc_path + '/tmp_source'
                for pdf_branch_name in build_pdf:
                    is_intensive_task = True

                    # Extract the source files to a temporary directory.
                    # Values are shell-quoted because they are interpolated
                    # into a `bash -c` script.
                    cmd.append(['rm', '-rf', tmp_source])
                    cmd.append(['mkdir', tmp_source])
                    cmd.append(['bash', '-c', 'git archive ' + shlex.quote(pdf_branch_name) + ' | tar -x -C ' + shlex.quote(tmp_source)])

                    # Build the PDF
                    cmd.append(['bash', '-c', 'cd ' + shlex.quote(tmp_source) + ' && make pdf'])

                    # Copy the generated PDF file to the HTML directory, so that it is accessible for download by users
                    cmd.append(['cp', tmp_source + '/build/weasyprint/vheliotech.pdf', doc_path + '/build/html_versions/' + pdf_branch_name + '/' + self.doc_name + '.pdf'])

                    # Clean up
                    cmd.append(['rm', '-rf', tmp_source])

            html_dir = doc_path + '/build/html_versions/'
        else:
            # build the HTML version
            cmd.append(['make', 'html', 'BUILDDIR=../build'])

            if build_pdf:
                is_intensive_task = True

                # build the PDF version
                cmd.append(['make', 'pdf', 'BUILDDIR=../build'])

                # Copy the generated PDF file to the HTML directory, so that it is accessible for download by users
                cmd.append(['cp', doc_path + '/build/weasyprint/vheliotech.pdf', doc_path + '/build/html/' + self.doc_name + '.pdf'])

            html_dir = doc_path + '/build/html/'

        # Now that the build is successful, move it to the deployment directory (replacing any existing content)
        cmd.append(['rm', '-rf', doc_path + '/dist'])
        cmd.append(['mv', html_dir, doc_path + '/dist/'])

        return cmd, is_intensive_task

    def build(self):
        """Start a task that updates the checkout and rebuilds the document.

        Returns:
            The started ``ProcessTask``.
        """
        cmd, is_intensive_task = self._build_commands()

        # The task declares which resources it uses: always the document's
        # files, plus the 'intensive_task' marker when a PDF is built.
        # NOTE(review): presumably the task runner uses these to limit
        # concurrent PDF builds - confirm in web_utils.task.
        resource_usage = [('document_files', self.doc_path)]
        if is_intensive_task:
            resource_usage.append(('intensive_task', ''))

        task = ProcessTask(cmd, cwd = self.doc_path + "/repo", resource_usage = resource_usage)
        task.start(skip_if_another_pending = self.doc_path)

        return task

    def delete(self):
        """Delete a valid document from disk.

        Raises:
            Exception: If the document is not valid.
        """
        if not self.valid:
            raise Exception("Internal error")
        self.delete_folder()

    def delete_folder(self):
        """Remove the document directory, then its parents if they became empty."""
        doc_path = Document.make_doc_path(self.origin, self.doc_name, self.branch)
        shutil.rmtree(doc_path)

        # Prune the <doc name> level, then the <origin> level, when empty.
        doc_root = os.path.dirname(doc_path)
        if not os.listdir(doc_root):
            os.rmdir(doc_root)
            origin_root = os.path.dirname(doc_root)
            if not os.listdir(origin_root):
                os.rmdir(origin_root)

    def _url_base(self):
        """Return the URL prefix shared by the HTML and PDF links (ends with '/')."""
        base = "/doc/" + self.encoded_origin + "/" + sanitize_name(self.doc_name)+'/'+sanitize_name(self.branch) + "/"
        if self.settings['multiversion']:
            # Multiversion builds have one sub-directory per version.
            base += self.settings['default_version'] + "/"
        return base

    def get_url(self):
        """Return the URL of the document's HTML index page."""
        return self._url_base() + "index.html"

    def get_pdf_url(self):
        """Return the URL of the document's PDF file."""
        return self._url_base() + self.doc_name + ".pdf"

    def get_api_key(self):
        """Return the API key stored in the document directory, newlines removed."""
        with open(self.doc_path + "/apikey") as f:
            return f.read().replace('\n', '')

    @staticmethod
    def encode_origin(origin):
        """Percent-encode ``origin`` (every reserved character, '/' included) and use '!' instead of '%'."""
        return url_encode(origin, safe='').replace('%', '!')

    @staticmethod
    def decode_origin(origin):
        """Reverse ``encode_origin``."""
        return url_decode(origin.replace('!', '%'))

    @staticmethod
    def _is_inside(root, path):
        """Return True when ``path`` is located strictly below directory ``root``.

        Both arguments are expected to be normalised absolute paths. The
        comparison is done on whole path components, so ``/srv/docs-evil`` is
        not considered to be inside ``/srv/docs``.
        """
        return path.startswith(root.rstrip(os.sep) + os.sep)

    @staticmethod
    def make_doc_path(origin, doc_name, branch):
        """Return the absolute directory of a document.

        Raises:
            BusinessException: If a name is invalid or if the resulting path
                is not located below the document root.
        """
        # Resolve the root too, so both sides of the containment check are
        # in the same (symlink-free) form.
        root = os.path.realpath(get_document_root())
        doc_path = os.path.realpath(root+'/'+Document.encode_origin(origin)+'/'+sanitize_name(doc_name)+'/'+sanitize_name(branch))
        if not Document._is_inside(root, doc_path):
            raise BusinessException("Invalid document path for "+origin+"/"+doc_name+"@"+branch)
        return doc_path

    @staticmethod
    def get_origin(repo):
        """Derive the origin (host and parent path) from a repository URL.

        Raises:
            BusinessException: If the origin is invalid or contains '!',
                which is reserved by ``encode_origin``.
        """
        result = sanitize_name(os.path.dirname(repo).replace('https://', ''), '/', False)
        if '!' in result:
            raise BusinessException("Invalid character: !")
        return result

    @staticmethod
    def clone(repo, branch, doc_name, source_dir):
        """Create a new document by cloning ``repo`` and start the clone task.

        Args:
            repo: HTTPS URL of the git repository.
            branch: Branch to check out.
            doc_name: Name of the document.
            source_dir: Source directory name; only validated here.

        Returns:
            The started ``ProcessTask``. The document directory is removed
            again if the task fails.

        Raises:
            BusinessException: If the document already exists or an argument
                is rejected.
        """
        # check the document does not already exist
        origin = Document.get_origin(repo)
        doc_path = Document.make_doc_path(origin, doc_name, branch)
        if os.path.isdir(doc_path):
            raise BusinessException("This document already exists: "+origin+"/"+doc_name+"@"+branch)

        if source_dir != sanitize_name(source_dir):
            raise BusinessException("Invalid source directory name: " + source_dir)

        # we have potentially serious security issues related to cloning anything. For example cloning from SSH may use a pre-configured server identity, etc.
        if not repo.startswith("https://"):
            raise BusinessException("Only HTTPS repositories are allowed in current implementation")

        # The branch is passed as a positional argument to git below; a
        # leading dash would be parsed as an option (and is not a valid
        # branch name for git anyway).
        if branch.startswith('-'):
            raise BusinessException("Invalid branch name: " + branch)

        # Generate an API key
        apikey = str(uuid.uuid4())
        # NOTE(review): this writes a secret to stdout; kept for backward
        # compatibility, consider removing it.
        print("generated API key: " + apikey)

        target_dir = doc_path + "/repo"
        os.makedirs(target_dir, exist_ok = True)
        with open(doc_path + "/apikey", "w") as apikey_file:
            apikey_file.write(apikey)

        cmd = [
            ['git', 'init', '--initial-branch=' + branch],
            ['git', 'remote', 'add', '-f', 'origin', repo],
            #['git', 'sparse-checkout', 'init'],
            #['git', 'sparse-checkout', 'set', source_dir],
            ['git', 'pull', 'origin', branch],
            ['git', 'branch', '--set-upstream-to=origin/' + branch, branch],
        ]

        task = ProcessTask(cmd, cwd = target_dir)
        task.on_fail(lambda : shutil.rmtree(doc_path, ignore_errors = True))
        task.start()

        return task

    @staticmethod
    def list():
        """Return a ``Document`` for every branch directory below the document root.

        Documents without a checkout are included with ``valid == False``.
        Stray files at the origin or document-name level are skipped.
        """
        root = get_document_root()
        result = []
        for origin in os.listdir(root):
            origin_dir = root + "/" + origin
            if not os.path.isdir(origin_dir):
                continue
            for doc_name in os.listdir(origin_dir):
                doc_dir = origin_dir + "/" + doc_name
                if not os.path.isdir(doc_dir):
                    continue
                for branch in os.listdir(doc_dir):
                    result.append(Document(origin, doc_name, branch, allow_invalid = True))
        return result
|
|
|
|
def get_document_root():
    """Return the ``DOCUMENT_ROOT_DIR`` entry of the current Flask app's configuration."""
    app_config = current_app.config
    return app_config['DOCUMENT_ROOT_DIR']
|
|
|