diff --git a/src/api/document.py b/src/api/document.py
index bd39ccc..0387791 100644
--- a/src/api/document.py
+++ b/src/api/document.py
@@ -1,6 +1,8 @@
 from flask import Blueprint, request
 from markupsafe import escape
 from web_utils.get_arg import get_arg
+from web_utils.run import run
+import os
 
 from data.document import Document
 
@@ -20,3 +22,15 @@ def build():
     output += doc.build()
 
     return output.replace('\n', '<br>')
+
+@bp.route('/clone')
+def clone():
+    """Clone the repository named by the 'repo' argument as a new document."""
+    repo = get_arg('repo')
+    doc_name = get_arg('doc', os.path.splitext(os.path.basename(repo))[0])
+    branch = get_arg('branch', 'master')
+    source_dir = get_arg('source', 'source')
+
+    output = Document.clone(repo, branch, doc_name, source_dir)
+
+    return output.replace('\n', '<br>')
diff --git a/src/data/document.py b/src/data/document.py
index 9a8a71d..82a67ad 100644
--- a/src/data/document.py
+++ b/src/data/document.py
@@ -1,17 +1,44 @@
 import os
 from web_utils.run import run
+import shlex
+import shutil
+from unicodedata import normalize
+import string
 
 document_root = None
 
+def os_path_separators():
+    """Return the path separator characters that must not survive in a name."""
+    seps = ['/','\\']
+    for sep in os.path.sep, os.path.altsep:
+        if sep:
+            seps.append(sep)
+    return seps
+
+def sanitize_name(initial_name):
+    """Reduce initial_name to a string usable as a single path component.
+
+    Raises Exception when nothing is left or the result contains '..'.
+    """
+    # Sort out unicode characters
+    name = normalize('NFKD', initial_name).encode('ascii', 'ignore').decode('ascii')
+    # Replace path separators with underscores
+    name = name.replace('/', '_slash_')
+    for sep in os_path_separators():
+        name = name.replace(sep, '_')
+    # Ensure only valid characters
+    valid_chars = "-_.(){0}{1}".format(string.ascii_letters, string.digits)
+    name = "".join(ch for ch in name if ch in valid_chars)
+    if len(name) == 0 or '..' in name:
+        raise Exception("Invalid name: " + initial_name)
+    return name
+
 class Document:
     def __init__(self, doc_name, branch = 'master'):
         self.doc_name = doc_name
         self.branch = branch
 
-        doc_path = os.path.realpath(get_document_root()+'/'+doc_name+'/'+branch)
-        if not doc_path.startswith(get_document_root()):
-            raise Exception("Invalid document path for "+doc_name+"@"+branch)
-
+        doc_path = Document.make_doc_path(doc_name, branch)
         if not os.path.isdir(doc_path + "/repo/.git"):
             raise Exception("This document does not exist: "+doc_name+"@"+branch)
 
@@ -24,6 +51,58 @@ class Document:
 
     def pull(self):
         return run("cd \"" + self.doc_path + "/repo\" && git pull")
+
+    @staticmethod
+    def make_doc_path(doc_name, branch):
+        """Return the real path of doc_name@branch under the document root, built from sanitized names."""
+        doc_path = os.path.realpath(get_document_root()+'/'+sanitize_name(doc_name)+'/'+sanitize_name(branch))
+        if not doc_path.startswith(get_document_root()):
+            raise Exception("Invalid document path for "+doc_name+"@"+branch)
+        return doc_path
+
+    @staticmethod
+    def clone(repo, branch, doc_name, source_dir):
+        """Sparse-checkout source_dir of repo at branch into a new document directory.
+
+        Returns whatever run() returns for the generated command script.
+        Raises Exception when the document already exists, an argument is
+        rejected, or run() raises (the partial checkout is removed first).
+        """
+        # check the document does not already exist
+        doc_path = Document.make_doc_path(doc_name, branch)
+        if os.path.isdir(doc_path):
+            raise Exception("This document already exists: "+doc_name+"@"+branch)
+
+        if source_dir != sanitize_name(source_dir):
+            raise Exception("Invalid source directory name: " + source_dir)
+
+        # a leading '-' would be parsed by git as an option rather than as a name
+        if branch.startswith("-") or source_dir.startswith("-"):
+            raise Exception("Invalid branch or source directory name: " + branch + ", " + source_dir)
+
+        # we have potentially serious security issues related to cloning anything. For example cloning from SSH may use a pre-configured server identity, etc.
+        if not repo.startswith("https://"):
+            raise Exception("Only HTTPS repositories are allowed in current implementation")
+
+        target_dir = doc_path + "/repo"
+
+        # repo and branch are caller-supplied and not otherwise escaped: shell-quote every value instead of wrapping it in double quotes
+        cmd = ""
+        cmd += "mkdir -p " + shlex.quote(target_dir) + "\n"
+        cmd += "cd " + shlex.quote(target_dir) + "\n"
+        cmd += "git init " + shlex.quote("--initial-branch=" + branch) + "\n"
+        cmd += "git remote add -f origin " + shlex.quote(repo) + "\n"
+        cmd += "git sparse-checkout init\n"
+        cmd += "git sparse-checkout set " + shlex.quote(source_dir) + "\n"
+        cmd += "git pull origin " + shlex.quote(branch) + "\n"
+        cmd += "git branch " + shlex.quote("--set-upstream-to=origin/" + branch) + " " + shlex.quote(branch)
+
+        try:
+            return run(cmd)
+        except Exception:
+            # cloning failed, clean up and raise the same exception again
+            shutil.rmtree(doc_path, ignore_errors=True)
+            raise
 
 def set_document_root(dir):
     global document_root