diff --git a/src/api/document.py b/src/api/document.py
index bd39ccc..0387791 100644
--- a/src/api/document.py
+++ b/src/api/document.py
@@ -1,6 +1,8 @@
from flask import Blueprint, request
from markupsafe import escape
from web_utils.get_arg import get_arg
+from web_utils.run import run
+import os
from data.document import Document
@@ -20,3 +22,14 @@ def build():
output += doc.build()
return output.replace('\n', '
')
+
@bp.route('/clone')
def clone():
    """Clone a remote repository as a new document and return git's output.

    Arguments are fetched through ``get_arg`` (presumably request
    parameters - verify against ``web_utils.get_arg``):

    * ``repo``   - repository URL (no default is passed here).
    * ``doc``    - document name; defaults to the last path component of
      ``repo`` with its extension removed (``.../manual.git`` -> ``manual``).
    * ``branch`` - branch to check out; defaults to ``'master'``.
    * ``source`` - directory passed to ``Document.clone`` as ``source_dir``;
      defaults to ``'source'``.

    Returns the text produced by ``Document.clone`` with newlines replaced
    by HTML line breaks. Exceptions raised by ``Document.clone`` are not
    caught here.
    """
    repo = get_arg('repo')
    # Strip trailing slashes first: basename("https://host/doc/") would be
    # empty and the derived default document name would be unusable.
    default_name = os.path.splitext(os.path.basename(repo.rstrip('/')))[0]
    doc_name = get_arg('doc', default_name)
    branch = get_arg('branch', 'master')
    source_dir = get_arg('source', 'source')

    output = Document.clone(repo, branch, doc_name, source_dir)

    # NOTE(review): the replacement literal was split across two lines in the
    # reviewed text (an unterminated string). '<br>' is the presumed original
    # value - confirm against the repository.
    # NOTE(review): the command output is returned without HTML escaping;
    # consider markupsafe.escape if this response is rendered as HTML.
    return output.replace('\n', '<br>')
diff --git a/src/data/document.py b/src/data/document.py
index 9a8a71d..82a67ad 100644
--- a/src/data/document.py
+++ b/src/data/document.py
@@ -1,17 +1,38 @@
import os
from web_utils.run import run
+import shutil
+from unicodedata import normalize
+import string
document_root = None
def os_path_separators():
    """Return the characters that can act as a path separator.

    Always contains ``'/'`` and ``'\\'`` (so names are cleaned the same way
    on every platform), followed by ``os.path.sep`` / ``os.path.altsep``
    when they are not already in the list. Each separator appears once.
    """
    seps = ['/', '\\']
    for sep in (os.path.sep, os.path.altsep):
        # altsep is None on platforms with a single separator.
        if sep and sep not in seps:
            seps.append(sep)
    return seps


def sanitize_name(initial_name):
    """Turn a user-supplied name into a safe single path component.

    Steps, in order:

    1. NFKD-normalise and drop everything that is not ASCII
       (``"café"`` -> ``"cafe"``).
    2. Replace ``'/'`` with ``'_slash_'`` and every other path separator
       with ``'_'``.
    3. Remove any character that is not a letter, a digit or one of
       ``-_.()``.

    Returns the cleaned name.

    Raises:
        Exception: if the result is empty, is ``"."``, or contains ``".."``.
            These would resolve to the containing directory or one of its
            ancestors rather than to a child entry.
    """
    # Sort out unicode characters
    name = normalize('NFKD', initial_name).encode('ascii', 'ignore').decode('ascii')
    # Forward slashes get a readable marker so "feature/x" and "feature_x"
    # do not collapse to the same name.
    name = name.replace('/', '_slash_')
    for sep in os_path_separators():
        name = name.replace(sep, '_')
    # Ensure only valid characters
    valid_chars = frozenset("-_.()" + string.ascii_letters + string.digits)
    name = "".join(ch for ch in name if ch in valid_chars)
    # "." is rejected as well as "..": it is a valid-looking component that
    # realpath() silently collapses.
    if not name or name == '.' or '..' in name:
        raise Exception("Invalid name: " + initial_name)
    return name
+
class Document:
def __init__(self, doc_name, branch = 'master'):
self.doc_name = doc_name
self.branch = branch
- doc_path = os.path.realpath(get_document_root()+'/'+doc_name+'/'+branch)
- if not doc_path.startswith(get_document_root()):
- raise Exception("Invalid document path for "+doc_name+"@"+branch)
-
+ doc_path = Document.make_doc_path(doc_name, branch)
if not os.path.isdir(doc_path + "/repo/.git"):
raise Exception("This document does not exist: "+doc_name+"@"+branch)
@@ -24,6 +45,46 @@ class Document:
def pull(self):
    """Run ``git pull`` in this document's ``repo`` directory.

    Builds a shell command that changes into ``<self.doc_path>/repo`` and
    pulls, hands it to ``run`` and returns whatever ``run`` returns.
    """
    repo_dir = self.doc_path + "/repo"
    command = "cd \"" + repo_dir + "\" && git pull"
    return run(command)
+
@staticmethod
def make_doc_path(doc_name, branch):
    """Return the canonical directory for ``doc_name`` at ``branch``.

    Both names go through ``sanitize_name`` and are joined beneath the
    document root; the result is resolved with ``os.path.realpath``.

    Raises:
        Exception: if the resolved path is not located inside the document
            root (``sanitize_name`` may also raise for invalid names).
    """
    # Canonicalise the root too, otherwise a symlinked or relative root
    # never matches the realpath()-ed document path.
    root = os.path.realpath(get_document_root())
    doc_path = os.path.realpath(
        os.path.join(root, sanitize_name(doc_name), sanitize_name(branch))
    )
    # Compare whole path components: a plain startswith() would accept
    # "/data/docs-evil" for a root of "/data/docs".
    if os.path.commonpath([root, doc_path]) != root:
        raise Exception("Invalid document path for "+doc_name+"@"+branch)
    return doc_path
+
@staticmethod
def clone(repo, branch, doc_name, source_dir):
    """Create a new document by sparse-cloning ``source_dir`` from ``repo``.

    Args:
        repo: repository URL; must start with ``https://``.
        branch: branch to initialise, pull and track.
        doc_name: document name; with ``branch`` it selects the target
            directory through ``Document.make_doc_path``.
        source_dir: the directory handed to ``git sparse-checkout set``;
            must already be a sanitized name.

    Returns:
        Whatever ``run`` returns for the combined shell command.

    Raises:
        Exception: if the document directory already exists, if
            ``source_dir`` or ``branch`` is not acceptable, or if ``repo``
            is not an HTTPS URL. Exceptions raised by ``run`` are re-raised
            after the partially created directory has been removed.
    """
    # Local import so this method does not depend on an edit to the
    # module-level import block.
    import shlex

    # check the document does not already exist
    doc_path = Document.make_doc_path(doc_name, branch)
    if os.path.isdir(doc_path):
        raise Exception("This document already exists: "+doc_name+"@"+branch)

    # A leading "-" would be parsed by git as an option, not a directory.
    if source_dir != sanitize_name(source_dir) or source_dir.startswith("-"):
        raise Exception("Invalid source directory name: " + source_dir)

    # we have potentially serious security issues related to cloning anything.
    # For example cloning from SSH may use a pre-configured server identity, etc.
    if not repo.startswith("https://"):
        raise Exception("Only HTTPS repositories are allowed in current implementation")

    # Git refuses branch names starting with "-"; refuse them up front so
    # the value can never be read as a command-line option.
    if branch.startswith("-"):
        raise Exception("Invalid branch name: " + branch)

    target_dir = doc_path + "/repo"
    quoted_target = shlex.quote(target_dir)
    quoted_branch = shlex.quote(branch)

    # repo and branch come from the caller unmodified, so every interpolated
    # value is shell-quoted: plain double quotes do not stop `"`, `$(...)`
    # or backticks from being interpreted by the shell.
    steps = [
        "mkdir -p " + quoted_target,
        "cd " + quoted_target,
        "git init " + shlex.quote("--initial-branch=" + branch),
        "git remote add -f origin " + shlex.quote(repo),
        "git sparse-checkout init",
        "git sparse-checkout set " + shlex.quote(source_dir),
        "git pull origin " + quoted_branch,
        "git branch " + shlex.quote("--set-upstream-to=origin/" + branch)
        + " " + quoted_branch,
    ]
    # Chain with && so a failed mkdir/cd cannot let the git commands run in
    # whatever directory the process happens to be in.
    cmd = " && ".join(steps)

    try:
        return run(cmd)
    except Exception:
        # cloning failed, clean up and raise the same exception again;
        # ignore_errors keeps a missing directory from masking the real error
        shutil.rmtree(doc_path, ignore_errors=True)
        raise
def set_document_root(dir):
global document_root