venerdì 14 giugno 2013

Quickly get a list of all svn:externals for a remote svn repository

http://stackoverflow.com/questions/10183426/quickly-get-a-list-of-all-svnexternals-for-a-remote-svn-repository
We have an svn repository with lots of directories and files and our build system needs to be able to find all of the svn:externals properties, recursively for a branch in the repository, before checking it out. Currently we use:
svn propget svn:externals -R http://url.of.repo/Branch
This has proved extremely time-consuming. I finally came up with a solution: I decided to break up the request into multiple small svn requests and then make each of those a task to be run by a thread pool. This kind of slams the svn server, but in our case the svn server is on the LAN and this query is only made during full builds, so it doesn't seem to be an issue.
import os
import sys
import threading
import ThreadPool

# Lock held by getExternRev while it writes into externs_dict.
externs_lock = threading.Lock()
# Results: maps an external's path to a (url, last changed revision) tuple.
externs_dict = dict()
# Shared pool on which every svn query is queued as a task.
# NOTE(review): 8 is presumably the number of worker threads -- confirm
# against the ThreadPool module.
thread_pool = ThreadPool.ThreadPool(8)

def getExternRev(path, url):
    """Record the last-changed revision of one external in ``externs_dict``.

    Runs ``svn info`` on ``url`` and, for every output line containing
    "Last Changed Rev", stores ``(url, revision)`` under ``path`` in the
    module-level ``externs_dict`` while holding ``externs_lock``.  When the
    output has no such line, nothing is recorded.

    Arguments:
        path -- key under which the result is stored.
        url  -- target passed to ``svn info``.
    """
    # NOTE(review): url is interpolated into a shell command line; a value
    # containing a double quote would break out of the quoting.
    cmd = 'svn info "%s"' % url
    pipe = os.popen(cmd, 'r')
    try:
        data = pipe.read().splitlines()
    finally:
        # Always close the pipe so it is released even if reading fails.
        pipe.close()

    # Harvest last changed rev
    for line in data:
        if "Last Changed Rev" in line:
            revision = line.split(":")[1].strip()
            # "with" releases the lock even if the assignment raises.
            with externs_lock:
                externs_dict[path] = (url, revision)

def getExterns(url, base_dir):
    """Queue a revision lookup for each svn:externals entry set on ``url``.

    Runs ``svn propget svn:externals`` on ``url`` and splits every output
    line on whitespace.  The first field is appended to ``base_dir`` to form
    the external's path and the second field is used as its URL; a
    ``getExternRev`` task is then added to ``thread_pool`` for that pair.

    Lines with fewer than two fields (blank, whitespace-only or truncated)
    and lines whose first field starts with "#" (svn:externals comments) are
    skipped instead of raising IndexError or triggering a useless lookup.

    Only the "local_dir URL" field order is interpreted; entries written in
    another layout (for example with "-r REV") are not handled specially.

    Arguments:
        url      -- directory URL whose svn:externals property is read.
        base_dir -- prefix prepended to each external's local directory.
    """
    # NOTE(review): url is interpolated into a shell command line; a value
    # containing a double quote would break out of the quoting.
    cmd = 'svn propget svn:externals "%s"' % url
    pipe = os.popen(cmd, 'r')
    try:
        data = pipe.read().splitlines()
    finally:
        # Always close the pipe so it is released even if reading fails.
        pipe.close()

    for line in data:
        fields = line.split()
        if len(fields) < 2 or fields[0].startswith('#'):
            continue
        thread_pool.add_task(getExternRev, base_dir + fields[0], fields[1])

def processDir(url, base_dir):
    """Queue the externals lookup for ``url`` and recurse into its subdirectories.

    First adds a ``getExterns`` task for this directory, then runs
    ``svn list`` on ``url`` and adds one ``processDir`` task to
    ``thread_pool`` per listed entry that ends with "/", extending both
    ``url`` and ``base_dir`` with that entry's name.

    Arguments:
        url      -- directory URL to list.
        base_dir -- path prefix matching ``url``, passed on to ``getExterns``.
    """
    thread_pool.add_task(getExterns, url, base_dir)

    listing_pipe = os.popen('svn list "%s"' % url, 'r')
    entries = listing_pipe.read().splitlines()
    listing_pipe.close()

    # Only entries printed with a trailing slash are descended into.
    subdirs = [entry for entry in entries if entry.endswith('/')]
    for subdir in subdirs:
        thread_pool.add_task(processDir, url + subdir, base_dir + subdir)

def analyzePath(url, base_dir=''):
    """Start the scan at ``url`` and wait on the pool.

    Queues a ``processDir`` task for ``url`` on ``thread_pool`` and then
    calls ``thread_pool.wait_completion()``.
    NOTE(review): wait_completion presumably blocks until every queued task,
    including tasks queued by other tasks, has finished -- confirm against
    the ThreadPool module.

    Arguments:
        url      -- root directory URL to scan.
        base_dir -- path prefix for the root; defaults to the empty string.
    """
    root_task = (processDir, url, base_dir)
    thread_pool.add_task(*root_task)
    thread_pool.wait_completion()


# Scan the repository; the URL below is a placeholder for the real branch URL.
analyzePath("http://url/to/repository")
# Parenthesized single-argument print produces the same output on Python 2
# and is valid syntax on Python 3.
print(externs_dict)