Switch to LRU-based sha-to-PR cache

This updates the github driver to cache PRs by sha using cachetools'
LRUCache.

We make this change because we now need to cache closed PRs, so we
can no longer rely on the act of closing a PR to remove it from the
cache. Since we don't have a good method of evicting entries
explicitly, we fall back to LRU with a reasonable cache size (4k shas
per project, roughly 2k PRs, since each PR contributes its head sha
and its merge_commit_sha).

Change-Id: I5fb6c8b33f9eed221a8b84e537f02e7dccf2d2df
Clark Boylan 2019-02-18 11:38:16 -08:00
parent 4056352036
commit cee7a4e248
4 changed files with 31 additions and 10 deletions
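
For context, a minimal sketch (illustration only, not Zuul code) of the
cachetools.LRUCache behavior the change relies on: the cache behaves like
a dict with a maxsize, and once it is full the least recently used sha is
dropped automatically, so closed PRs simply age out:

    import cachetools

    # sha -> set of PR numbers; when maxsize is exceeded the least
    # recently used sha is evicted automatically.
    sha_to_prs = cachetools.LRUCache(maxsize=4096)

    sha_to_prs.setdefault('123456', set()).add(1)
    print(sha_to_prs.get('123456', set()))   # {1}

    # Insert 4096 other shas; '123456' is now the least recently used
    # entry and falls out to keep the cache at maxsize.
    for x in range(4096):
        sha_to_prs.setdefault(str(x), set())
    print(sha_to_prs.get('123456', set()))   # set()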


@@ -19,6 +19,7 @@ sqlalchemy
alembic
cryptography>=1.6
cachecontrol
cachetools
pyjwt
iso8601
psutil


@@ -251,7 +251,9 @@ class FakeRepository(object):
        }
        return FakeResponse(data)

    def pull_requests(self, state=None):
    def pull_requests(self, state=None, sort=None, direction=None):
        # sort and direction are unused currently, but present to match
        # real world call signatures.
        pulls = []
        for pull in self.data.pull_requests.values():
            if pull.project != self.name:

View File

@@ -1336,8 +1336,13 @@ class TestGithubShaCache(BaseTestCase):
        }
        cache.update('foo/bar', pr_dict)
        self.assertEqual(cache.get('foo/bar', '123456'), set({1}))
        pr_dict['state'] = 'closed'
        cache.update('foo/bar', pr_dict)
        # Create 4096 entries so original falls off.
        for x in range(0, 4096):
            pr_dict['head']['sha'] = str(x)
            cache.update('foo/bar', pr_dict)
            cache.get('foo/bar', str(x))
        self.assertEqual(cache.get('foo/bar', '123456'), set())

    def testMultiInsert(self):
@@ -1382,7 +1387,7 @@ class TestGithubShaCache(BaseTestCase):
        self.assertEqual(cache.get('bar/foo', '789'), set())
        self.assertEqual(cache.get('foo/bar', '789'), set())

    def testNoUpdate(self):
    def testClosedPRRemains(self):
        cache = GithubShaCache()
        pr_dict = {
            'head': {
@@ -1392,4 +1397,4 @@
            'state': 'closed',
        }
        cache.update('foo/bar', pr_dict)
        self.assertEqual(cache.get('bar/foo', '123456'), set())
        self.assertEqual(cache.get('foo/bar', '123456'), set({1}))


@@ -29,6 +29,7 @@ import cherrypy
import cachecontrol
from cachecontrol.cache import DictCache
from cachecontrol.heuristics import BaseHeuristic
import cachetools
import iso8601
import jwt
import requests
@@ -75,14 +76,21 @@ class GithubShaCache(object):
        self.projects = {}

    def update(self, project_name, pr):
        project_cache = self.projects.setdefault(project_name, {})
        project_cache = self.projects.setdefault(
            project_name,
            # Cache up to 4k shas for each project
            # Note we cache the actual sha for a PR and the
            # merge_commit_sha so we make this fairly large.
            cachetools.LRUCache(4096)
        )
        sha = pr['head']['sha']
        number = pr['number']
        cached_prs = project_cache.setdefault(sha, set())
        if pr['state'] == 'open':
            cached_prs.add(number)
        merge_commit_sha = pr.get('merge_commit_sha')
        if merge_commit_sha:
            cached_prs = project_cache.setdefault(merge_commit_sha, set())
            cached_prs.add(number)
        else:
            cached_prs.discard(number)

    def get(self, project_name, sha):
        project_cache = self.projects.get(project_name, {})
@@ -1200,7 +1208,12 @@ class GithubConnection(BaseConnection):
        github = self.getGithubClient(project_name)
        owner, project = project_name.split('/')
        repo = github.repository(owner, project)
        for pr in repo.pull_requests(state='open'):
        for pr in repo.pull_requests(state='open',
                                     # We sort by updated from oldest to
                                     # newest as that will prefer more
                                     # recently updated PRs in our LRU cache.
                                     sort='updated',
                                     direction='asc'):
            pr_dict = pr.as_dict()
            self._sha_pr_cache.update(project_name, pr_dict)
            if pr.head.sha != sha:
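
Putting the driver changes together, a condensed standalone sketch of the
cache layout the diff builds (simplified for illustration; the class name
ShaToPRCache is a stand-in, not the actual Zuul class):

    import cachetools

    class ShaToPRCache:
        """Per-project LRU caches mapping commit sha -> set of PR numbers."""

        def __init__(self, maxsize=4096):
            self.maxsize = maxsize
            # project name -> LRUCache of sha -> set of PR numbers
            self.projects = {}

        def update(self, project_name, pr):
            project_cache = self.projects.setdefault(
                project_name, cachetools.LRUCache(self.maxsize))
            project_cache.setdefault(pr['head']['sha'], set()).add(pr['number'])
            # Also index the merge commit sha when GitHub has computed one,
            # so lookups by the merged commit also find the PR.
            merge_sha = pr.get('merge_commit_sha')
            if merge_sha:
                project_cache.setdefault(merge_sha, set()).add(pr['number'])

        def get(self, project_name, sha):
            return self.projects.get(project_name, {}).get(sha, set())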