15328 depot should attach HTTP cache control headers to responses 2010.1H
authorShawn Walker <shawn.walker@oracle.com>
Mon, 17 May 2010 15:55:47 -0500
branch2010.1H
changeset 1900 335d37df0fea
parent 1896 829b39e8a0d7
child 1922 48706bcc893f
15328 depot should attach HTTP cache control headers to responses
src/modules/server/depot.py
src/modules/server/face.py
src/tests/cli/t_pkg_depotd.py
--- a/src/modules/server/depot.py	Thu May 13 18:25:46 2010 -0500
+++ b/src/modules/server/depot.py	Mon May 17 15:55:47 2010 -0500
@@ -21,12 +21,12 @@
 #
 
 #
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 import cherrypy
 from cherrypy.lib.static import serve_file
+from email.utils import formatdate
 
 import cStringIO
 import errno
@@ -63,10 +63,12 @@
 
 from pkg.server.query_parser import Query, ParseError, BooleanQueryException
 
+
 class Dummy(object):
         """Dummy object used for dispatch method mapping."""
         pass
 
+
 class DepotHTTP(object):
         """The DepotHTTP object is intended to be used as a cherrypy
         application object and represents the set of operations that a
@@ -175,6 +177,42 @@
                         # This handles SIGUSR1
                         cherrypy.engine.subscribe("graceful", self.refresh)
 
+        def __set_response_expires(self, op_name, expires, max_age=None):
+                """Used to set expiration headers on a response dynamically
+                based on the name of the operation.
+
+                'op_name' is a string containing the name of the depot
+                operation as listed in the REPO_OPS_* constants.
+
+                'expires' is an integer value in seconds indicating how
+                long from when the request was made the content returned
+                should expire.  The maximum value is 365*86400.
+
+                'max_age' is an integer value in seconds indicating the
+                maximum length of time a response should be considered
+                valid.  For some operations, the maximum value for this
+                parameter is equal to the repository's refresh_seconds
+                property."""
+
+                rs = self._repo.cfg.get_property("repository",
+                    "refresh_seconds")
+                if max_age is None:
+                        max_age = min((rs, expires))
+
+                now = cherrypy.response.time
+                if op_name == "publisher" or op_name == "search" or \
+                    op_name == "catalog":
+                        # For these operations, cap the value based on
+                        # refresh_seconds.
+                        expires = now + min((rs, max_age))
+                else:
+                        expires = now + expires
+
+                headers = cherrypy.response.headers
+                headers["Cache-Control"] = \
+                    "must-revalidate, no-transform, max-age=%d" % max_age
+                headers["Expires"] = formatdate(timeval=expires, usegmt=True)
+
         def refresh(self):
                 """Catch SIGUSR1 and reload the depot information."""
                 self._repo.reload()
@@ -213,12 +251,13 @@
                 raise cherrypy.HTTPError(httplib.NOT_FOUND, "Version '%s' not "
                     "supported for operation '%s'\n" % (ver, op))
 
-        @cherrypy.tools.response_headers(headers = \
-            [("Content-Type", "text/plain")])
+        @cherrypy.tools.response_headers(headers=\
+            [("Content-Type", "text/plain; charset=utf-8")])
         def versions_0(self, *tokens):
                 """Output a text/plain list of valid operations, and their
                 versions, supported by the repository."""
 
+                self.__set_response_expires("versions", 5*60, 5*60)
                 versions = "pkg-server %s\n" % pkg.VERSION
                 versions += "\n".join(
                     "%s %s" % (op, " ".join(vers))
@@ -231,7 +270,8 @@
                 pairs."""
 
                 response = cherrypy.response
-                response.headers["Content-type"] = "text/plain"
+                response.headers["Content-type"] = "text/plain; charset=utf-8"
+                self.__set_response_expires("search", 86400, 86400)
 
                 try:
                         token = tokens[0]
@@ -275,7 +315,6 @@
 
         search_0._cp_config = { "response.stream": True }
 
-
         def search_1(self, *args, **params):
                 """Based on the request path, return a list of packages that
                 match the specified criteria."""
@@ -297,8 +336,6 @@
                 if not query_str_lst:
                         raise cherrypy.HTTPError(httplib.BAD_REQUEST)
 
-                response = cherrypy.response
-
                 if not self._repo.search_available:
                         raise cherrypy.HTTPError(httplib.SERVICE_UNAVAILABLE,
                             "Search temporarily unavailable")
@@ -317,7 +354,9 @@
                         cherrypy.log("Request failed: %s" % str(e))
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
 
-                response.headers["Content-type"] = "text/plain"
+                response = cherrypy.response
+                response.headers["Content-type"] = "text/plain; charset=utf-8"
+                self.__set_response_expires("search", 86400, 86400)
 
                 # In order to be able to have a return code distinguish between
                 # no results and search unavailable, we need to use a different
@@ -366,9 +405,10 @@
                 # that yields the catalog content.
                 c = self._repo.catalog
                 response = cherrypy.response
-                response.headers["Content-type"] = "text/plain"
+                response.headers["Content-type"] = "text/plain; charset=utf-8"
                 response.headers["Last-Modified"] = c.last_modified.isoformat()
                 response.headers["X-Catalog-Type"] = "full"
+                self.__set_response_expires("catalog", 86400, 86400)
 
                 def output():
                         try:
@@ -404,7 +444,9 @@
                         # information.
                         cherrypy.log("Request failed: %s" % str(e))
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
-                return serve_file(fpath, "text/plain")
+
+                self.__set_response_expires("catalog", 86400, 86400)
+                return serve_file(fpath, "text/plain; charset=utf-8")
 
         catalog_1._cp_config = { "response.stream": True }
 
@@ -424,6 +466,10 @@
                 # A broken proxy (or client) has caused a fully-qualified FMRI
                 # to be split up.
                 comps = [t for t in tokens]
+                if not comps:
+                        raise cherrypy.HTTPError(httplib.FORBIDDEN,
+                            _("Directory listing not allowed."))
+
                 if comps[0] == "pkg:" and comps[1] in pubs:
                         # Only one slash here as another will be added below.
                         comps[0] += "/"
@@ -446,7 +492,8 @@
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
 
                 # Send manifest
-                return serve_file(fpath, "text/plain")
+                self.__set_response_expires("manifest", 86400*365, 86400*365)
+                return serve_file(fpath, "text/plain; charset=utf-8")
 
         manifest_0._cp_config = { "response.stream": True }
 
@@ -537,10 +584,14 @@
         # before the response even begins and the point at which @tools
         # hooks in is too late.
         filelist_0._cp_config = {
-                "response.stream": True,
-                "tools.response_headers.on": True,
-                "tools.response_headers.headers": [("Content-Type",
-                "application/data")]
+            "response.stream": True,
+            "tools.response_headers.on": True,
+            "tools.response_headers.headers": [
+                ("Content-Type", "application/data"),
+                ("Pragma", "no-cache"),
+                ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+                ("Expires", 0)
+            ]
         }
 
         def file_0(self, *tokens):
@@ -563,10 +614,14 @@
                         cherrypy.log("Request failed: %s" % str(e))
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
 
+                self.__set_response_expires("file", 86400*365, 86400*365)
                 return serve_file(fpath, "application/data")
 
         file_0._cp_config = { "response.stream": True }
 
+        @cherrypy.tools.response_headers(headers=[("Pragma", "no-cache"),
+            ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+            ("Expires", 0)])
         def open_0(self, *tokens):
                 """Starts a transaction for the package name specified in the
                 request path.  Returns no output."""
@@ -585,7 +640,7 @@
 
                 try:
                         trans_id = self._repo.open(client_release, pfmri)
-                        response.headers["Content-type"] = "text/plain"
+                        response.headers["Content-type"] = "text/plain; charset=utf-8"
                         response.headers["Transaction-ID"] = trans_id
                 except repo.RepositoryError, e:
                         # Assume a bad request was made.  A 404 can't be
@@ -594,6 +649,9 @@
                         # operation.
                         raise cherrypy.HTTPError(httplib.BAD_REQUEST, str(e))
 
+        @cherrypy.tools.response_headers(headers=[("Pragma", "no-cache"),
+            ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+            ("Expires", 0)])
         def close_0(self, *tokens):
                 """Ends an in-flight transaction for the Transaction ID
                 specified in the request path.
@@ -638,6 +696,9 @@
                 response.headers["Package-FMRI"] = pfmri
                 response.headers["State"] = pstate
 
+        @cherrypy.tools.response_headers(headers=[("Pragma", "no-cache"),
+            ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+            ("Expires", 0)])
         def abandon_0(self, *tokens):
                 """Aborts an in-flight transaction for the Transaction ID
                 specified in the request path.  Returns no output."""
@@ -657,6 +718,9 @@
                         # operation.
                         raise cherrypy.HTTPError(httplib.BAD_REQUEST, str(e))
 
+        @cherrypy.tools.response_headers(headers=[("Pragma", "no-cache"),
+            ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+            ("Expires", 0)])
         def add_0(self, *tokens):
                 """Adds an action and its content to an in-flight transaction
                 for the Transaction ID specified in the request path.  The
@@ -722,10 +786,13 @@
         # set the timeout higher since the default is five minutes; not really
         # enough for a slow connection to upload content.
         add_0._cp_config = {
-                "request.process_request_body": False,
-                "response.timeout": 3600,
+            "request.process_request_body": False,
+            "response.timeout": 3600,
         }
 
+        @cherrypy.tools.response_headers(headers=[("Pragma", "no-cache"),
+            ("Cache-Control", "no-cache, no-transform, must-revalidate"),
+            ("Expires", 0)])
         def index_0(self, *tokens):
                 """Triggers a refresh of the search indices.
                 Returns no output."""
@@ -745,8 +812,8 @@
                         cherrypy.log("Unknown index subcommand: %s" % cmd)
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
 
-        @cherrypy.tools.response_headers(headers = \
-            [("Content-Type", "text/plain")])
+        @cherrypy.tools.response_headers(headers=\
+            [("Content-Type", "text/plain; charset=utf-8")])
         def info_0(self, *tokens):
                 """ Output a text/plain summary of information about the
                     specified package. The request is an encoded pkg FMRI.  If
@@ -821,6 +888,7 @@
                         misc.gunzip_from_stream(lfile, lsummary)
                 lsummary.seek(0)
 
+                self.__set_response_expires("info", 86400*365, 86400*365)
                 return """\
           Name: %s
        Summary: %s
@@ -876,6 +944,7 @@
                         cherrypy.log("Request failed: %s" % str(e))
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
                 buf.seek(0)
+                self.__set_response_expires("publisher", 86400*365, 86400*365)
                 return buf.getvalue()
 
         @cherrypy.tools.response_headers(headers=[(
@@ -975,6 +1044,7 @@
                         cherrypy.log("Request failed: %s" % str(e))
                         raise cherrypy.HTTPError(httplib.NOT_FOUND, str(e))
                 buf.seek(0)
+                self.__set_response_expires("p5i", 86400*365, 86400*365)
                 return buf.getvalue()
 
 
@@ -1028,7 +1098,7 @@
                 # that yields the catalog content.
                 c = self._repo.catalog
                 response = cherrypy.response
-                response.headers["Content-type"] = "text/plain"
+                response.headers["Content-type"] = "text/plain; charset=utf-8"
                 response.headers["Last-Modified"] = c.last_modified.isoformat()
                 response.headers["X-Catalog-Type"] = "full"
 
@@ -1044,9 +1114,11 @@
 
                 return output()
 
-        catalog_0._cp_config = { "response.stream": True,
-                                 "tools.nasty_httperror.on": True,
-                                 "tools.nasty_httperror.bonus": 1 }
+        catalog_0._cp_config = {
+            "response.stream": True,
+            "tools.nasty_httperror.on": True,
+            "tools.nasty_httperror.bonus": 1
+        }
 
         def manifest_0(self, *tokens):
                 """The request is an encoded pkg FMRI.  If the version is
@@ -1111,11 +1183,11 @@
                             len(self.requested_manifests) - 1)
                         badpath = self.requested_manifests[pick]
 
-                        return serve_file(badpath, "text/plain")
+                        return serve_file(badpath, "text/plain; charset=utf-8")
 
                 # NASTY
                 # Call a misbehaving serve_file
-                return self.nasty_serve_file(fpath, "text/plain")
+                return self.nasty_serve_file(fpath, "text/plain; charset=utf-8")
 
         manifest_0._cp_config = { "response.stream": True }
 
@@ -1236,11 +1308,15 @@
         # before the response even begins and the point at which @tools
         # hooks in is too late.
         filelist_0._cp_config = {
-                "response.stream": True,
-                "tools.nasty_httperror.on": True,
-                "tools.response_headers.on": True,
-                "tools.response_headers.headers": [("Content-Type",
-                "application/data")]
+            "response.stream": True,
+            "tools.nasty_httperror.on": True,
+            "tools.response_headers.on": True,
+            "tools.response_headers.headers": [
+                ("Content-Type", "application/data"),
+                ("Pragma", "no-cache"),
+                ("Cache-Control", "no-cache, must-revalidate"),
+                ("Expires", 0)
+            ]
         }
 
         def __get_bad_path(self, v):
@@ -1348,11 +1424,11 @@
                             len(self.requested_catalogs) - 1)
                         badpath = self.requested_catalogs[pick]
 
-                        return serve_file(badpath, "text/plain")
+                        return serve_file(badpath, "text/plain; charset=utf-8")
 
                 # NASTY
                 # Call a misbehaving serve_file
-                return self.nasty_serve_file(fpath, "text/plain")
+                return self.nasty_serve_file(fpath, "text/plain; charset=utf-8")
 
         catalog_1._cp_config = { "response.stream": True }
 
--- a/src/modules/server/face.py	Thu May 13 18:25:46 2010 -0500
+++ b/src/modules/server/face.py	Mon May 17 15:55:47 2010 -0500
@@ -18,9 +18,10 @@
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
+
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+#
 
 """face - provides the BUI (Browser User Interface) for the image packaging server"""
 
@@ -80,6 +81,8 @@
         if path == "":
                 path = "index.shtml"
         elif path.split("/")[0] == "feed":
+                response.headers.update({ "Expires": 0, "Pragma": "no-cache",
+                    "Cache-Control": "no-cache, no-transform, must-revalidate" })
                 return feed(repo, request, response)
 
         if not path.endswith(".shtml"):
@@ -94,6 +97,8 @@
                             web_root, spath))
 
         try:
+                response.headers.update({ "Expires": 0, "Pragma": "no-cache",
+                    "Cache-Control": "no-cache, no-transform, must-revalidate" })
                 return __render_template(repo, content_root, web_root, request,
                     path)
         except sae.VersionException, e:
--- a/src/tests/cli/t_pkg_depotd.py	Thu May 13 18:25:46 2010 -0500
+++ b/src/tests/cli/t_pkg_depotd.py	Mon May 17 15:55:47 2010 -0500
@@ -20,8 +20,9 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+#
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+#
 
 import testutils
 if __name__ == "__main__":
@@ -368,8 +369,8 @@
         def test_writable_root(self):
                 """Tests whether the index and feed cache file are written to
                 the writable root parameter."""
+
                 self.make_misc_files(TestPkgDepot.misc_files)
-
                 writable_root = os.path.join(self.test_root,
                     "writ_root")
                 index_dir = os.path.join(writable_root, "index")
@@ -538,6 +539,12 @@
             open [email protected],5.11-0
             close """
 
+        file10 = """
+            open [email protected],5.11-0
+            add dir mode=0755 owner=root group=bin path=/var
+            add file tmp/file path=var/file mode=644 owner=root group=bin
+            close """
+
         system10 = """
             open system/[email protected],5.11-0
             add set name="description" value="Package to test package names with slashes"
@@ -597,6 +604,8 @@
                 self.tpath = tempfile.mkdtemp(prefix="tpath",
                     dir=self.test_root)
 
+                self.make_misc_files("tmp/file")
+
         def test_0_depot_bui_output(self):
                 """Verify that a non-error response and valid HTML is returned
                 for each known BUI page in every available depot mode."""
@@ -766,6 +775,53 @@
                         if e.code != httplib.NOT_FOUND:
                                 raise
 
+        def test_3_headers(self):
+                """Ensure expected headers are present for client operations
+                (excluding publication)."""
+
+                # Now update the repository configuration while the depot is
+                # stopped so changes won't be overwritten on exit.
+                self.__update_repo_config()
+
+                # Start the depot.
+                self.dc.start()
+
+                durl = self.dc.get_depot_url()
+                pfmri = fmri.PkgFmri(self.pkgsend_bulk(durl, self.file10)[0],
+                    "5.11")
+
+                def get_headers(req_path):
+                        try:
+                                rinfo = urllib2.urlopen(urlparse.urljoin(durl,
+                                    req_path)).info()
+                                return rinfo.items()
+                        except Exception, e:
+                                raise RuntimeError("retrieval of %s "
+                                    "failed: %s" % (req_path, str(e)))
+
+                for req_path in ("publisher/0", 'search/0/%2Fvar%2Ffile',
+                    'search/1/False_2_None_None_%2Fvar%2Ffile',
+                    "versions/0", "manifest/0/%s" % pfmri.get_url_path(),
+                    "catalog/0", "catalog/1/catalog.attrs",
+                    "file/0/3aad0bca6f3a6f502c175700ebe90ef36e312d7e",
+                    "filelist/0"):
+                        hdrs = dict(get_headers(req_path))
+
+                        # Fields must be referenced in lowercase.
+                        if req_path.startswith("filelist"):
+                                self.assertEqual(hdrs.get("expires", ""), "0")
+                                self.assertEqual(hdrs.get("cache-control", ""),
+                                    "no-cache, no-transform, must-revalidate")
+                                self.assertEqual(hdrs.get("pragma", None),
+                                    "no-cache")
+                        else:
+                                cc = hdrs.get("cache-control", "")
+                                self.assert_(cc.startswith("must-revalidate, "
+                                    "no-transform, max-age="))
+                                exp = hdrs.get("expires", None)
+                                self.assertNotEqual(exp, None)
+                                self.assert_(exp.endswith(" GMT"))
+
 
 if __name__ == "__main__":
         unittest.main()