15433013 content hash handling should handle different hash functions
author Tim Foster <tim.s.foster@oracle.com>
Fri, 04 Oct 2013 10:56:25 +1300
changeset 2962 ce8cd4c07986
parent 2961 b0a96696db84
child 2964 2278c246ac6c
15433013 content hash handling should handle different hash functions
src/client.py
src/depot.py
src/modules/actions/file.py
src/modules/actions/generic.py
src/modules/actions/license.py
src/modules/actions/signature.py
src/modules/catalog.py
src/modules/client/api_errors.py
src/modules/client/image.py
src/modules/client/imageplan.py
src/modules/client/publisher.py
src/modules/client/transport/transport.py
src/modules/depotcontroller.py
src/modules/digest.py
src/modules/elf.c
src/modules/elfextract.c
src/modules/elfextract.h
src/modules/flavor/elf.py
src/modules/lint/pkglint_manifest.py
src/modules/manifest.py
src/modules/misc.py
src/modules/p5p.py
src/modules/p5s.py
src/modules/search_storage.py
src/modules/server/api.py
src/modules/server/depot.py
src/modules/server/repository.py
src/modules/server/transaction.py
src/pkg/manifests/package:pkg.p5m
src/pkgrepo.py
src/publish.py
src/pull.py
src/sign.py
src/sysrepo.py
src/tests/api/t_api_search.py
src/tests/api/t_elf.py
src/tests/api/t_manifest.py
src/tests/api/t_p5p.py
src/tests/cli/t_https.py
src/tests/cli/t_pkg_install.py
src/tests/cli/t_pkg_publisher.py
src/tests/cli/t_pkg_refresh.py
src/tests/cli/t_pkg_revert.py
src/tests/cli/t_pkg_search.py
src/tests/cli/t_pkg_sysrepo.py
src/tests/cli/t_pkgrecv.py
src/tests/cli/t_pkgrepo.py
src/tests/cli/t_pkgsend.py
src/tests/cli/t_pkgsign.py
src/tests/cli/t_pkgsurf.py
src/tests/cli/t_sysrepo.py
src/tests/pkg5unittest.py
src/util/apache2/depot/depot_index.py
--- a/src/client.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/client.py	Fri Oct 04 10:56:25 2013 +1300
@@ -6105,6 +6105,10 @@
                 elif opt in ("--help", "-?"):
                         show_usage = True
 
+        # The globals in pkg.digest can be influenced by debug flags
+        if DebugValues:
+                reload(pkg.digest)
+
         subcommand = None
         if pargs:
                 subcommand = pargs.pop(0)
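
The reload here matters because pkg.digest computes its hash-attribute lists
once, at import time, so a debug flag set during argument parsing is only
honoured after a re-import. A minimal sketch of the kind of module-level
logic this implies (assumed and simplified; the attribute names are
illustrative, and the flag values are taken from the --debug help text
below):

    # hypothetical module-level code in pkg/modules/digest.py
    from pkg.client.debugvalues import DebugValues

    if DebugValues.get_value("hash") == "sha256":
            # publish and verify with SHA-256 only
            DEFAULT_HASH_ATTRS = ["pkg.hash.sha256"]
    elif DebugValues.get_value("hash") == "sha1+sha256":
            # publish both; prefer SHA-256 when verifying
            DEFAULT_HASH_ATTRS = ["hash", "pkg.hash.sha256"]
    else:
            # the default: positional SHA-1 hashes only
            DEFAULT_HASH_ATTRS = ["hash"]
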
--- a/src/depot.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/depot.py	Fri Oct 04 10:56:25 2013 +1300
@@ -89,6 +89,9 @@
 from cherrypy.process.plugins import Daemonizer
 
 from pkg.misc import msg, emsg, setlocale
+from pkg.client.debugvalues import DebugValues
+
+import pkg
 import pkg.client.api_errors as api_errors
 import pkg.config as cfg
 import pkg.portable.util as os_util
@@ -164,7 +167,8 @@
                         operations, simply 'search'.
         --debug         The name of a debug feature to enable; or a whitespace
                         or comma separated list of features to enable.
-                        Possible values are: headers.
+                        Possible values are: headers, hash=sha1+sha256,
+                        hash=sha256
         --image-root    The path to the image whose file information will be
                         used as a cache for file data.
         --log-access    The destination for any access related information
@@ -314,6 +318,16 @@
                                 else:
                                         features = arg.split()
                                 debug_features.extend(features)
+
+                                # We also allow key=value debug flags, which
+                                # get set in pkg.client.debugvalues
+                                for feature in features:
+                                        try:
+                                                key, val = feature.split("=", 1)
+                                                DebugValues.set_value(key, val)
+                                        except (AttributeError, ValueError):
+                                                pass
+
                         elif opt == "--disable-ops":
                                 if arg is None or arg == "":
                                         raise OptionError, \
@@ -485,6 +499,9 @@
                 if addresses:
                         ivalues["pkg"]["address"] = list(addresses)
 
+                if DebugValues:
+                        reload(pkg.digest)
+
                 # Build configuration object.
                 dconf = ds.DepotConfig(target=user_cfg, overrides=ivalues)
         except getopt.GetoptError, _e:
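
With this change the depot can, for example, be run so that it publishes
both hash families during a transition period (invocation illustrative):

    $ /usr/lib/pkg.depotd -d /path/to/repo --debug hash=sha1+sha256
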
--- a/src/modules/actions/file.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/actions/file.py	Fri Oct 04 10:56:25 2013 +1300
@@ -40,6 +40,7 @@
 import _common
 import pkg.actions
 import pkg.client.api_errors as api_errors
+import pkg.digest as digest
 import pkg.misc as misc
 import pkg.portable as portable
 
@@ -190,7 +191,11 @@
                         stream = self.data()
                         tfile = os.fdopen(tfilefd, "wb")
                         try:
-                                shasum = misc.gunzip_from_stream(stream, tfile)
+                                # Always verify using the most preferred hash
+                                hash_attr, hash_val, hash_func = \
+                                    digest.get_preferred_hash(self)
+                                shasum = misc.gunzip_from_stream(stream, tfile,
+                                    hash_func)
                         except zlib.error, e:
                                 raise ActionExecutionError(self,
                                     details=_("Error decompressing payload: %s")
@@ -200,15 +205,16 @@
                                 tfile.close()
                                 stream.close()
 
-                        if shasum != self.hash:
+                        if shasum != hash_val:
                                 raise ActionExecutionError(self,
                                     details=_("Action data hash verification "
                                     "failure: expected: %(expected)s computed: "
                                     "%(actual)s action: %(action)s") % {
-                                        "expected": self.hash,
+                                        "expected": hash_val,
                                         "actual": shasum,
                                         "action": self
                                     })
+
                 else:
                         temp = final_path
 
@@ -294,11 +300,12 @@
                             "found": misc.time_to_timestamp(lstat.st_mtime),
                             "expected": self.attrs["timestamp"] })
 
-                # avoid checking pkg.size if elfhash present;
-                # different size files may have the same elfhash
+                # avoid checking pkg.size if we have any content-hashes present;
+                # different size files may have the same content-hash
                 if "preserve" not in self.attrs and \
                     "pkg.size" in self.attrs and    \
-                    "elfhash" not in self.attrs and \
+                    not set(digest.RANKED_CONTENT_HASH_ATTRS).intersection(
+                    set(self.attrs.keys())) and \
                     lstat.st_size != int(self.attrs["pkg.size"]):
                         errors.append(_("Size: %(found)d bytes should be "
                             "%(expected)d") % { "found": lstat.st_size,
@@ -312,7 +319,9 @@
                         return errors, warnings, info
 
                 #
-                # Check file contents
+                # Check file contents. At the moment, the only content-hash
+                # supported in pkg(5) is for ELF files, so this will need work
+                # when additional content-hashes are added.
                 #
                 try:
                         # This is a generic mechanism, but only used for libc on
@@ -322,7 +331,10 @@
                         is_mtpt = self.attrs.get("mountpoint", "").lower() == "true"
                         elfhash = None
                         elferror = None
-                        if "elfhash" in self.attrs and haveelf and not is_mtpt:
+                        ehash_attr, elfhash_val, hash_func = \
+                            digest.get_preferred_hash(self,
+                                hash_type=digest.CONTENT_HASH)
+                        if ehash_attr and haveelf and not is_mtpt:
                                 #
                                 # It's possible for the elf module to
                                 # throw while computing the hash,
@@ -330,16 +342,28 @@
                                 # corrupted or truncated.
                                 #
                                 try:
-                                        elfhash = elf.get_dynamic(path)["hash"]
+                                        # Annoying that we have to hardcode this
+                                        if ehash_attr == \
+                                            "pkg.content-hash.sha256":
+                                                get_sha256 = True
+                                                get_sha1 = False
+                                        else:
+                                                get_sha256 = False
+                                                get_sha1 = True
+                                        elfhash = elf.get_dynamic(path,
+                                            sha1=get_sha1,
+                                            sha256=get_sha256)[ehash_attr]
                                 except RuntimeError, e:
-                                        errors.append("Elfhash: %s" % e)
+                                        errors.append("ELF content hash: %s" %
+                                            e)
 
                                 if elfhash is not None and \
-                                    elfhash != self.attrs["elfhash"]:
-                                        elferror = _("Elfhash: %(found)s "
+                                    elfhash != elfhash_val:
+                                        elferror = _("ELF content hash: "
+                                            "%(found)s "
                                             "should be %(expected)s") % {
                                             "found": elfhash,
-                                            "expected": self.attrs["elfhash"] }
+                                            "expected": elfhash_val }
 
                         # If we failed to compute the content hash, or the
                         # content hash failed to verify, try the file hash.
@@ -348,21 +372,24 @@
                         # changed, since obviously the file hash is a superset
                         # of the content hash.
                         if (elfhash is None or elferror) and not is_mtpt:
-                                hashvalue, data = misc.get_data_digest(path)
-                                if hashvalue != self.hash:
+                                hash_attr, hash_val, hash_func = \
+                                    digest.get_preferred_hash(self)
+                                sha_hash, data = misc.get_data_digest(path,
+                                    hash_func=hash_func)
+                                if sha_hash != hash_val:
                                         # Prefer the content hash error message.
                                         if "preserve" in self.attrs:
                                                 info.append(_(
-                                                    "editable file has" 
-                                                    " been changed"))
+                                                    "editable file has "
+                                                    "been changed"))
                                         elif elferror:
                                                 errors.append(elferror)
                                         else:
                                                 errors.append(_("Hash: "
                                                     "%(found)s should be "
                                                     "%(expected)s") % {
-                                                    "found": hashvalue,
-                                                    "expected": self.hash })
+                                                    "found": sha_hash,
+                                                    "expected": hash_val })
                                         self.replace_required = True
                 except EnvironmentError, e:
                         if e.errno == errno.EACCES:
@@ -414,30 +441,60 @@
                 # a downgrade since that isn't allowed across rename or obsolete
                 # boundaries.
                 is_file = os.path.isfile(final_path)
-                if orig and pkgplan.destination_fmri and \
-                    self.hash != orig.hash and \
-                    pkgplan.origin_fmri and \
-                    pkgplan.destination_fmri.version < pkgplan.origin_fmri.version:
-                        # Installed, preserved file is for a package newer than
-                        # what will be installed.  So check if the version on
-                        # disk is different than what was originally delivered,
-                        # and if so, preserve it.
-                        if is_file:
-                                ihash, cdata = misc.get_data_digest(final_path)
-                                if ihash != orig.hash:
-                                        # .old is intentionally avoided here to
-                                        # prevent accidental collisions with the
-                                        # normal install process.
-                                        return "renameold.update"
-                        return False
+
+                if orig:
+                        # We must use the same hash algorithm when comparing old
+                        # and new actions. Look for the most-preferred common
+                        # hash between old and new. Since the two actions may
+                        # not share a common hash (in which case, we get a tuple
+                        # of 'None' objects), we also need to know the preferred
+                        # hash to use when examining the old action on its own.
+                        common_hash_attr, common_hash_val, \
+                            common_orig_hash_val, common_hash_func = \
+                            digest.get_common_preferred_hash(self, orig)
+
+                        hattr, orig_hash_val, orig_hash_func = \
+                            digest.get_preferred_hash(orig)
+
+                        if common_orig_hash_val and common_hash_val:
+                                changed_hash = common_hash_val != common_orig_hash_val
+                        else:
+                                # we don't have a common hash, so we must treat
+                                # this as a changed action
+                                changed_hash = True
+
+                        if pkgplan.destination_fmri and \
+                            changed_hash and \
+                            pkgplan.origin_fmri and \
+                            pkgplan.destination_fmri.version < pkgplan.origin_fmri.version:
+                                # Installed, preserved file is for a package
+                                # newer than what will be installed. So check if
+                                # the version on disk is different than what
+                                # was originally delivered, and if so, preserve
+                                # it.
+                                if is_file:
+                                        ihash, cdata = misc.get_data_digest(
+                                            final_path,
+                                            hash_func=orig_hash_func)
+                                        if ihash != orig_hash_val:
+                                                # .old is intentionally avoided
+                                                # here to prevent accidental
+                                                # collisions with the normal
+                                                # install process.
+                                                return "renameold.update"
+                                return False
 
                 # If the action has been marked with a preserve attribute, and
                 # the file exists and has a content hash different from what the
                 # system expected it to be, then we preserve the original file
                 # in some way, depending on the value of preserve.
                 if is_file:
-                        chash, cdata = misc.get_data_digest(final_path)
-                        if not orig or chash != orig.hash:
+                        # if we had an action installed, then we know what hash
+                        # function was used to compute its hash attribute.
+                        if orig:
+                                chash, cdata = misc.get_data_digest(final_path,
+                                    hash_func=orig_hash_func)
+                        if not orig or chash != orig_hash_val:
                                 if pres_type in ("renameold", "renamenew"):
                                         return pres_type
                                 return True
@@ -446,15 +503,40 @@
 
         # If we're not upgrading, or the file contents have changed,
         # retrieve the file and write it to a temporary location.
-        # For ELF files, only write the new file if the elfhash changed.
+        # For files with content-hash attributes, only write the new file if the
+        # content-hash changed.
         def needsdata(self, orig, pkgplan):
                 if self.replace_required:
                         return True
+                # check for the presence of a simple elfhash attribute,
+                # and if that's present, look for the common preferred elfhash.
+                # For now, this is sufficient, but when additional content
+                # types are supported (and we stop publishing SHA-1 hashes) more
+                # work will be needed to compute 'bothelf'.
                 bothelf = orig and "elfhash" in orig.attrs and \
                     "elfhash" in self.attrs
-                if not orig or \
-                    (orig.hash != self.hash and (not bothelf or
-                        orig.attrs["elfhash"] != self.attrs["elfhash"])):
+                if bothelf:
+                        common_elf_attr, common_elfhash, common_orig_elfhash, \
+                            common_elf_func = \
+                            digest.get_common_preferred_hash(self, orig,
+                            hash_type=digest.CONTENT_HASH)
+
+                common_hash_attr, common_hash_val, \
+                    common_orig_hash_val, common_hash_func = \
+                    digest.get_common_preferred_hash(self, orig)
+
+                if not orig:
+                        changed_hash = True
+                elif orig and (common_orig_hash_val is None or
+                    common_hash_val is None):
+                        # we have no common hash so we have to treat this as a
+                        # changed action
+                        changed_hash = True
+                else:
+                        changed_hash = common_hash_val != common_orig_hash_val
+
+                if (changed_hash and (not bothelf or
+                    common_orig_elfhash != common_elfhash)):
                         return True
                 elif orig:
                         # It's possible that the file content hasn't changed
@@ -507,8 +589,11 @@
                         # modified since they were installed and this is
                         # not an upgrade.
                         try:
-                                ihash, cdata = misc.get_data_digest(path)
-                                if ihash != self.hash:
+                                hash_attr, hash_val, hash_func  = \
+                                    digest.get_preferred_hash(self)
+                                ihash, cdata = misc.get_data_digest(path,
+                                    hash_func=hash_func)
+                                if ihash != hash_val:
                                         pkgplan.salvage(path)
                                         # Nothing more to do.
                                         return
@@ -524,7 +609,8 @@
         def different(self, other, cmp_hash=True):
                 # Override the generic different() method to ignore the file
                 # hash for ELF files and compare the ELF hash instead.
-                # XXX This should be modularized and controlled by policy.
+                # XXX This should be modularized and controlled by policy and
+                # needs work once additional content-type hashes are added.
 
                 # One of these isn't an ELF file, so call the generic method
                 if "elfhash" in self.attrs and "elfhash" in other.attrs:
@@ -535,12 +621,27 @@
                 """Generates the indices needed by the search dictionary.  See
                 generic.py for a more detailed explanation."""
 
-                return [
+                index_list = [
+                    # This entry shows the hash as the 'index' and the file
+                    # path as the 'value' when presenting results for a search
+                    # on the SHA-1 hash. This seems unusual, but maintains the
+                    # behaviour we had for S11.
                     ("file", "content", self.hash, self.hash),
+                    # This will result in a 2nd row of output when searching for
+                    # the SHA-1 hash, but is consistent with our behaviour for
+                    # the other hash attributes.
+                    ("file", "hash", self.hash, None),
                     ("file", "basename", os.path.basename(self.attrs["path"]),
                     None),
                     ("file", "path", os.path.sep + self.attrs["path"], None)
                 ]
+                for attr in digest.DEFAULT_HASH_ATTRS:
+                        # we already have an index entry for self.hash
+                        if attr == "hash":
+                                continue
+                        hash = self.attrs[attr]
+                        index_list.append(("file", attr, hash, None))
+                return index_list
 
         def save_file(self, image, full_path):
                 """Save a file for later installation (in same process
--- a/src/modules/actions/generic.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/actions/generic.py	Fri Oct 04 10:56:25 2013 +1300
@@ -43,6 +43,7 @@
 import _common
 import pkg.actions
 import pkg.client.api_errors as apx
+import pkg.digest as digest
 import pkg.portable as portable
 import pkg.variant as variant
 
@@ -255,7 +256,9 @@
         def __str__(self):
                 """Serialize the action into manifest form.
 
-                The form is the name, followed by the hash, if it exists,
+                The form is the name, followed by the SHA-1 hash if it exists
+                (this use of a positional SHA-1 hash is deprecated, with
+                pkg.*hash.* attributes preferred over positional hashes),
                 followed by attributes in the form 'key=value'.  All fields are
                 space-separated; fields with spaces in the values are quoted.
 
@@ -427,9 +430,14 @@
                 if cmp_hash:
                         shash = ohash = None
                         try:
-                                shash = self.hash
-                                ohash = other.hash
-                                if shash != other.hash:
+                                attr, shash, ohash, hfunc = \
+                                    digest.get_common_preferred_hash(
+                                    self, other)
+                                if shash != ohash:
+                                        return True
+                                # If there's no common preferred hash, we have
+                                # to treat these actions as different
+                                if shash is None and ohash is None:
                                         return True
                         except AttributeError:
                                 if shash or ohash:
@@ -477,6 +485,8 @@
                 desired user output is.
                 """
 
+                # Indexing based on the SHA-1 hash is enough for the generic
+                # case.
                 if hasattr(self, "hash"):
                         return [
                             (self.name, "content", self.hash, self.hash),
--- a/src/modules/actions/license.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/actions/license.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 """module describing a license packaging object
@@ -36,6 +36,7 @@
 from stat import S_IWRITE, S_IREAD
 
 import generic
+import pkg.digest as digest
 import pkg.misc as misc
 import pkg.portable as portable
 import urllib
@@ -95,7 +96,10 @@
 
                 lfile = file(path, "wb")
                 try:
-                        shasum = misc.gunzip_from_stream(stream, lfile)
+                        hash_attr, hash_val, hash_func = \
+                            digest.get_preferred_hash(self)
+                        shasum = misc.gunzip_from_stream(stream, lfile,
+                            hash_func=hash_func)
                 except zlib.error, e:
                         raise ActionExecutionError(self, details=_("Error "
                             "decompressing payload: %s") %
@@ -104,12 +108,12 @@
                         lfile.close()
                         stream.close()
 
-                if shasum != self.hash:
+                if shasum != hash_val:
                         raise ActionExecutionError(self, details=_("Action "
                             "data hash verification failure: expected: "
                             "%(expected)s computed: %(actual)s action: "
                             "%(action)s") % {
-                                "expected": self.hash,
+                                "expected": hash_val,
                                 "actual": shasum,
                                 "action": self
                             })
@@ -138,9 +142,12 @@
                 path = os.path.join(img.get_license_dir(pfmri),
                     "license." + urllib.quote(self.attrs["license"], ""))
 
+                hash_attr, hash_val, hash_func = \
+                    digest.get_preferred_hash(self)
                 if args["forever"] == True:
                         try:
-                                chash, cdata = misc.get_data_digest(path)
+                                chash, cdata = misc.get_data_digest(path,
+                                    hash_func=hash_func)
                         except EnvironmentError, e:
                                 if e.errno == errno.ENOENT:
                                         errors.append(_("License file %s does "
@@ -148,10 +155,10 @@
                                         return errors, warnings, info
                                 raise
 
-                        if chash != self.hash:
+                        if chash != hash_val:
                                 errors.append(_("Hash: '%(found)s' should be "
                                     "'%(expected)s'") % { "found": chash,
-                                    "expected": self.hash})
+                                    "expected": hash_val})
                 return errors, warnings, info
 
         def remove(self, pkgplan):
@@ -174,8 +181,14 @@
                 indices = [("license", idx, self.attrs[idx], None)
                            for idx in self.reverse_indices]
                 if hasattr(self, "hash"):
+                        indices.append(("license", "hash", self.hash, None))
                         indices.append(("license", "content", self.hash, None))
-
+                for attr in digest.DEFAULT_HASH_ATTRS:
+                        # we already have an index entry for self.hash
+                        if attr == "hash":
+                                continue
+                        hash = self.attrs[attr]
+                        indices.append(("license", attr, hash, None))
                 return indices
 
         def get_text(self, img, pfmri, alt_pub=None):
@@ -189,12 +202,15 @@
                 """
 
                 path = self.get_local_path(img, pfmri)
+                hash_attr, hash_attr_val, hash_func = \
+                    digest.get_least_preferred_hash(self)
                 try:
                         with open(path, "rb") as fh:
                                 length = os.stat(path).st_size
                                 chash, txt = misc.get_data_digest(fh,
-                                    length=length, return_content=True)
-                                if chash == self.hash:
+                                    length=length, return_content=True,
+                                    hash_func=hash_func)
+                                if chash == hash_attr_val:
                                         return txt
                 except EnvironmentError, e:
                         if e.errno != errno.ENOENT:
@@ -206,8 +222,8 @@
                         if not alt_pub:
                                 alt_pub = img.get_publisher(pfmri.publisher)
                         assert pfmri.publisher == alt_pub.prefix
-                        return img.transport.get_content(alt_pub, self.hash,
-                            fmri=pfmri)
+                        return img.transport.get_content(alt_pub, hash_attr_val,
+                            fmri=pfmri, hash_func=hash_func)
                 finally:
                         img.cleanup_downloads()
 
--- a/src/modules/actions/signature.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/actions/signature.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import os
@@ -31,6 +31,7 @@
 import generic
 import pkg.actions
 import pkg.client.api_errors as apx
+import pkg.digest as digest
 import pkg.misc as misc
 import M2Crypto as m2
 
@@ -90,10 +91,20 @@
                 """
 
                 self.chain_cert_openers = []
-                hshes = []
-                sizes = []
-                chshes = []
-                csizes = []
+
+                # chain_hshes and chain_chshes are dictionaries which map a
+                # given hash or compressed hash attribute to a list of the hash
+                # values for each path in chain_certs.
+                chain_hshes = {}
+                chain_chshes = {}
+                chain_csizes = []
+                chain_sizes = []
+
+                for attr in digest.DEFAULT_CHAIN_ATTRS:
+                        chain_hshes[attr] = []
+                for attr in digest.DEFAULT_CHAIN_CHASH_ATTRS:
+                        chain_chshes[attr] = []
+
                 for pth in chain_certs:
                         if not os.path.exists(pth):
                                 raise pkg.actions.ActionDataError(
@@ -104,29 +115,52 @@
                         file_opener = self.make_opener(pth)
                         self.chain_cert_openers.append(file_opener)
                         self.attrs.setdefault("chain.sizes", [])
+                        self.attrs.setdefault("chain.csizes", [])
+
                         try:
                                 fs = os.stat(pth)
-                                sizes.append(str(fs.st_size))
+                                chain_sizes.append(str(fs.st_size))
                         except EnvironmentError, e:
                                 raise pkg.actions.ActionDataError(e, path=pth)
                         # misc.get_data_digest takes care of closing the file
                         # that's opened below.
                         with file_opener() as fh:
-                                hsh, data = misc.get_data_digest(fh,
-                                    length=fs.st_size, return_content=True)
-                        hshes.append(hsh)
-                        csize, chash = misc.compute_compressed_attrs(hsh,
-                            None, data, fs.st_size, chash_dir)
-                        csizes.append(csize)
-                        chshes.append(chash.hexdigest())
-                if hshes:
+                                hshes, data = misc.get_data_digest(fh,
+                                    length=fs.st_size, return_content=True,
+                                    hash_attrs=digest.DEFAULT_CHAIN_ATTRS,
+                                    hash_algs=digest.CHAIN_ALGS)
+
+                        for attr in hshes:
+                                chain_hshes[attr].append(hshes[attr])
+
+                        # We need a filename to use for the uncompressed chain
+                        # cert, so get the preferred chain hash value from the
+                        # chain_hshes dictionary.
+                        chain_val = None
+                        for attr in digest.RANKED_CHAIN_ATTRS:
+                                if not chain_val and attr in hshes:
+                                        chain_val = hshes[attr]
+
+                        csize, chashes = misc.compute_compressed_attrs(
+                            chain_val, None, data, fs.st_size, chash_dir,
+                            chash_attrs=digest.DEFAULT_CHAIN_CHASH_ATTRS,
+                            chash_algs=digest.CHAIN_CHASH_ALGS)
+
+                        chain_csizes.append(csize)
+                        for attr in chashes:
+                                chain_chshes[attr].append(
+                                    chashes[attr].hexdigest())
+                if chain_hshes:
                         # These attributes are stored as a single value with
                         # spaces in it rather than multiple values to ensure
                         # the ordering remains consistent.
-                        self.attrs["chain.sizes"] = " ".join(sizes)
-                        self.attrs["chain"] = " ".join(hshes)
-                        self.attrs["chain.chashes"] = " ".join(chshes)
-                        self.attrs["chain.csizes"] = " ".join(csizes)
+                        self.attrs["chain.sizes"] = " ".join(chain_sizes)
+                        self.attrs["chain.csizes"] = " ".join(chain_csizes)
+
+                        for attr in digest.DEFAULT_CHAIN_ATTRS:
+                                self.attrs[attr] = " ".join(chain_hshes[attr])
+                        for attr in digest.DEFAULT_CHAIN_CHASH_ATTRS:
+                                self.attrs[attr] = " ".join(chain_chshes[attr])
 
         def get_size(self):
                 res = generic.Action.get_size(self)
@@ -141,6 +175,9 @@
                 return res
 
         def get_chain_csize(self, chain):
+                # The length of 'chain' is also going to be the length
+                # of pkg.chain.<hash alg>, so there's no need to look for
+                # other hash attributes here.
                 for c, s in zip(self.attrs.get("chain", "").split(),
                     self.attrs.get("chain.csizes", "").split()):
                         if c == chain:
@@ -187,39 +224,76 @@
                         size = int(self.attrs.get("pkg.size", 0))
                         tmp_dir = tempfile.mkdtemp()
                         with self.data() as fh:
-                                tmp_a.hash, data = misc.get_data_digest(fh,
-                                    size, return_content=True)
-                        csize, chash = misc.compute_compressed_attrs(
+                                hashes, data = misc.get_data_digest(fh,
+                                    size, return_content=True,
+                                    hash_attrs=digest.DEFAULT_HASH_ATTRS,
+                                    hash_algs=digest.HASH_ALGS)
+                                tmp_a.attrs.update(hashes)
+                                # "hash" is special since it shouldn't appear in
+                                # the action attributes; it gets set as a member
+                                # instead.
+                                if "hash" in tmp_a.attrs:
+                                        tmp_a.hash = tmp_a.attrs["hash"]
+                                        del tmp_a.attrs["hash"]
+
+                        # The use of self.hash here is just to point to a
+                        # filename; the type of hash used for self.hash is
+                        # irrelevant. Note that our use of self.hash for the
+                        # basename will need to be modified when we finally move
+                        # off SHA-1 hashes.
+                        csize, chashes = misc.compute_compressed_attrs(
                             os.path.basename(self.hash), self.hash, data, size,
                             tmp_dir)
                         shutil.rmtree(tmp_dir)
                         tmp_a.attrs["pkg.csize"] = csize
-                        tmp_a.attrs["chash"] = chash.hexdigest()
+                        for attr in chashes:
+                                tmp_a.attrs[attr] = chashes[attr].hexdigest()
                 elif self.hash:
                         tmp_a.hash = self.hash
+                        for attr in digest.DEFAULT_HASH_ATTRS:
+                                if attr in self.attrs:
+                                        tmp_a.attrs[attr] = self.attrs[attr]
 
-                hashes = []
                 csizes = []
-                chashes = []
+                chain_hashes = {}
+                chain_chashes = {}
+                for attr in digest.DEFAULT_CHAIN_ATTRS:
+                        chain_hashes[attr] = []
+                for attr in digest.DEFAULT_CHAIN_CHASH_ATTRS:
+                        chain_chashes[attr] = []
+
                 sizes = self.attrs.get("chain.sizes", "").split()
                 for i, c in enumerate(self.chain_cert_openers):
                         size = int(sizes[i])
                         tmp_dir = tempfile.mkdtemp()
-                        hsh, data = misc.get_data_digest(c(), size,
-                            return_content=True)
-                        hashes.append(hsh)
-                        csize, chash = misc.compute_compressed_attrs("tmp",
-                            None, data, size, tmp_dir)
+                        hshes, data = misc.get_data_digest(c(), size,
+                            return_content=True,
+                            hash_attrs=digest.DEFAULT_CHAIN_ATTRS,
+                            hash_algs=digest.CHAIN_ALGS)
+
+                        for attr in hshes:
+                                chain_hashes[attr].append(hshes[attr])
+
+                        csize, chashes = misc.compute_compressed_attrs("tmp",
+                            None, data, size, tmp_dir,
+                            chash_attrs=digest.DEFAULT_CHAIN_CHASH_ATTRS,
+                            chash_algs=digest.CHAIN_CHASH_ALGS)
                         shutil.rmtree(tmp_dir)
                         csizes.append(csize)
-                        chashes.append(chash.hexdigest())
-                if hashes:
-                        tmp_a.attrs["chain"] = " ".join(hashes)
+                        for attr in chashes:
+                                chain_chashes[attr].append(
+                                    chashes[attr].hexdigest())
+
+                if chain_hashes:
+                        for attr in digest.DEFAULT_CHAIN_ATTRS:
+                                if chain_hashes[attr]:
+                                        tmp_a.attrs[attr] = " ".join(
+                                            chain_hashes[attr])
 
                 # Now that tmp_a looks like the post-published action, transform
                 # it into a string using the generic sig_str method.
                 return generic.Action.sig_str(tmp_a, tmp_a, version)
- 
+
         def actions_to_str(self, acts, version):
                 """Transforms a collection of actions into a string that is
                 used to sign those actions."""
@@ -235,18 +309,50 @@
                 """Retrieve the chain certificates needed to validate this
                 signature."""
 
-                for c in self.attrs.get("chain", "").split():
-                        pub.get_cert_by_hash(c, only_retrieve=True)
+                chain_attr, chain_val, hash_func = \
+                    digest.get_least_preferred_hash(self,
+                    hash_type=digest.CHAIN)
+                # We may not have any chain certs for this signature
+                if not chain_val:
+                        return
+                for c in chain_val.split():
+                        pub.get_cert_by_hash(c, only_retrieve=True,
+                            hash_func=hash_func)
 
-        def get_chain_certs(self):
+        def get_chain_certs(self, least_preferred=False):
+                """Return a list of the chain certificates needed to validate
+                this signature. When retrieving the content from the
+                repository, we use the "least preferred" hash for backwards
+                compatibility, but when verifying the content, we use the
+                "most preferred" hash."""
+
+                if least_preferred:
+                        chain_attr, chain_val, hash_func = \
+                            digest.get_least_preferred_hash(self,
+                            hash_type=digest.CHAIN)
+                else:
+                        chain_attr, chain_val, hash_func = \
+                            digest.get_preferred_hash(self,
+                            hash_type=digest.CHAIN)
+                if not chain_val:
+                        return []
+                return chain_val.split()
+
+        def get_chain_certs_chashes(self, least_preferred=False):
                 """Return a list of the chain certificates needed to validate
                 this signature."""
-                return self.attrs.get("chain", "").split()
 
-        def get_chain_certs_chashes(self):
-                """Return a list of the chain certificates needed to validate
-                this signature."""
-                return self.attrs.get("chain.chashes", "").split()
+                if least_preferred:
+                        chain_chash_attr, chain_chash_val, hash_func = \
+                            digest.get_least_preferred_hash(self,
+                            hash_type=digest.CHAIN_CHASH)
+                else:
+                        chain_chash_attr, chain_chash_val, hash_func = \
+                            digest.get_preferred_hash(self,
+                            hash_type=digest.CHAIN_CHASH)
+                if not chain_chash_val:
+                        return []
+                return chain_chash_val.split()
 
         def is_signed(self):
                 """Returns True if this action is signed using a key, instead
@@ -314,14 +420,17 @@
                             computed_hash:
                                 raise apx.UnverifiedSignature(self,
                                     _("The signature value did not match the "
-                                    "expected value. action:%s") % self)
+                                    "expected value. action: %s") % self)
                         return True
                 # Verify a signature that's not just a hash.
                 if self.sig_alg is None:
                         return None
                 # Get the certificate paired with the key which signed this
                 # action.
-                cert = pub.get_cert_by_hash(self.hash, verify_hash=True)
+                attr, hash_val, hash_func = \
+                    digest.get_least_preferred_hash(self)
+                cert = pub.get_cert_by_hash(hash_val, verify_hash=True,
+                    hash_func=hash_func)
                 # Make sure that the intermediate certificates that are needed
                 # to validate this signature are present.
                 self.retrieve_chain_certs(pub)
@@ -418,6 +527,12 @@
                     self.attrs["algorithm"], self.attrs["algorithm"]))
                 res.append((self.name, "signature", self.attrs["value"],
                     self.attrs["value"]))
+                for attr in digest.DEFAULT_HASH_ATTRS:
+                        # we already have an index entry for self.hash
+                        if attr == "hash":
+                                continue
+                        hash = self.attrs[attr]
+                        res.append((self.name, attr, hash, None))
                 return res
 
         def identical(self, other, hsh):
@@ -429,7 +544,25 @@
                         return False
                 # If the code signing certs are identical, then more checking
                 # is needed.
-                if hsh == other.hash or self.hash == other.hash:
+                # Determine if we share any hash attribute values with the other
+                # action.
+                matching_hash_attrs = set()
+                for attr in digest.DEFAULT_HASH_ATTRS:
+                        if attr == "hash":
+                                # we deal with the 'hash' member later
+                                continue
+                        if attr in self.attrs and attr in other.attrs and \
+                            self.attrs[attr] == other.attrs[attr] and \
+                            self.attrs[attr]:
+                                matching_hash_attrs.add(attr)
+                        if hsh and hsh == other.attrs.get(attr):
+                                # Technically 'hsh' isn't a hash attr, it's
+                                # a hash attr value, but that's enough for us
+                                # to consider it as potentially identical.
+                                matching_hash_attrs.add(hsh)
+
+                if hsh == other.hash or self.hash == other.hash or \
+                    matching_hash_attrs:
                         # If the algorithms are using different algorithms or
                         # have different versions, then they're not identical.
                         if self.attrs["algorithm"]  != \
--- a/src/modules/catalog.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/catalog.py	Fri Oct 04 10:56:25 2013 +1300
@@ -19,7 +19,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
 
 """Interfaces and implementation for the Catalog object, as well as functions
 that operate on lists of package FMRIs."""
@@ -64,6 +64,13 @@
                 # Default to a 32K buffer.
                 self.__bufsz = 32 * 1024
 
+                # catalog signatures *must* use sha-1 only since clients
+                # compare entire dictionaries against the reported hash from
+                # the catalog in the various <CatalogPartBase>.validate()
+                # methods rather than just attributes within those dictionaries.
+                # If old clients are to interoperate with new repositories, the
+                # computed and expected dictionaries must be identical at
+                # present, so we must use sha-1.
                 if sign:
                         if not pathname:
                                 # Only needed if not writing to __fileobj.
@@ -158,7 +165,8 @@
                 # Calculating sha-1 this way is much faster than intercepting
                 # write calls because of the excessive number of write calls
                 # that json.dump() triggers (1M+ for /dev catalog files).
-                self.__sha_1_value = misc.get_data_digest(self.pathname)[0]
+                self.__sha_1_value = misc.get_data_digest(self.pathname,
+                    hash_func=hashlib.sha1)[0]
 
                 # Open the JSON file so that the signature data can be added.
                 sfile = file(self.pathname, "rb+", self.__bufsz)
--- a/src/modules/client/api_errors.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/client/api_errors.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import errno
@@ -2020,7 +2020,7 @@
                                     "found in %(pfmri)s and has a hash of "
                                     "%(hsh)s") % \
                                     {"pfmri": self.pfmri, "hsh": self.sig.hash}
-                        return _("The package involved is:%s") % self.pfmri
+                        return _("The package involved is %s") % self.pfmri
                 if self.sig:
                         return _("The relevant signature action's value "
                             "attribute is %s") % self.sig.attrs["value"]
@@ -2097,8 +2097,8 @@
                         s = _("The following problems were encountered:\n") + \
                         "\n".join([str(e) for e in self.ext_exs])
                 return _("The certificate which issued this "
-                    "certificate:%(subj)s could not be found. The issuer "
-                    "is:%(issuer)s\n") % {"subj":self.cert.get_subject(),
+                    "certificate: %(subj)s could not be found. The issuer "
+                    "is: %(issuer)s\n") % {"subj":self.cert.get_subject(),
                     "issuer":self.cert.get_issuer()} + s + \
                     CertificateException.__str__(self)
 
--- a/src/modules/client/image.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/client/image.py	Fri Oct 04 10:56:25 2013 +1300
@@ -31,6 +31,7 @@
 import copy
 import datetime
 import errno
+import hashlib
 import os
 import platform
 import shutil
@@ -595,8 +596,12 @@
 
                         try:
                                 # Destination name is based on digest of file.
+                                # In order for this image to interoperate with
+                                # older and newer clients, we must use sha-1
+                                # here.
                                 dest = os.path.join(ssl_dir,
-                                    misc.get_data_digest(src)[0])
+                                    misc.get_data_digest(src,
+                                        hash_func=hashlib.sha1)[0])
                                 if src != dest:
                                         portable.copyfile(src, dest)
 
--- a/src/modules/client/imageplan.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/client/imageplan.py	Fri Oct 04 10:56:25 2013 +1300
@@ -50,6 +50,7 @@
 import pkg.client.pkgdefs as pkgdefs
 import pkg.client.pkgplan as pkgplan
 import pkg.client.plandesc as plandesc
+import pkg.digest as digest
 import pkg.fmri
 import pkg.manifest as manifest
 import pkg.misc as misc
@@ -2584,8 +2585,10 @@
                 """Retrieve text for release note from repo"""
                 try:
                         pub = self.image.get_publisher(pfmri.publisher)
-                        return self.image.transport.get_content(pub, act.hash,
-                            fmri=pfmri)
+                        hash_attr, hash_val, hash_func = \
+                            digest.get_least_preferred_hash(act)
+                        return self.image.transport.get_content(pub, hash_val,
+                            fmri=pfmri, hash_func=hash_func)
                 finally:
                         self.image.cleanup_downloads()
 
--- a/src/modules/client/publisher.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/client/publisher.py	Fri Oct 04 10:56:25 2013 +1300
@@ -58,6 +58,7 @@
 import pkg.client.api_errors as api_errors
 import pkg.client.sigpolicy as sigpolicy
 import pkg.client.pkgdefs as pkgdefs
+import pkg.digest as digest
 import pkg.misc as misc
 import pkg.portable as portable
 import pkg.server.catalog as old_catalog
@@ -1622,7 +1623,8 @@
                 if not os.path.exists(self.__origin_root):
                         return
                 # A digest of the URI string is used here to attempt to avoid
-                # path length problems.
+                # path length problems. In order for this image to interoperate
+                # with older clients, we must use sha-1 here.
                 return os.path.join(self.__origin_root,
                     hashlib.sha1(origin.uri).hexdigest())
 
@@ -1637,6 +1639,8 @@
                 on catalog from each origin."""
 
                 # First, remove catalogs for any origins that no longer exist.
+                # We must interoperate with older clients, so force the use of
+                # sha-1 here.
                 ohashes = [
                     hashlib.sha1(o.uri).hexdigest()
                     for o in self.repository.origins
@@ -2355,6 +2359,8 @@
 
         @staticmethod
         def __hash_cert(c):
+                # In order to interoperate with older images, we must use SHA-1
+                # here.
                 return hashlib.sha1(c.as_pem()).hexdigest()
 
         @staticmethod
@@ -2412,7 +2418,7 @@
                 return pkg_hash
 
         def get_cert_by_hash(self, pkg_hash, verify_hash=False,
-            only_retrieve=False):
+            only_retrieve=False, hash_func=digest.DEFAULT_HASH_FUNC):
                 """Given a pkg5 hash, retrieve the cert that's associated with
                 it.
 
@@ -2435,7 +2441,8 @@
                         with open(pth, "rb") as fh:
                                 s = fh.read()
                 else:
-                        s = self.transport.get_content(self, pkg_hash)
+                        s = self.transport.get_content(self, pkg_hash,
+                            hash_func=hash_func)
                 c = self.__string_to_cert(s, pkg_hash)
                 if not pth_exists:
                         try:
@@ -2447,7 +2454,7 @@
 
                 if verify_hash:
                         h = misc.get_data_digest(cStringIO.StringIO(s),
-                            length=len(s))[0]
+                            length=len(s), hash_func=hash_func)[0]
                         if h != pkg_hash:
                                 raise api_errors.ModifiedCertificateException(c,
                                     pth)
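
With the hash_func parameter threaded through, the verify_hash path
recomputes the digest using the same function that originally produced
pkg_hash. A self-contained sketch of that check (simplified; the data is
illustrative):

    import cStringIO
    import hashlib

    import pkg.misc as misc

    s = "certificate bytes"      # PEM data in the real code
    pkg_hash = hashlib.sha1(s).hexdigest()
    h = misc.get_data_digest(cStringIO.StringIO(s), length=len(s),
        hash_func=hashlib.sha1)[0]
    # a mismatch here raises ModifiedCertificateException in publisher.py
    assert h == pkg_hash
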
--- a/src/modules/client/transport/transport.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/client/transport/transport.py	Fri Oct 04 10:56:25 2013 +1300
@@ -44,6 +44,7 @@
 import pkg.client.transport.repo as trepo
 import pkg.client.transport.stats as tstats
 import pkg.client.progress as progress
+import pkg.digest as digest
 import pkg.file_layout.file_manager as fm
 import pkg.fmri
 import pkg.manifest as manifest
@@ -1094,7 +1095,8 @@
                 raise failures
 
         @LockedTransport()
-        def get_content(self, pub, fhash, fmri=None, ccancel=None):
+        def get_content(self, pub, fhash, fmri=None, ccancel=None,
+            hash_func=None):
                 """Given a fhash, return the uncompressed content content from
                 the remote object.  This is similar to get_datastream, except
                 that the transport handles retrieving and decompressing the
@@ -1102,6 +1104,8 @@
 
                 'fmri' If the fhash corresponds to a known package, the fmri
                 should be specified for optimal transport performance.
+
+                'hash_func' is the hash function that was used to compute fhash.
                 """
 
                 retry_count = global_settings.PKG_CLIENT_MAX_TIMEOUT
@@ -1141,7 +1145,8 @@
                                 resp = d.get_datastream(fhash, v, header,
                                     ccancel=ccancel, pub=pub)
                                 s = cStringIO.StringIO()
-                                hash_val = misc.gunzip_from_stream(resp, s)
+                                hash_val = misc.gunzip_from_stream(resp, s,
+                                    hash_func=hash_func)
 
                                 if hash_val != fhash:
                                         exc = tx.InvalidContentException(
@@ -2415,11 +2420,12 @@
                 check if this action is cached.  This is used for actions which
                 have more than one effective payload."""
 
-                hashval = action.hash
+                hash_attr, hash_val, hash_func = \
+                    digest.get_least_preferred_hash(action)
                 if in_hash:
-                        hashval = in_hash
+                        hash_val = in_hash
                 for cache in self.cfg.get_caches(pub=pub, readonly=True):
-                        cache_path = cache.lookup(hashval)
+                        cache_path = cache.lookup(hash_val)
                         if not cache_path:
                                 continue
                         try:
@@ -2455,20 +2461,40 @@
                 return self._make_opener(self._action_cached(action, pub,
                     verify=False))
 
-        @staticmethod
-        def _verify_content(action, filepath):
+        def _verify_content(self, action, filepath):
                 """If action contains an attribute that has the compressed
                 hash, read the file specified in filepath and verify
                 that the hash values match.  If the values do not match,
                 remove the file and raise an InvalidContentException."""
 
-                chash = action.attrs.get("chash", None)
+                chash_attr, chash, chash_func = digest.get_preferred_hash(
+                    action, hash_type=digest.CHASH)
                 if action.name == "signature":
+                        #
+                        # If we're checking a signature action and the filepath
+                        # parameter points to one of the chain certificates, we
+                        # need to verify against the most-preferred
+                        # [pkg.]chain.chashes[.<alg>] attribute that corresponds
+                        # to the filepath we're looking at. We determine the
+                        # index of the least-preferred chain hash that matches
+                        # our filename, and use the most-preferred chash to
+                        # verify against.
+                        #
+                        # i.e. if we have attributes:
+                        # chain="a.a b.b c.c"
+                        # chain.chashes="aa bb cc" \
+                        #   pkg.chain.chashes.sha256="AA BB CC"
+                        #
+                        # and we're looking at file "b.b" then we must compare
+                        # our computed value against the "BB" chash.
+                        #
                         name = os.path.basename(filepath)
                         found = False
-                        assert len(action.get_chain_certs()) == \
+                        assert len(action.get_chain_certs(
+                            least_preferred=True)) == \
                             len(action.get_chain_certs_chashes())
-                        for n, c in zip(action.get_chain_certs(),
+                        for n, c in zip(
+                            action.get_chain_certs(least_preferred=True),
                             action.get_chain_certs_chashes()):
                                 if name == n:
                                         found = True
@@ -2482,7 +2508,11 @@
                         ofile = open(os.devnull, "wb")
 
                         try:
-                                fhash = misc.gunzip_from_stream(ifile, ofile)
+                                hash_attr, hash_val, hash_func = \
+                                    digest.get_preferred_hash(action,
+                                        hash_type=digest.HASH)
+                                fhash = misc.gunzip_from_stream(ifile, ofile,
+                                    hash_func=hash_func)
                         except zlib.error, e:
                                 s = os.stat(filepath)
                                 os.remove(filepath)
@@ -2494,19 +2524,32 @@
                         ifile.close()
                         ofile.close()
 
-                        if action.hash != fhash:
+                        if hash_val != fhash:
                                 s = os.stat(filepath)
                                 os.remove(filepath)
                                 raise tx.InvalidContentException(action.path,
                                     "hash failure:  expected: %s"
-                                    "computed: %s" % (action.hash, fhash),
+                                    "computed: %s" % (hash, fhash),
                                     size=s.st_size)
                         return
 
-                newhash = misc.get_data_digest(filepath)[0]
+                newhash = misc.get_data_digest(filepath,
+                    hash_func=chash_func)[0]
                 if chash != newhash:
                         s = os.stat(filepath)
-                        os.remove(filepath)
+                        # Check whether we're using the path as a part of the
+                        # content cache, or whether we're actually looking at a
+                        # file:// repository. It's safe to remove the corrupted
+                        # file only if it is part of a cache. Otherwise,
+                        # "pkgrepo verify/fix" should be used to check
+                        # repositories.
+                        cache_fms = self.cfg.get_caches(readonly=False)
+                        remove_content = False
+                        # ('cache_fm' avoids shadowing the file_manager
+                        # module, imported above as 'fm'.)
+                        for cache_fm in cache_fms:
+                                if filepath.startswith(cache_fm.root):
+                                        remove_content = True
+                        if remove_content:
+                                os.remove(filepath)
-                        raise tx.InvalidContentException(path,
+                        raise tx.InvalidContentException(action.path,
                             "chash failure: expected: %s computed: %s" % \
                             (chash, newhash), size=s.st_size)
@@ -3026,11 +3069,12 @@
                                     filesz, cachehit=True)
                         return
 
-                hashval = action.hash
-
-                self.add_hash(hashval, action)
+                # only retrieve the least preferred hash for this action
+                hash_attr, hash_val, hash_func = \
+                    digest.get_least_preferred_hash(action)
+                self.add_hash(hash_val, action)
                 if action.name == "signature":
-                        for c in action.get_chain_certs():
+                        for c in action.get_chain_certs(least_preferred=True):
                                 self.add_hash(c, action)
 
         def add_hash(self, hashval, item):
@@ -3109,7 +3153,7 @@
             progtrack=None, ccancel=None, alt_repo=None):
                 """Supply the destination publisher in the pub argument.
                 The transport object should be passed in xport.
-                
+
                 'final_dir' indicates the directory the retrieved files should
                 be moved to after retrieval. If it is set to None, files will
                 not be moved and remain in the cache directory specified
@@ -3129,18 +3173,19 @@
 
                 cpath = self._transport._action_cached(action,
                     self.get_publisher())
-                hashval = action.hash
+                hash_attr, hash_val, hash_func = \
+                    digest.get_least_preferred_hash(action)
 
                 if cpath and self._final_dir:
-                        self._final_copy(hashval, cpath)
+                        self._final_copy(hash_val, cpath)
                         if self._progtrack:
                                 filesz = int(misc.get_pkg_otw_size(action))
                                 self._progtrack.download_add_progress(1, filesz,
                                     cachehit=True)
                 else:
-                        self.add_hash(hashval, action)
+                        self.add_hash(hash_val, action)
                 if action.name == "signature":
-                        for c in action.get_chain_certs():
+                        for c in action.get_chain_certs(least_preferred=True):
                                 cpath = self._transport._action_cached(action,
                                     self.get_publisher(), in_hash=c)
                                 if cpath and self._final_dir:
@@ -3234,7 +3279,7 @@
                 src = file(current_path, "rb")
                 outfile = os.fdopen(fd, "wb")
                 if self._decompress:
-                        misc.gunzip_from_stream(src, outfile)
+                        misc.gunzip_from_stream(src, outfile, ignore_hash=True)
                 else:
                         while True:
                                 buf = src.read(64 * 1024)
@@ -3279,7 +3324,7 @@
                 repo = publisher.Repository(origins=repouri_list)
 
         for origin in repo.origins:
-                if origin.scheme == "https": 
+                if origin.scheme == "https":
                         origin.ssl_key = ssl_key
                         origin.ssl_cert = ssl_cert
 
@@ -3316,7 +3361,7 @@
                 if p.repository:
                         for origin in p.repository.origins:
-                                if origin.scheme == \
-                                    pkg.client.publisher.SSL_SCHEMES: 
+                                if origin.scheme in \
+                                    pkg.client.publisher.SSL_SCHEMES:
                                         origin.ssl_key = ssl_key
                                         origin.ssl_cert = ssl_cert
 
--- a/src/modules/depotcontroller.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/depotcontroller.py	Fri Oct 04 10:56:25 2013 +1300
@@ -19,7 +19,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import httplib
@@ -386,6 +386,9 @@
                 return args
 
-        def __initial_start(self):
+        def __initial_start(self, env_arg=None):
+                """'env_arg' can be a dictionary of additional os.environ
+                entries to use when starting the depot."""
+
                 if self.__state != self.HALTED:
                         raise DepotStateException("Depot already starting or "
                             "running")
@@ -416,6 +419,7 @@
                 self.__starttime = time.time()
 
-        def start(self):
+        def start(self, env_arg=None):
                 try:
-                        self.__initial_start()
+                        self.__initial_start(env_arg=env_arg)
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/modules/digest.py	Fri Oct 04 10:56:25 2013 +1300
@@ -0,0 +1,362 @@
+#!/usr/bin/python2.6
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+#
+
+import hashlib
+
+# When running the test suite, we alter our behaviour depending on certain
+# debug flags.
+from pkg.client.debugvalues import DebugValues
+
+# pkg(5) uses cryptographic hash functions for a number of tasks. We define the
+# default hash function, along with the hash name here. Note that the use of
+# hashes in package metadata is *not* governed by this value, since multiple
+# hashes are supported for payload-bearing actions in a package.
+#
+# Some uses of hashes are image-format specific, and may require image version
+# increments, in which case the required algorithm is hardcoded where it is
+# used, along with with an appropriate comment.
+#
+# Other uses are essentially volatile, and the hash used has no persistence
+# (e.g. saving the hash to a file in a temporary directory, when the hash gets
+# regenerated on service restart). For those volatile uses, DEFAULT_HASH_FUNC is
+# recommended.
+DEFAULT_HASH_FUNC = hashlib.sha1
+DEFAULT_HASH_NAME = "sha-1"
+
+# DEFAULT_XXX_ATTRS are the attributes added to actions by the packaging system
+# at publication time.
+#
+# Notably, the hashes we add to an action at publication *do not* need to
+# correspond to the hashes we may use to verify action payload during install or
+# update, allowing an upgrade path where we could choose to drop publication
+# support for a certain hash algorithm, but still retain the ability to install
+# actions using that hash.
+#
+# The order of these lists of attributes is significant only to the
+# extent that the repository code will store the file in the repository using
+# the first hash value in the list when using the *old* publication model (i.e.
+# a transaction, with multiple add_file(..) calls to add content).
+#
+# Otherwise, when publishing, we always store files in the repository
+# using the "least preferred" hash for maximum backwards compatibility with
+# older packaging tools that expect to be able to find those hashes in the
+# repository, but do add additional hashes to the action metadata.
+#
+# When using the transport to download content from a repository, we use the
+# "least preferred" hash for file retrieval, but verify the installed content
+# using the "most preferred" hash. See get_preferred_hash(..),
+# get_least_preferred_hash(..) and get_common_preferred_hash(..).
+#
+if DebugValues["hash"] == "sha1+sha256":
+        # Simulate pkg(5) where SHA-1 and SHA-256 are used for publication
+        DEFAULT_HASH_ATTRS = ["hash", "pkg.hash.sha256"]
+        DEFAULT_CHASH_ATTRS = ["chash", "pkg.chash.sha256"]
+        DEFAULT_CONTENT_HASH_ATTRS = ["elfhash", "pkg.content-hash.sha256"]
+        DEFAULT_CHAIN_ATTRS = ["chain", "pkg.chain.sha256"]
+        DEFAULT_CHAIN_CHASH_ATTRS = ["chain.chashes",
+            "pkg.chain.chashes.sha256"]
+
+elif DebugValues["hash"] == "sha256":
+        # Simulate pkg(5) where SHA-1 is no longer used for publication
+        DEFAULT_HASH_ATTRS = ["pkg.hash.sha256"]
+        DEFAULT_CHASH_ATTRS = ["pkg.chash.sha256"]
+        DEFAULT_CONTENT_HASH_ATTRS = ["pkg.content-hash.sha256"]
+        DEFAULT_CHAIN_ATTRS = ["pkg.chain.sha256"]
+        DEFAULT_CHAIN_CHASH_ATTRS = ["pkg.chain.chashes.sha256"]
+
+else:
+        # The current default is to add just a single hash value for each hash
+        # type
+        DEFAULT_HASH_ATTRS = ["hash"]
+        DEFAULT_CHASH_ATTRS = ["chash"]
+        # 'elfhash' was the only content-hash attribute originally supported
+        DEFAULT_CONTENT_HASH_ATTRS = ["elfhash"]
+        DEFAULT_CHAIN_ATTRS = ["chain"]
+        DEFAULT_CHAIN_CHASH_ATTRS = ["chain.chashes"]
+
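+# For example (illustrative): under the default configuration above, a
+# published file action carries only the legacy attributes, e.g.
+#
+#   file <sha-1> chash=<sha-1> path=etc/example ...
+#
+# whereas under -D hash=sha1+sha256 the same action would additionally carry
+# pkg.hash.sha256 and pkg.chash.sha256 attributes.
+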
+# The types of hashes we compute or consult for actions.
+HASH = 0
+CHASH = 1
+CONTENT_HASH = 2
+CHAIN = 3
+CHAIN_CHASH = 4
+
+# In the dictionaries below, we map the action attributes to the class or
+# factory-method that returns an object used to compute that attribute. The
+# class or factory-method is called with no arguments and must return an
+# object with an 'update(data)' method, used to update the hash value being
+# computed with this data, along with a 'hexdigest()' method to return the
+# hexadecimal value of the hash.
+#
+# At present, these are all hashlib factory methods. When maintaining these
+# dictionaries, it is important to *never remove* entries from them, otherwise
+# clients with installed packages will not be able to verify their content when
+# pkg(5) is updated.
+
+# A dictionary of the pkg(5) hash attributes we know about.
+if DebugValues["hash"] == "sha1":
+        # Simulate older non-SHA2 aware pkg(5) code
+        HASH_ALGS = {"hash": hashlib.sha1}
+else:
+        HASH_ALGS = {
+            "hash":            hashlib.sha1,
+            "pkg.hash.sha256": hashlib.sha256,
+        }
+
+# A dictionary of the compressed hash attributes we know about.
+CHASH_ALGS = {}
+for key in HASH_ALGS:
+        CHASH_ALGS[key.replace("hash", "chash")] = HASH_ALGS[key]
+
+# A dictionary of the content-hash attributes we know about.
+# For now, ELF files are the only ones which have a specific content-hash
+# attribute.
+CONTENT_HASH_ALGS = {}
+for key in HASH_ALGS:
+        if key == "hash":
+                CONTENT_HASH_ALGS["elfhash"] = HASH_ALGS[key]
+        else:
+                CONTENT_HASH_ALGS[key.replace("hash", "content-hash")] = \
+                    HASH_ALGS[key]
+
+# A dictionary of signature action chain hash attributes we know about.
+CHAIN_ALGS = {}
+for key in HASH_ALGS:
+        CHAIN_ALGS[key.replace("hash", "chain")] = HASH_ALGS[key]
+
+# A dictionary of signature action chain chash attributes we know about.
+CHAIN_CHASH_ALGS = {}
+for key in HASH_ALGS:
+        CHAIN_CHASH_ALGS[key.replace("hash", "chain.chashes")] = HASH_ALGS[key]
+
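+# As a sketch of the derivations above: with the default HASH_ALGS, the
+# derived dictionaries work out to
+#
+#   CHASH_ALGS        = {"chash": sha1, "pkg.chash.sha256": sha256}
+#   CONTENT_HASH_ALGS = {"elfhash": sha1, "pkg.content-hash.sha256": sha256}
+#   CHAIN_ALGS        = {"chain": sha1, "pkg.chain.sha256": sha256}
+#   CHAIN_CHASH_ALGS  = {"chain.chashes": sha1,
+#                        "pkg.chain.chashes.sha256": sha256}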
+
+# Ordered lists of "most preferred" hash algorithm to "least preferred"
+# algorithm for each hash attribute we use. It's important to *never remove*
+# items from these lists, otherwise we would strand clients with installed
+# packages using hashes that correspond to that item. Instead, promote/demote
+# the hash algorithm so that better hashes are used for new packages.
+# 'hash' is a dummy attribute name, since it really references the action.hash
+# member.
+#
+if DebugValues["hash"] == "sha1":
+        RANKED_HASH_ATTRS = ("hash")
+elif DebugValues["hash"] == "sha2":
+        RANKED_HASH_ATTRS = ("pkg.hash.sha256")
+else:
+        RANKED_HASH_ATTRS = (
+            "pkg.hash.sha256",
+            "hash",
+        )
+
+RANKED_CHASH_ATTRS = tuple(key.replace("hash", "chash")
+    for key in RANKED_HASH_ATTRS)
+_content_hash_attrs = []
+for key in RANKED_HASH_ATTRS:
+        if key == "hash":
+                _content_hash_attrs.append("elfhash")
+        else:
+                _content_hash_attrs.append(key.replace("hash", "content-hash"))
+
+RANKED_CONTENT_HASH_ATTRS = tuple(_content_hash_attrs)
+RANKED_CHAIN_ATTRS = tuple(key.replace("hash", "chain") for key in
+    RANKED_HASH_ATTRS)
+RANKED_CHAIN_CHASH_ATTRS = tuple(key.replace("hash", "chain.chashes") for key in
+    RANKED_HASH_ATTRS)
+
+
+# We keep reverse-order lists for all of the hash attributes we know about
+# because hash retrieval from the repository is always done using the least
+# preferred hash, allowing for backwards compatibility with existing clients.
+# Rather than compute the reverse-list every time we call
+# get_least_preferred_hash(..) we compute them here.
+REVERSE_RANKED_HASH_ATTRS = RANKED_HASH_ATTRS[::-1]
+REVERSE_RANKED_CHASH_ATTRS = RANKED_CHASH_ATTRS[::-1]
+REVERSE_RANKED_CONTENT_HASH_ATTRS = RANKED_CONTENT_HASH_ATTRS[::-1]
+REVERSE_RANKED_CHAIN_ATTRS = RANKED_CHAIN_ATTRS[::-1]
+REVERSE_RANKED_CHAIN_CHASH_ATTRS = RANKED_CHAIN_CHASH_ATTRS[::-1]
+
+
+def _get_hash_dics(hash_type, reverse=False):
+        """Based on the 'hash_type', return a tuple describing the ranking of
+        hash attributes from "most preferred" to "least preferred" and a
+        mapping of those attributes to the hash algorithms that are used to
+        compute those attributes.
+
+        If 'reverse' is true, return the rank_tuple in reverse order, from least
+        preferred hash to most preferred hash.
+        """
+
+        if hash_type == HASH:
+                if reverse:
+                        rank_tuple = REVERSE_RANKED_HASH_ATTRS
+                else:
+                        rank_tuple = RANKED_HASH_ATTRS
+                hash_dic = HASH_ALGS
+        elif hash_type == CHASH:
+                if reverse:
+                        rank_tuple = REVERSE_RANKED_CHASH_ATTRS
+                else:
+                        rank_tuple = RANKED_CHASH_ATTRS
+                hash_dic = CHASH_ALGS
+        elif hash_type == CONTENT_HASH:
+                if reverse:
+                        rank_tuple = REVERSE_RANKED_CONTENT_HASH_ATTRS
+                else:
+                        rank_tuple = RANKED_CONTENT_HASH_ATTRS
+                hash_dic = CONTENT_HASH_ALGS
+        elif hash_type == CHAIN:
+                if reverse:
+                        rank_tuple = REVERSE_RANKED_CHAIN_ATTRS
+                else:
+                        rank_tuple = RANKED_CHAIN_ATTRS
+                hash_dic = CHAIN_ALGS
+        elif hash_type == CHAIN_CHASH:
+                if reverse:
+                        rank_tuple = REVERSE_RANKED_CHAIN_CHASH_ATTRS
+                else:
+                        rank_tuple = RANKED_CHAIN_CHASH_ATTRS
+                hash_dic = CHAIN_CHASH_ALGS
+        else:
+                rank_tuple = None
+                hash_dic = None
+
+        return rank_tuple, hash_dic
+
+
+def get_preferred_hash(action, hash_type=HASH):
+        """Returns a tuple of the form (hash_attr, hash_val, hash_func)
+        where 'hash_attr' is the preferred hash attribute name, 'hash_val'
+        is the preferred hash value, and 'hash_func' is the function
+        used to compute the preferred hash based on the available
+        pkg.*hash.* attributes declared in the action."""
+
+        rank_tuple, hash_dic = _get_hash_dics(hash_type)
+        if not (rank_tuple and hash_dic):
+                raise ValueError("Unknown hash_type %s passed to "
+                    "get_preferred_hash" % hash_type)
+
+        for hash_attr_name in rank_tuple:
+                if hash_attr_name in action.attrs:
+                        return hash_attr_name, action.attrs[hash_attr_name], \
+                            hash_dic[hash_attr_name]
+
+        # fall back to the default hash member since it's not in action.attrs
+        if hash_type == HASH:
+                return None, action.hash, hashlib.sha1
+        # an action can legitimately have no chash
+        if hash_type == CHASH:
+                return None, None, DEFAULT_HASH_FUNC
+        # an action can legitimately have no content-hash if it's not a file
+        # type we know about
+        if hash_type == CONTENT_HASH:
+                return None, None, None
+        # an action can legitimately have no chain
+        if hash_type == CHAIN:
+                return None, None, None
+        # an action can legitimately have no chain_chash
+        if hash_type == CHAIN_CHASH:
+                return None, None, None
+
+        # This should never happen.
+        raise Exception("Error determining the preferred hash for %s %s" %
+            (action, hash_type))
+
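+# Illustrative: an action published with -D hash=sha1+sha256 set carries a
+# pkg.hash.sha256 attribute, so get_preferred_hash(act) returns
+# ("pkg.hash.sha256", <sha-256 value>, hashlib.sha256), while an action
+# carrying only the legacy hash member yields (None, act.hash, hashlib.sha1).
+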
+
+def get_least_preferred_hash(action, hash_type=HASH):
+        """Returns a tuple of the least preferred hash attribute name, the hash
+        value that should result when we compute the hash, and the function used
+        to compute the hash based on the available hash and pkg.*hash.*
+        attributes declared in the action."""
+
+        # handle the default hash member specially, since it's not stored in
+        # action.attrs
+        if hash_type == HASH:
+                if not action:
+                        return "hash", None, hashlib.sha1
+
+                # This is nearly always true, except when we're running the
+                # test suite and have intentionally disabled SHA-1 hashes.
+                if "hash" in DEFAULT_HASH_ATTRS:
+                        return None, action.hash, hashlib.sha1
+
+        rank_list, hash_dic = _get_hash_dics(hash_type, reverse=True)
+        if not (rank_list and hash_dic):
+                raise ValueError("Unknown hash_type %s passed to "
+                    "get_preferred_hash" % hash_type)
+
+        if not action:
+                return rank_list[0], None, hash_dic[rank_list[0]]
+
+        for hash_attr_name in rank_list:
+                if hash_attr_name in action.attrs:
+                        return hash_attr_name, action.attrs[hash_attr_name], \
+                            hash_dic[hash_attr_name]
+
+        # an action can legitimately have no chash
+        if hash_type == CHASH:
+                return None, None, DEFAULT_HASH_FUNC
+        # an action can legitimately have no content-hash if it's not a file
+        # type we know about
+        if hash_type == CONTENT_HASH:
+                return None, None, None
+        # an action can legitimately have no chain
+        if hash_type == CHAIN:
+                return None, None, None
+
+        # This should never happen.
+        raise Exception("Error determining the least preferred hash for %s %s" %
+            (action, hash_type))
+
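+# Illustrative: under the default configuration, "hash" is in
+# DEFAULT_HASH_ATTRS, so get_least_preferred_hash(act) short-circuits to
+# (None, act.hash, hashlib.sha1); under -D hash=sha256 it would instead
+# return ("pkg.hash.sha256", <sha-256 value>, hashlib.sha256).
+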
+
+def get_common_preferred_hash(action, old_action, hash_type=HASH):
+        """Returns the best common hash attribute of those shared between a new
+        action and an installed (old) version of that action. We return the
+        name of the common attribute, the new and original values of that
+        attribute, and the function used to compute the hash.
+
+        If no common attribute is found, we fall back to the legacy
+        <Action>.hash member, assuming it is not None for both the new and
+        orig actions, and specify hashlib.sha1 as the algorithm. If no 'hash'
+        member is set, we return a tuple of None objects."""
+
+        if not old_action:
+                return None, None, None, None
+
+        rank_list, hash_dic = _get_hash_dics(hash_type)
+        if not (rank_list and hash_dic):
+                raise ValueError("Unknown hash_type %s passed to "
+                    "get_preferred_common_hash" % hash_type)
+
+        common_attrs = set(
+            action.attrs.keys()).intersection(set(old_action.attrs.keys()))
+        for hash_attr_name in rank_list:
+                if hash_attr_name in common_attrs:
+                        return hash_attr_name, action.attrs[hash_attr_name], \
+                            old_action.attrs[hash_attr_name], \
+                            hash_dic[hash_attr_name]
+
+        if action.hash and old_action.hash:
+                return None, action.hash, old_action.hash, hashlib.sha1
+        return None, None, None, None
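+
+# Illustrative: when comparing a new action against the installed version
+# during update, if both carry pkg.hash.sha256 the comparison uses that
+# attribute; otherwise it falls back to the legacy sha-1 hash members.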
--- a/src/modules/elf.c	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/elf.c	Fri Oct 04 10:56:25 2013 +1300
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ *  Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/stat.h>
@@ -38,6 +37,23 @@
 
 #include <Python.h>
 
+/*
+ * When getting information about ELF files, sometimes we want to decide
+ * which types of hash we want to calculate. This structure is used to
+ * return information from arg parsing Python method arguments.
+ *
+ * 'fd'      the file descriptor of an ELF file
+ * 'sha1'    an integer > 0 if we should calculate an SHA-1 hash
+ * 'sha256'  an integer > 0 if we should calculate an SHA-2 256 hash
+ *
+ */
+typedef struct
+{
+    int fd;
+    int sha1;
+    int sha256;
+} dargs_t;
+
 static int
 pythonify_ver_liblist_cb(libnode_t *n, void *info, void *info2)
 {
@@ -114,6 +130,42 @@
 	return (fd);
 }
 
+static dargs_t
+py_get_dyn_args(PyObject *args, PyObject *kwargs)
+{
+	int fd = -1;
+	char *f;
+        int get_sha1 = 1;
+        int get_sha256 = 0;
+
+        dargs_t dargs;
+        dargs.fd = -1;
+        /*
+         * By default, we always get an SHA-1 hash, and never get an SHA-2
+         * hash.
+         */
+        dargs.sha1 = 1;
+        dargs.sha256 = 0;
+
+        static char *kwlist[] = {"fd", "sha1", "sha256", NULL};
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|ii", kwlist, &f,
+            &get_sha1, &get_sha256)) {
+		PyErr_SetString(PyExc_ValueError, "could not parse argument");
+		return (dargs);
+	}
+
+	if ((fd = open(f, O_RDONLY)) < 0) {
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, f);
+		return (dargs);
+	}
+
+        dargs.fd = fd;
+        dargs.sha1 = get_sha1;
+        dargs.sha256 = get_sha256;
+	return (dargs);
+}
+
 /*
  * For ELF operations: Need to check if a file is an ELF object.
  */
@@ -185,7 +237,7 @@
 
 /*
  * Returns a dictionary with the relevant information.  No longer
- * accurately titled "get_dynamic," as it returns the hash as well.
+ * accurately titled "get_dynamic," as can return hashes as well.
  *
  * The hash is currently of the following sections (when present):
  * 		.text .data .data1 .rodata .rodata1
@@ -197,11 +249,16 @@
  *	defs: ["version", ... ],
  *	deps: [["file", ["versionlist"]], ...],
  * 	hash: "sha1hash"
+ *      pkg.content-hash.sha256: "sha2hash"
  * }
  *
  * If any item is empty or has no value, it is omitted from the
  * dictionary.
  *
+ * The keyword arguments "sha1" and "sha256" are allowed, which
+ * take Python booleans, declaring which hashes should be
+ * computed on the input file.
+ *
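+ * For example (illustrative), from Python:
+ *
+ *   elf.get_dynamic("/usr/bin/ls", sha1=True, sha256=True)
+ *
+ * returns a dictionary carrying both the "hash" and
+ * "pkg.content-hash.sha256" entries.
+ *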
  * XXX: Currently, defs contains some duplicate entries.  There
  * may be meaning attached to this, or it may just be something
  * worth trimming out at this stage or above.
@@ -209,20 +266,23 @@
  */
 /*ARGSUSED*/
 static PyObject *
-get_dynamic(PyObject *self, PyObject *args)
+get_dynamic(PyObject *self, PyObject *args, PyObject *keywords)
 {
-	int 	fd, i;
+	int 	i;
+        dargs_t         dargs;
 	dyninfo_t 	*dyn = NULL;
 	PyObject	*pdep = NULL;
 	PyObject	*pdef = NULL;
 	PyObject	*pdict = NULL;
 	char		hexhash[41];
+        char            hexsha256[65];
 	char		hexchars[17] = "0123456789abcdef";
 
-	if ((fd = py_get_fd(args)) < 0)
+	dargs = py_get_dyn_args(args, keywords);
+        if (dargs.fd < 0)
 		return (NULL);
 
-	if ((dyn = getdynamic(fd)) == NULL)
+	if ((dyn = getdynamic(dargs.fd, dargs.sha1, dargs.sha256)) == NULL)
 		goto out;
 
 	pdict = PyDict_New();
@@ -259,13 +319,25 @@
 		PyDict_SetItemString(pdict, "runpath", Py_BuildValue("s", str));
 	}
 
-	for (i = 0; i < 20; i++) {
-		hexhash[2 * i] = hexchars[(dyn->hash[i] & 0xf0) >> 4];
-		hexhash[2 * i + 1] = hexchars[dyn->hash[i] & 0x0f];
-	}
-	hexhash[40] = '\0';
+        if (dargs.sha1 > 0) {
+                for (i = 0; i < 20; i++) {
+                        hexhash[2 * i] = hexchars[(dyn->hash[i] & 0xf0) >> 4];
+                        hexhash[2 * i + 1] = hexchars[dyn->hash[i] & 0x0f];
+                }
+                hexhash[40] = '\0';
+        	PyDict_SetItemString(pdict, "hash", Py_BuildValue("s", hexhash));
+        }
 
-	PyDict_SetItemString(pdict, "hash", Py_BuildValue("s", hexhash));
+        if (dargs.sha256 > 0) {
+                for (i = 0; i < 32; i++) {
+                        hexsha256[2 * i] = \
+                            hexchars[(dyn->hash256[i] & 0xf0) >> 4];
+                        hexsha256[2 * i + 1] = hexchars[dyn->hash256[i] & 0x0f];
+                }
+                hexsha256[64] = '\0';
+                PyDict_SetItemString(pdict, "pkg.content-type.sha256",
+                    Py_BuildValue("s", hexsha256));
+        }
 	goto out;
 
 err:
@@ -275,16 +347,17 @@
 
 out:
 	if (dyn != NULL)
-		dyninfo_free(dyn);
+            dyninfo_free(dyn);
 
-	(void) close(fd);
+	(void) close(dargs.fd);
 	return (pdict);
 }
 
 static PyMethodDef methods[] = {
 	{ "is_elf_object", elf_is_elf_object, METH_VARARGS },
 	{ "get_info", get_info, METH_VARARGS },
-	{ "get_dynamic", get_dynamic, METH_VARARGS },
+	{ "get_dynamic", (PyCFunction)get_dynamic,
+        METH_VARARGS | METH_KEYWORDS},
 	{ NULL, NULL }
 };
 
--- a/src/modules/elfextract.c	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/elfextract.c	Fri Oct 04 10:56:25 2013 +1300
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ *  Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <libelf.h>
@@ -39,8 +38,9 @@
 #include <netinet/in.h>
 #include <inttypes.h>
 #if defined(__SVR4) && defined(__sun)
-/* Solaris has a built-in SHA-1 library interface */
+/* Solaris has built-in SHA-1 and SHA-2 library interfaces */
 #include <sha1.h>
+#include <sha2.h>
 #else
 /*
  * All others can use OpenSSL, but OpenSSL's method signatures
@@ -293,7 +293,8 @@
  * Reads a section in 64k increments, adding it to the hash.
  */
 static int
-readhash(int fd, SHA1_CTX *shc, off_t offset, off_t size)
+readhash(int fd, SHA1_CTX *shc, SHA256_CTX *shc2, off_t offset, off_t size,
+    int sha1, int sha256)
 {
 	off_t n;
 	char hashbuf[64 * 1024];
@@ -313,7 +314,12 @@
 			PyErr_SetFromErrno(PyExc_IOError);
 			return (-1);
 		}
-		SHA1Update(shc, hashbuf, rbytes);
+                if (sha1 > 0) {
+		        SHA1Update(shc, hashbuf, rbytes);
+                }
+                if (sha256 > 0) {
+                        SHA256Update(shc2, hashbuf, rbytes);
+                }
 		size -= rbytes;
 	} while (size != 0);
 
@@ -325,9 +331,11 @@
  * information we want from an ELF file.  Returns NULL
  * if it can't find everything (eg. not ELF file, wrong
  * class of ELF file).
+ * If sha1 is > 0, we compute an SHA-1 hash and store it in the returned
+ * dyninfo_t; if sha256 is > 0, we also compute and store an SHA-2 256 hash.
  */
 dyninfo_t *
-getdynamic(int fd)
+getdynamic(int fd, int sha1, int sha256)
 {
 	Elf		*elf = NULL;
 	Elf_Scn		*scn = NULL;
@@ -342,6 +350,7 @@
 	int		t = 0, num_dyn = 0, dynstr = -1;
 
 	SHA1_CTX	shc;
+        SHA256_CTX      shc2;
 	dyninfo_t	*dyn = NULL;
 
 	liblist_t	*deps = NULL;
@@ -374,7 +383,12 @@
 	}
 
 	/* get useful sections */
-	SHA1Init(&shc);
+        if (sha1 > 0) {
+                SHA1Init(&shc);
+        }
+        if (sha256 > 0) {
+                SHA256Init(&shc2);
+        }
 	while ((scn = elf_nextscn(elf, scn))) {
 		if (gelf_getshdr(scn, &shdr) != &shdr) {
 			PyErr_SetString(ElfError, elf_errmsg(-1));
@@ -386,7 +400,7 @@
 			goto bad;
 		}
 
-		if (hashsection(name)) {
+		if (hashsection(name) && (sha1 > 0 || sha256 > 0)) {
 			if (shdr.sh_type == SHT_NOBITS) {
 				/*
 				 * We can't just push shdr.sh_size into
@@ -398,12 +412,18 @@
 				uint64_t mask = 0xffffffff00000000ULL;
 				uint32_t top = htonl((uint32_t)((n & mask) >> 32));
 				uint32_t bot = htonl((uint32_t)n);
-				SHA1Update(&shc, &top, sizeof (top));
-				SHA1Update(&shc, &bot, sizeof (bot));
+                                if (sha1 > 0) {
+				        SHA1Update(&shc, &top, sizeof (top));
+                                        SHA1Update(&shc, &bot, sizeof (bot));
+                                }
+                                if (sha256 > 0) {
+                                        SHA256Update(&shc2, &top, sizeof (top));
+                                        SHA256Update(&shc2, &bot, sizeof (bot));
+                                }
 			} else {
 				int hash;
-				hash = readhash(fd, &shc, shdr.sh_offset,
-				    shdr.sh_size);
+                                hash = readhash(fd, &shc, &shc2, shdr.sh_offset,
+				    shdr.sh_size, sha1, sha256);
 
 				if (hash == -1)
 					goto bad;
@@ -584,8 +604,12 @@
 	dyn->deps = deps;
 	dyn->def = def;
 	dyn->vers = verdef;
-	SHA1Final(dyn->hash, &shc);
-
+        if (sha1 > 0) {
+	        SHA1Final(dyn->hash, &shc);
+        }
+        if (sha256 > 0) {
+                SHA256Final(dyn->hash256, &shc2);
+        }
 	return (dyn);
 
 bad:
--- a/src/modules/elfextract.h	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/elfextract.h	Fri Oct 04 10:56:25 2013 +1300
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ *  Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _ELFEXTRACT_H
@@ -48,6 +47,8 @@
 	liblist_t 	*vers;		/* version provided list (also	   */
 					/* 	contains offsets)	   */
 	unsigned char	hash[20];	/* SHA1 Hash of significant segs.  */
+	unsigned char	hash256[32];	/* SHA2 Hash of significant segs.  */
 	Elf		*elf;		/* elf data -- must be freed	   */
 } dyninfo_t;
 
@@ -61,7 +62,7 @@
 
 extern int iself(int fd);
 extern int iself32(int fd);
-extern dyninfo_t *getdynamic(int fd);
+extern dyninfo_t *getdynamic(int fd, int sha1, int sha256);
 extern void dyninfo_free(dyninfo_t *dyn);
 extern hdrinfo_t *getheaderinfo(int fd);
 
--- a/src/modules/flavor/elf.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/flavor/elf.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import os
@@ -176,7 +176,7 @@
 
         try:
                 ei = elf.get_info(proto_file)
-                ed = elf.get_dynamic(proto_file)
+                ed = elf.get_dynamic(proto_file, sha1=False, sha256=False)
         except elf.ElfError, e:
                 raise BadElfFile(proto_file, e)
         deps = [
--- a/src/modules/lint/pkglint_manifest.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/lint/pkglint_manifest.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 # Some pkg(5) specific lint manifest checks
@@ -304,6 +304,7 @@
                                 continue
 
                         if action.name == "file" and \
+                            action.attrs.get("pkg.filetype") == "elf" or \
                             "elfarch" in action.attrs:
                                 has_arch_file = True
 
--- a/src/modules/manifest.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/manifest.py	Fri Oct 04 10:56:25 2013 +1300
@@ -977,7 +977,8 @@
                         if signatures:
                                 # Generate manifest signature based upon
                                 # input content, but only if signatures
-                                # were requested.
+                                # were requested. In order to interoperate with
+                                # older clients, we must use sha-1 here.
                                 self.signatures = {
                                     "sha-1": self.hash_create(content)
                                 }
@@ -1205,6 +1206,8 @@
                 """This method takes a string representing the on-disk
                 manifest content, and returns a hash value."""
 
+                # This must be an SHA-1 hash in order to interoperate with
+                # older clients.
                 sha_1 = hashlib.sha1()
                 if isinstance(mfstcontent, unicode):
                         # Byte stream expected, so pass encoded.
--- a/src/modules/misc.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/misc.py	Fri Oct 04 10:56:25 2013 +1300
@@ -34,7 +34,6 @@
 import errno
 import fnmatch
 import getopt
-import hashlib
 import itertools
 import locale
 import os
@@ -62,6 +61,7 @@
 
 import pkg.client.api_errors as api_errors
 import pkg.portable as portable
+import pkg.digest as digest
 
 from pkg import VERSION
 from pkg.client import global_settings
@@ -330,13 +330,22 @@
 
         return False
 
-def gunzip_from_stream(gz, outfile):
+def gunzip_from_stream(gz, outfile, hash_func=None, hash_funcs=None,
+    ignore_hash=False):
         """Decompress a gzipped input stream into an output stream.
 
         The argument 'gz' is an input stream of a gzipped file and 'outfile'
-        is is an output stream.  gunzip_from_stream() decompresses data from
+        is an output stream.  gunzip_from_stream() decompresses data from
-        'gz' and writes it to 'outfile', and returns the hexadecimal SHA-1 sum
-        of that data.
+        'gz' and writes it to 'outfile', and returns the hexadecimal SHA sum
+        of that data using the hash_func supplied.
+
+        'hash_funcs', if supplied, is a list of hash attribute names (keys
+        into digest.HASH_ALGS) identifying the hashes we should compute. If
+        'hash_funcs' is supplied, a list of hexadecimal digests computed using
+        the corresponding algorithms is returned, in the same order as
+        'hash_funcs'.
+
+        If 'ignore_hash' is True, we do not compute a hash when decompressing
+        the content and do not return any value.
         """
 
         FHCRC = 2
@@ -344,6 +353,9 @@
         FNAME = 8
         FCOMMENT = 16
 
+        if not (hash_func or hash_funcs) and not ignore_hash:
+                raise ValueError("no hash functions for gunzip_from_stream")
+
         # Read the header
         magic = gz.read(2)
         if magic != "\037\213":
@@ -378,20 +390,46 @@
         if flag & FHCRC:
                 gz.read(2)
 
-        shasum = hashlib.sha1()
+        if ignore_hash:
+                pass
+        elif hash_funcs:
+                shasums = []
+                for f in hash_funcs:
+                        shasums.append(digest.HASH_ALGS[f]())
+        else:
+                shasum = hash_func()
         dcobj = zlib.decompressobj(-zlib.MAX_WBITS)
 
         while True:
                 buf = gz.read(64 * 1024)
                 if buf == "":
                         ubuf = dcobj.flush()
-                        shasum.update(ubuf) # pylint: disable=E1101
+                        if ignore_hash:
+                                pass
+                        elif hash_funcs:
+                                for sha in shasums:
+                                        sha.update(ubuf)
+                        else:
+                                shasum.update(ubuf) # pylint: disable=E1101
                         outfile.write(ubuf)
                         break
                 ubuf = dcobj.decompress(buf)
-                shasum.update(ubuf) # pylint: disable=E1101
+                if ignore_hash:
+                        pass
+                elif hash_funcs:
+                        for sha in shasums:
+                                sha.update(ubuf)
+                else:
+                        shasum.update(ubuf) # pylint: disable=E1101
                 outfile.write(ubuf)
 
+        if ignore_hash:
+                return
+        elif hash_funcs:
+                hexdigests = []
+                for sha in shasums:
+                        hexdigests.append(sha.hexdigest())
+                return hexdigests
         return shasum.hexdigest()
 
 class PipeError(Exception):
@@ -504,8 +542,10 @@
 
         return int(size)
 
-def get_data_digest(data, length=None, return_content=False):
-        """Returns a tuple of (SHA-1 hexdigest, content).
+def get_data_digest(data, length=None, return_content=False,
+    hash_attrs=None, hash_algs=None, hash_func=None):
+        """Returns a tuple of ({hash attribute name: hash value}, content)
+        or a tuple of (hash value, content) if 'hash_func' is supplied.
 
         'data' should be a file-like object or a pathname to a file.
 
@@ -514,7 +554,20 @@
 
         'return_content' is a boolean value indicating whether the
         second tuple value should contain the content of 'data' or
-        if the content should be discarded during processing."""
+        if the content should be discarded during processing.
+
+        'hash_attrs' is a list of keys describing the hashes we want to compute
+        for this data. The keys must be present in 'hash_algs', a dictionary
+        mapping keys to the factory methods that are used to create objects
+        to compute them. The factory method must take no parameters, and must
+        return an object that has 'update()' and 'hexdigest()' methods. In the
+        current implementation, these are all hashlib factory methods.
+
+        'hash_func' is provided as a convenience to simply hash the data with
+        a single hash algorithm. The value of 'hash_func' should be the factory
+        method used to compute that hash value, as described in the previous
+        paragraph.
+        """
 
         bufsz = 128 * 1024
         closefobj = False
@@ -527,15 +580,31 @@
         if length is None:
                 length = os.stat(data).st_size
 
-        # Read the data in chunks and compute the SHA1 hash as it comes in.  A
-        # large read on some platforms (e.g. Windows XP) may fail.
+        # Setup our results dictionary so that each attribute maps to a
+        # new hashlib object.
+        if hash_func:
+                hsh = hash_func()
+        else:
+                if hash_algs is None or hash_attrs is None:
+                        assert False, "get_data_digest without hash_attrs/algs"
+                hash_results = {}
+                for attr in hash_attrs:
+                        hash_results[attr] = hash_algs[attr]()
+
+        # Read the data in chunks and compute the SHA hashes as the data comes
+        # in.  A large read on some platforms (e.g. Windows XP) may fail.
         content = cStringIO.StringIO()
-        fhash = hashlib.sha1()
         while length > 0:
                 data = f.read(min(bufsz, length))
                 if return_content:
                         content.write(data)
-                fhash.update(data) # pylint: disable=E1101
+                if hash_func:
+                        hsh.update(data)
+                else:
+                        # update each hash with this data
+                        for attr in hash_attrs:
+                                hash_results[attr].update(
+                                    data) # pylint: disable=E1101
 
                 l = len(data)
                 if l == 0:
@@ -545,13 +614,30 @@
         if closefobj:
                 f.close()
 
-        return fhash.hexdigest(), content.read()
+        if hash_func:
+                return hsh.hexdigest(), content.read()
+
+        # The returned dictionary can now be populated with the hexdigests
+        # instead of the hashlib objects themselves.
+        for attr in hash_results:
+                hash_results[attr] = hash_results[attr].hexdigest()
+        return hash_results, content.read()
 
 def compute_compressed_attrs(fname, file_path, data, size, compress_dir,
-    bufsz=64*1024):
-        """Returns the size and hash of the compressed data.  If the file
-        located at file_path doesn't exist or isn't gzipped, it creates a file
-        in compress_dir named fname."""
+    bufsz=64*1024, chash_attrs=None, chash_algs=None):
+        """Returns the size and one or more hashes of the compressed data.  If
+        the file located at file_path doesn't exist or isn't gzipped, it creates
+        a file in compress_dir named fname.
+
+        'chash_attrs' is a list of the chash attributes we should compute, with
+        'chash_algs' being a dictionary that maps the attribute names to the
+        algorithms used to compute them.
+        """
+
+        if chash_attrs is None:
+                chash_attrs = digest.DEFAULT_CHASH_ATTRS
+        if chash_algs is None:
+                chash_algs = digest.CHASH_ALGS
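+
+        # With the defaults above, the returned 'chashes' dictionary maps each
+        # chash attribute (e.g. "chash", plus "pkg.chash.sha256" under the
+        # sha1+sha256 debug mode) to a hashlib object from which callers take
+        # the hexdigest.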
 
         #
         # This check prevents compressing a file which is already compressed.
@@ -597,14 +683,18 @@
         # to generate deterministic hashes for different files with identical
         # content.
         cfile = open(opath, "rb")
-        chash = hashlib.sha1()
+        chashes = {}
+        for chash_attr in chash_attrs:
+                chashes[chash_attr] = chash_algs[chash_attr]()
         while True:
                 cdata = cfile.read(bufsz)
                 if cdata == "":
                         break
-                chash.update(cdata) # pylint: disable=E1101
+                for chash_attr in chashes:
+                        chashes[chash_attr].update(
+                            cdata) # pylint: disable=E1101
         cfile.close()
-        return csize, chash
+        return csize, chashes
 
 class ProcFS(object):
         """This class is used as an interface to procfs."""
--- a/src/modules/p5p.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/p5p.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import atexit
@@ -34,6 +34,7 @@
 import pkg
 import pkg.client.api_errors as apx
 import pkg.client.publisher
+import pkg.digest as digest
 import pkg.fmri
 import pkg.manifest
 import pkg.misc
@@ -545,7 +546,7 @@
 
                 'pathname' is an optional string specifying the absolute path
                 of a file to add to the archive.  The file may be a regular
-                file, directory, symbolic link, or hard link. 
+                file, directory, symbolic link, or hard link.
 
                 'arcname' is an optional string specifying an alternative name
                 for the file in the archive.  If not given, the full pathname
@@ -667,16 +668,26 @@
                 # payload.  (That payload can consist of multiple files.)
                 file_dir = os.path.join(pub_dir, "file")
                 for a in m.gen_actions():
-                        if not a.has_payload or not a.hash:
+                        if not a.has_payload:
                                 # Nothing to archive.
                                 continue
 
-                        payloads = set([a.hash])
+                        pref_hattr, hval, hfunc = \
+                            digest.get_least_preferred_hash(a)
+                        if not hval:
+                                # Nothing to archive
+                                continue
+
+                        payloads = set([hval])
 
                         # Signature actions require special handling.
                         if a.name == "signature":
-                                payloads.update(a.attrs.get("chain",
-                                    "").split())
+                                pref_cattr, chain_val, func = \
+                                    digest.get_least_preferred_hash(a,
+                                    hash_type=digest.CHAIN)
+
+                                if chain_val:
+                                        for chain in chain_val.split():
+                                                payloads.add(chain)
 
                                 if repo:
                                         # This bit of logic only possible if
@@ -1221,7 +1232,7 @@
                         # A new publisher object is created with a copy of only
                         # the information that's needed for the archive.
                         npub = pkg.client.publisher.Publisher(pub.prefix,
-                            alias=pub.alias, 
+                            alias=pub.alias,
                             revoked_ca_certs=pub.revoked_ca_certs,
                             approved_ca_certs=pub.approved_ca_certs)
 
--- a/src/modules/p5s.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/p5s.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,14 +21,14 @@
 #
 
 #
-# Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import copy
-import hashlib
 import os
 import pkg.client.api_errors as api_errors
 import pkg.client.publisher as publisher
+import pkg.digest as digest
 import pkg.fmri as fmri
 import simplejson as json
 import urllib
@@ -74,7 +74,7 @@
                                     system=True)]
                         res.append(r)
                 return res
-        
+
         try:
                 dump_struct = json.loads(data)
         except ValueError, e:
@@ -102,7 +102,7 @@
                         alias = p.get("alias", None)
                         prefix = p.get("name", None)
                         sticky = p.get("sticky", True)
-                        
+
                         if not prefix:
                                 prefix = "Unknown"
 
@@ -190,7 +190,8 @@
                                 # to communicate with the system repository.
                                 res.append("http://%s/%s/%s" %
                                     (publisher.SYSREPO_PROXY, prefix,
-                                    hashlib.sha1(m.uri.rstrip("/")).hexdigest()
+                                    digest.DEFAULT_HASH_FUNC(
+                                    m.uri.rstrip("/")).hexdigest()
                                     ))
                         else:
                                 assert False, "%s is an unknown scheme." % \
--- a/src/modules/search_storage.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/search_storage.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,8 +21,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import os
@@ -614,21 +613,25 @@
 class IndexStoreSetHash(IndexStoreBase):
         def __init__(self, file_name):
                 IndexStoreBase.__init__(self, file_name)
+                # In order to interoperate with older clients, we must use sha-1
+                # here.
                 self.hash_val = hashlib.sha1().hexdigest()
 
         def set_hash(self, vals):
                 """Set the has value."""
-                self.hash_val = self.calc_hash(vals) 
+                self.hash_val = self.calc_hash(vals)
 
         def calc_hash(self, vals):
                 """Calculate the hash value of the sorted members of vals."""
                 vl = list(vals)
                 vl.sort()
+                # In order to interoperate with older clients, we must use sha-1
+                # here.
                 shasum = hashlib.sha1()
                 for v in vl:
                         shasum.update(v)
                 return shasum.hexdigest()
-                
+
         def write_dict_file(self, path, version_num):
                 """Write self.hash_val out to a line in a file """
                 IndexStoreBase._protected_write_dict_file(self, path,
--- a/src/modules/server/api.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/server/api.py	Fri Oct 04 10:56:25 2013 +1300
@@ -575,7 +575,7 @@
                         s = StringIO.StringIO()
                         lpath = self._depot.repo.file(lic.hash, pub=self._pub)
                         lfile = file(lpath, "rb")
-                        misc.gunzip_from_stream(lfile, s)
+                        misc.gunzip_from_stream(lfile, s, ignore_hash=True)
                         text = s.getvalue()
                         s.close()
                         license_lst.append(LicenseInfo(mfst.fmri, lic,
--- a/src/modules/server/depot.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/server/depot.py	Fri Oct 04 10:56:25 2013 +1300
@@ -857,7 +857,7 @@
         file_0._cp_config = { "response.stream": True }
 
         def file_1(self, *tokens):
-                """Outputs the contents of the file, named by the SHA-1 hash
+                """Outputs the contents of the file, named by the SHA hash
                 name in the request path, directly to the client."""
 
                 method = cherrypy.request.method
@@ -1320,7 +1320,8 @@
                                 continue
 
                         with file(lpath, "rb") as lfile:
-                                misc.gunzip_from_stream(lfile, lsummary)
+                                misc.gunzip_from_stream(lfile, lsummary,
+                                    ignore_hash=True)
                 lsummary.seek(0)
 
                 self.__set_response_expires("info", 86400*365, 86400*365)
@@ -2371,7 +2372,8 @@
                     cfg.PropList("address"),
                     cfg.PropDefined("cfg_file", allowed=["", "<pathname>"]),
                     cfg.Property("content_root"),
-                    cfg.PropList("debug", allowed=["", "headers"]),
+                    cfg.PropList("debug", allowed=["", "headers",
+                        "hash=sha256", "hash=sha1+sha256"]),
                     cfg.PropList("disable_ops"),
                     cfg.PropDefined("image_root", allowed=["",
                         "<abspathname>"]),
--- a/src/modules/server/repository.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/server/repository.py	Fri Oct 04 10:56:25 2013 +1300
@@ -19,13 +19,12 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 
 import cStringIO
 import codecs
 import datetime
 import errno
-import hashlib
 import logging
 import os
 import os.path
@@ -44,6 +43,7 @@
 import pkg.client.progress as progress
 import pkg.client.publisher as publisher
 import pkg.config as cfg
+import pkg.digest as digest
 import pkg.file_layout.file_manager as file_manager
 import pkg.file_layout.layout as layout
 import pkg.fmri as fmri
@@ -1325,7 +1325,11 @@
 
         def file(self, fhash):
                 """Returns the absolute pathname of the file specified by the
-                provided SHA1-hash name."""
+                provided SHA-n hash name. (At present, the repository format
+                always uses the least-preferred hash to store content, in
+                order to remain backwards compatible with older clients.
+                Actions may be published with additional hashes set, but those
+                do not influence where content is stored in the repository.)"""
 
                 if not self.file_root:
                         raise RepositoryUnsupportedOperationError()
@@ -1499,23 +1503,29 @@
                         progtrack = progress.NullProgressTracker()
 
                 def get_hashes(pfmri):
-                        """Given an FMRI, return a set containing all of the
-                        hashes of the files its manifest references."""
+                        """Given an FMRI, return a set of tuples containing all
+                        of the hashes of the files its manifest references.
+                        Each tuple is of the form (hash value, hash function)"""
 
                         m = self._get_manifest(pfmri)
                         hashes = set()
                         for a in m.gen_actions():
-                                if not a.has_payload or not a.hash:
+                                if not a.has_payload:
                                         # Nothing to archive.
                                         continue
 
                                 # Action payload.
-                                hashes.add(a.hash)
+                                hattr, hval, hfunc = \
+                                    digest.get_least_preferred_hash(a)
+                                hashes.add(hval)
 
                                 # Signature actions have additional payloads.
                                 if a.name == "signature":
-                                        hashes.update(a.attrs.get("chain",
-                                            "").split())
+                                        chain_attr, chain_val, chain_func = \
+                                            digest.get_least_preferred_hash(a,
+                                            hash_type=digest.CHAIN)
+                                        for chain in chain_val.split():
+                                                hashes.add(chain)
                         return hashes
 
                 self.__lock_rstore()
@@ -1871,8 +1881,15 @@
                 pfmri = reason.get("pkg")
                 if hsh and pfmri:
                         m = self._get_manifest(pfmri)
+                        # this is not terribly efficient, but the expectation is
+                        # that this will rarely happen.
                         for ac in m.gen_actions_by_types(
                             actions.payload_types.keys()):
+                                for hash in digest.DEFAULT_HASH_ATTRS:
+                                        if ac.attrs.get(hash) == hsh:
+                                                fpath = ac.attrs.get("path")
+                                                if fpath:
+                                                        reason["fpath"] = fpath
                                 if ac.hash == hsh:
                                         fpath = ac.attrs.get("path")
                                         if fpath:
@@ -1922,24 +1939,42 @@
                 return error, path, message, reason
 
         def __get_hashes(self, path, pfmri):
-                """Given an PkgFmri, return a set containing all of the
-                hashes of the files its manifest references."""
+                """Given a PkgFmri, return a set containing tuples of all of
+                the hashes of the files its manifest references which should
+                correspond to files in the repository. Each tuple is of the form
+                (file_name, hash_value, hash_func) where hash_func is the
+                function used to compute that hash and file_name is the name
+                of the hash used to store the file in the repository."""
 
                 hashes = set()
                 errors = []
                 try:
                         m = self._get_manifest(pfmri)
                         for a in m.gen_actions():
-                                if not a.has_payload or not a.hash:
+                                if not a.has_payload:
                                         continue
 
+                                # We store files using the least preferred hash
+                                # in the repository to remain as backwards-
+                                # compatible as possible.
+                                attr, fname, hfunc = \
+                                    digest.get_least_preferred_hash(a)
+                                attr, hval, hfunc = \
+                                    digest.get_preferred_hash(a)
                                 # Action payload.
-                                hashes.add(a.hash)
+                                hashes.add((fname, hval, hfunc))
 
                                 # Signature actions have additional payloads
                                 if a.name == "signature":
-                                        hashes.update(
-                                            a.attrs.get("chain", "").split())
+                                        attr, fname, hfunc = \
+                                            digest.get_least_preferred_hash(a,
+                                            hash_type=digest.CHAIN)
+                                        attr, hval, hfunc = \
+                                            digest.get_preferred_hash(a,
+                                            hash_type=digest.CHAIN)
+                                        hashes.update([
+                                            (fname, chain, hfunc)
+                                            for chain in hval.split()])
                 except apx.PermissionsException:
                         errors.append((REPO_VERIFY_MFPERM, path,
                             {"err": _("Permission denied.")}))
@@ -1955,31 +1990,34 @@
                         return (REPO_VERIFY_PERM, path, {"err": str(e),
                             "pkg": pfmri})
 
-        def __verify_hash(self, path, pfmri, h):
-                """Perform hash verification on the given gzip file."""
+        def __verify_hash(self, path, pfmri, h, alg=digest.DEFAULT_HASH_FUNC):
+                """Perform hash verification on the given gzip file.
+                'path' is the full path to the file in the repository. 'pfmri'
+                is the package that we're verifying. 'h' is the expected hash
+                of the file's uncompressed content. 'alg' is the hash function
+                used to compute that hash."""
 
                 gzf = None
-                hash = os.path.basename(path)
                 try:
                         gzf = PkgGzipFile(fileobj=open(path, "rb"))
-                        fhash = hashlib.sha1()
+                        fhash = alg()
                         fhash.update(gzf.read())
                         actual = fhash.hexdigest()
                         if actual != h:
                                 return (REPO_VERIFY_BADHASH, path,
-                                    {"actual": actual, "hash": hash,
+                                    {"actual": actual, "hash": h,
                                     "pkg": pfmri})
                 except (ValueError, zlib.error), e:
                         return (REPO_VERIFY_BADGZIP, path,
-                            {"hash": hash, "pkg": pfmri})
+                            {"hash": h, "pkg": pfmri})
                 except IOError, e:
                         if e.errno in [errno.EACCES, errno.EPERM]:
                                 return (REPO_VERIFY_PERM, path,
-                                    {"err": str(e), "hash": hash,
+                                    {"err": str(e), "hash": h,
                                     "pkg": pfmri})
                         else:
                                 return (REPO_VERIFY_BADGZIP, path,
-                                    {"hash": hash, "pkg": pfmri})
+                                    {"hash": h, "pkg": pfmri})
                 finally:
                         if gzf:
                                 gzf.close()
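
__verify_hash() above decompresses the stored file and hashes the payload with
the algorithm recorded for it. A condensed, stdlib-only sketch of the same
check (payload_matches is a hypothetical name; the real code uses PkgGzipFile
and returns structured error tuples rather than a boolean):

        import gzip
        import hashlib

        def payload_matches(path, expected, alg=hashlib.sha1):
                # Hash the *uncompressed* payload with the same algorithm
                # that produced the manifest's hash attribute.
                gzf = gzip.GzipFile(path, "rb")
                try:
                        fhash = alg()
                        while True:
                                buf = gzf.read(64 * 1024)
                                if not buf:
                                        break
                                fhash.update(buf)
                        return fhash.hexdigest() == expected
                finally:
                        gzf.close()
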
@@ -2223,17 +2261,18 @@
 
                                 # verify payload delivered by this pkg
                                 errors = []
-                                for h in hashes:
+                                for fname, h, alg in hashes:
                                         try:
                                                 path = self.cache_store.lookup(
-                                                     h, check_existence=False)
+                                                     fname,
+                                                     check_existence=False)
                                         except apx.PermissionsException, e:
                                                 # if we can't even get the path
                                                 # within the repository, then
                                                 # we'll do the best we can to
                                                 # report the problem.
                                                 errors.append((REPO_VERIFY_PERM,
-                                                    pfmri, {"hash": h,
+                                                    pfmri, {"hash": fname,
                                                     "err": _("Permission "
                                                     "denied.", "path", h)}))
                                                 continue
@@ -2242,7 +2281,8 @@
                                         if err:
                                                 errors.append(err)
                                                 continue
-                                        err = self.__verify_hash(path, pfmri, h)
+                                        err = self.__verify_hash(path, pfmri, h,
+                                            alg=alg)
                                         if err:
                                                 errors.append(err)
                                 for err in errors:
@@ -2921,7 +2961,7 @@
                                 pfmri = fmri.PkgFmri(pfmri, client_release)
                 except fmri.FmriError, e:
                         raise RepositoryInvalidFMRIError(e)
- 
+
                 if pub and not pfmri.publisher:
                         pfmri.publisher = pub
 
@@ -3167,7 +3207,7 @@
                                 pfmri = fmri.PkgFmri(pfmri)
                 except fmri.FmriError, e:
                         raise RepositoryInvalidFMRIError(e)
- 
+
                 if not pub and pfmri.publisher:
                         pub = pfmri.publisher
                 elif pub and not pfmri.publisher:
@@ -3205,7 +3245,7 @@
                                 pfmri = fmri.PkgFmri(pfmri, client_release)
                 except fmri.FmriError, e:
                         raise RepositoryInvalidFMRIError(e)
- 
+
                 if pub and not pfmri.publisher:
                         pfmri.publisher = pub
 
--- a/src/modules/server/transaction.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/modules/server/transaction.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import calendar
@@ -34,6 +34,7 @@
 import urllib
 
 import pkg.actions as actions
+import pkg.digest as digest
 import pkg.fmri as fmri
 import pkg.manifest
 import pkg.misc as misc
@@ -467,10 +468,22 @@
                         action.data = lambda: open(os.devnull, "rb")
 
                 if action.data is not None:
-                        fname, data = misc.get_data_digest(action.data(),
-                            length=size, return_content=True)
+                        # get all hashes for this action
+                        hashes, data = misc.get_data_digest(action.data(),
+                            length=size, return_content=True,
+                            hash_attrs=digest.DEFAULT_HASH_ATTRS,
+                            hash_algs=digest.HASH_ALGS)
 
-                        action.hash = fname
+                        # set the hash member for backwards compatibility and
+                        # remove it from the dictionary
+                        action.hash = hashes.pop("hash", None)
+                        action.attrs.update(hashes)
+
+                        # now set the hash value that will be used for storing
+                        # the file in the repository.
+                        hash_attr, hash_val, hash_func = \
+                            digest.get_least_preferred_hash(action)
+                        fname = hash_val
 
                         # Extract ELF information
                         # XXX This needs to be modularized.
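
misc.get_data_digest() is now asked for several digests in one pass, returning
a dict keyed by hash attribute instead of a single value (hence the
hashes.pop("hash", ...) above). A minimal sketch of that contract, assuming a
file object and a known length; HASH_ATTRS/HASH_ALGS stand in for
digest.DEFAULT_HASH_ATTRS/digest.HASH_ALGS, and the real helper reads in
chunks and also accepts a pathname:

        import hashlib

        HASH_ATTRS = ["hash", "pkg.hash.sha256"]
        HASH_ALGS = {"hash": hashlib.sha1,
            "pkg.hash.sha256": hashlib.sha256}

        def get_data_digest(fobj, length, return_content=False,
            hash_attrs=HASH_ATTRS, hash_algs=HASH_ALGS):
                # One read, many digests: each requested attribute gets
                # its own digest object updated from the same content.
                content = fobj.read(length)
                hashes = {}
                for attr in hash_attrs:
                        h = hash_algs[attr]()
                        h.update(content)
                        hashes[attr] = h.hexdigest()
                return hashes, content if return_content else None
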
@@ -487,9 +500,34 @@
                                         raise TransactionContentError(e)
 
                                 try:
-                                        elf_hash = elf.get_dynamic(
-                                            elf_name)["hash"]
-                                        action.attrs["elfhash"] = elf_hash
+                                        # Check which content checksums to
+                                        # compute and add to the action
+                                        elf256 = "pkg.content-type.sha256"
+                                        elf1 = "elfhash"
+
+                                        if elf256 in \
+                                            digest.DEFAULT_CONTENT_HASH_ATTRS:
+                                                get_sha256 = True
+                                        else:
+                                                get_sha256 = False
+
+                                        if elf1 in \
+                                            digest.DEFAULT_CONTENT_HASH_ATTRS:
+                                                get_sha1 = True
+                                        else:
+                                                get_sha1 = False
+
+                                        dyn = elf.get_dynamic(
+                                            elf_name, sha1=get_sha1,
+                                            sha256=get_sha256)
+
+                                        if get_sha1:
+                                                action.attrs[elf1] = dyn["hash"]
+
+                                        if get_sha256:
+                                                action.attrs[elf256] = \
+                                                    dyn[elf256]
+
                                 except elf.ElfError:
                                         pass
                                 action.attrs["elfbits"] = str(elf_info["bits"])
@@ -506,9 +544,10 @@
                                         raise
                                 dst_path = None
 
-                        csize, chash = misc.compute_compressed_attrs(
+                        csize, chashes = misc.compute_compressed_attrs(
                             fname, dst_path, data, size, self.dir)
-                        action.attrs["chash"] = chash.hexdigest()
+                        for attr in chashes:
+                                action.attrs[attr] = chashes[attr].hexdigest()
                         action.attrs["pkg.csize"] = csize
                         chash = None
                         data = None
@@ -573,14 +612,18 @@
 
         def add_file(self, f, size=None):
                 """Adds the file to the Transaction."""
-
-                fname, data = misc.get_data_digest(f, length=size,
-                    return_content=True)
+                hashes, data = misc.get_data_digest(f, length=size,
+                    return_content=True, hash_attrs=digest.DEFAULT_HASH_ATTRS,
+                    hash_algs=digest.HASH_ALGS)
 
                 if size is None:
                         size = len(data)
 
                 try:
+                        # We don't have an Action yet, so passing None is fine.
+                        default_hash_attr = digest.get_least_preferred_hash(
+                            None)[0]
+                        fname = hashes[default_hash_attr]
                         dst_path = self.rstore.file(fname)
                 except Exception, e:
                         # The specific exception can't be named here due
@@ -590,9 +633,11 @@
                                 raise
                         dst_path = None
 
-                csize, chash = misc.compute_compressed_attrs(fname, dst_path,
-                    data, size, self.dir)
-                chash = None
+                csize, chashes = misc.compute_compressed_attrs(fname, dst_path,
+                    data, size, self.dir,
+                    chash_attrs=digest.DEFAULT_CHASH_ATTRS,
+                    chash_algs=digest.CHASH_ALGS)
+                chashes = None
                 data = None
 
                 self.remaining_payload_cnt -= 1
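
compute_compressed_attrs() similarly grows a dict interface: alongside the
compressed size it returns one digest object per requested chash attribute
(note the callers above invoke .hexdigest() on each value). A toy model that
skips the compression step the real helper performs and assumes the compressed
bytes are already in hand; CHASH_ATTRS/CHASH_ALGS stand in for
digest.DEFAULT_CHASH_ATTRS/digest.CHASH_ALGS:

        import hashlib

        CHASH_ATTRS = ["chash", "pkg.chash.sha256"]
        CHASH_ALGS = {"chash": hashlib.sha1,
            "pkg.chash.sha256": hashlib.sha256}

        def compute_compressed_attrs(cdata, chash_attrs=CHASH_ATTRS,
            chash_algs=CHASH_ALGS):
                # Hash the *compressed* bytes; callers extract hexdigests.
                chashes = {}
                for attr in chash_attrs:
                        chashes[attr] = chash_algs[attr]()
                        chashes[attr].update(cdata)
                return len(cdata), chashes
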
@@ -633,7 +678,7 @@
                 # XXX If we are going to publish, then we should augment
                 # our response with any other packages that moved to
                 # PUBLISHED due to the package's arrival.
-                
+
                 self.publish_package()
 
                 if add_to_catalog:
--- a/src/pkg/manifests/package:pkg.p5m	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/pkg/manifests/package:pkg.p5m	Fri Oct 04 10:56:25 2013 +1300
@@ -121,6 +121,7 @@
 file path=$(PYDIRVP)/pkg/cpiofile.py
 file path=$(PYDIRVP)/pkg/dependency.py
 file path=$(PYDIRVP)/pkg/depotcontroller.py
+file path=$(PYDIRVP)/pkg/digest.py
 file path=$(PYDIRVP)/pkg/elf.so
 file path=$(PYDIRVP)/pkg/facet.py
 dir  path=$(PYDIRVP)/pkg/file_layout
--- a/src/pkgrepo.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/pkgrepo.py	Fri Oct 04 10:56:25 2013 +1300
@@ -1516,6 +1516,9 @@
                                    "opt":  opt, "arg": arg })
                         DebugValues.set_value(key, value)
 
+        if DebugValues:
+                reload(pkg.digest)
+
         subcommand = None
         if pargs:
                 subcommand = pargs.pop(0)
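
This reload(pkg.digest) pattern recurs in each entry point touched by the
changeset: -D options are parsed into DebugValues first, and the module is
then re-imported so that globals computed from those values (such as
DEFAULT_HASH_ATTRS) pick them up. The sequence, using the Python 2 builtin
reload():

        import pkg.digest
        from pkg.client.debugvalues import DebugValues

        DebugValues.set_value("hash", "sha1+sha256")
        # Re-evaluate pkg.digest's module-level globals now that the
        # debug value is visible.
        reload(pkg.digest)
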
--- a/src/publish.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/publish.py	Fri Oct 04 10:56:25 2013 +1300
@@ -326,7 +326,7 @@
         if not repo_uri:
                 usage(_("A destination package repository must be provided "
                     "using -s."), cmd="publish")
- 
+
         if not pargs:
                 filelist = [("<stdin>", sys.stdin)]
         else:
@@ -740,6 +740,8 @@
         if repo_uri and not repo_uri.startswith("null:"):
                 repo_uri = misc.parse_uri(repo_uri)
 
+        if DebugValues:
+                reload(pkg.digest)
         subcommand = None
         if pargs:
                 subcommand = pargs.pop(0)
--- a/src/pull.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/pull.py	Fri Oct 04 10:56:25 2013 +1300
@@ -813,7 +813,7 @@
                 elif not pubs_specified:
                         unknown_pubs.append(sp)
 
-        # We only print warning if the user didn't specify any valid publishers 
+        # We only print a warning if the user didn't specify any valid publishers
         # to add/sync.
         if len(unknown_pubs):
                 txt = _("\nThe following publishers are present in the "
@@ -1291,7 +1291,11 @@
                                                     "rb")
                                         t.add(a)
                                         if a.name == "signature":
-                                                for fp in a.get_chain_certs():
+                                                # We always store content in the
+                                                # repository by the least-
+                                                # preferred hash.
+                                                for fp in a.get_chain_certs(
+                                                    least_preferred=True):
                                                         fname = os.path.join(
                                                             pkgdir, fp)
                                                         t.add_file(fname)
--- a/src/sign.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/sign.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,11 +21,12 @@
 #
 
 #
-# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import getopt
 import gettext
+import hashlib
 import locale
 import os
 import shutil
@@ -37,11 +38,13 @@
 import pkg.actions as actions
 import pkg.client.api_errors as api_errors
 import pkg.client.transport.transport as transport
+import pkg.digest as digest
 import pkg.fmri as fmri
 import pkg.manifest as manifest
 import pkg.misc as misc
 import pkg.publish.transaction as trans
 from pkg.client import global_settings
+from pkg.client.debugvalues import DebugValues
 from pkg.misc import emsg, msg, PipeError
 import M2Crypto as m2
 
@@ -60,7 +63,7 @@
 
         if cmd:
                 text = "%s: %s" % (cmd, text)
-                
+
         else:
                 text = "%s: %s" % (PKG_CLIENT_NAME, text)
 
@@ -119,7 +122,7 @@
         global_settings.client_name = "pkgsign"
 
         try:
-                opts, pargs = getopt.getopt(sys.argv[1:], "a:c:i:k:ns:",
+                opts, pargs = getopt.getopt(sys.argv[1:], "a:c:i:k:ns:D:",
                     ["help", "no-index", "no-catalog"])
         except getopt.GetoptError, e:
                 usage(_("illegal global option -- %s") % e.opt)
@@ -162,6 +165,14 @@
                         show_usage = True
                 elif opt == "--no-catalog":
                         add_to_catalog = False
+                elif opt == "-D":
+                        try:
+                                key, value = arg.split("=", 1)
+                                DebugValues.set_value(key, value)
+                        except (AttributeError, ValueError):
+                                error(_("%(opt)s takes argument of form "
+                                            "name=value, not %(arg)s") % {
+                                            "opt":  opt, "arg": arg })
 
         if show_usage:
                 usage(retcode=EXIT_OK)
@@ -201,12 +212,15 @@
                     "certificate.  Do not use the -k or -c options with this "
                     "algorithm.") % sig_alg)
 
+        if DebugValues:
+                reload(digest)
+
         errors = []
 
         t = misc.config_temp_root()
         temp_root = tempfile.mkdtemp(dir=t)
         del t
-        
+
         cache_dir = tempfile.mkdtemp(dir=temp_root)
         incoming_dir = tempfile.mkdtemp(dir=temp_root)
         chash_dir = tempfile.mkdtemp(dir=temp_root)
@@ -292,8 +306,12 @@
                                 # comparison to existing signatures.
                                 hsh = None
                                 if cert_path:
+                                        # Action identity still uses the 'hash'
+                                        # member of the action, so we need to
+                                        # stay with the sha1 hash.
                                         hsh, _dummy = \
-                                            misc.get_data_digest(cert_path)
+                                            misc.get_data_digest(cert_path,
+                                            hash_func=hashlib.sha1)
 
                                 # Check whether the signature about to be added
                                 # is identical, or almost identical, to existing
--- a/src/sysrepo.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/sysrepo.py	Fri Oct 04 10:56:25 2013 +1300
@@ -28,7 +28,6 @@
 import errno
 import getopt
 import gettext
-import hashlib
 import locale
 import logging
 import os
@@ -51,6 +50,7 @@
 import pkg.client.api
 import pkg.client.progress as progress
 import pkg.client.api_errors as apx
+import pkg.digest as digest
 import pkg.misc as misc
 import pkg.portable as portable
 import pkg.p5p as p5p
@@ -747,7 +747,7 @@
 
 def _uri_hash(uri):
         """Returns a string hash of the given URI"""
-        return hashlib.sha1(uri).hexdigest()
+        return digest.DEFAULT_HASH_FUNC(uri).hexdigest()
 
 def _chown_runtime_dir(runtime_dir):
         """Change the ownership of all files under runtime_dir to our sysrepo
--- a/src/tests/api/t_api_search.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/api/t_api_search.py	Fri Oct 04 10:56:25 2013 +1300
@@ -20,7 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -28,11 +28,8 @@
 import pkg5unittest
 
 import copy
-import difflib
 import os
-import re
 import shutil
-import sys
 import tempfile
 import time
 import unittest
@@ -41,12 +38,10 @@
 import pkg.client.api as api
 import pkg.client.api_errors as api_errors
 import pkg.client.query_parser as query_parser
-import pkg.client.progress as progress
 import pkg.fmri as fmri
 import pkg.indexer as indexer
 import pkg.portable as portable
 import pkg.search_storage as ss
-import pkg.server.repository as srepo
 
 
 class TestApiSearchBasics(pkg5unittest.SingleDepotTestCase):
@@ -426,12 +421,24 @@
         ])
 
         res_remote_file = set([
-            ('pkg:/[email protected]',
-             'path',
-             'file a686473102ba73bd7920fc0ab1d97e00a24ed704 chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin mode=0555 owner=root path=bin/example_path pkg.csize=30 pkg.size=12'),
-            ('pkg:/[email protected]',
-             'a686473102ba73bd7920fc0ab1d97e00a24ed704',
-             'file a686473102ba73bd7920fc0ab1d97e00a24ed704 chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin mode=0555 owner=root path=bin/example_path pkg.csize=30 pkg.size=12')
+            ("pkg:/[email protected]",
+             "path",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12"),
+            ("pkg:/[email protected]",
+             "a686473102ba73bd7920fc0ab1d97e00a24ed704",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12"),
+             ("pkg:/[email protected]",
+             "hash",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12")
         ]) | res_remote_path
 
         res_remote_url = set([
@@ -441,15 +448,30 @@
         ])
 
         res_remote_path_extra = set([
-            ('pkg:/[email protected]',
-             'basename',
-             'file a686473102ba73bd7920fc0ab1d97e00a24ed704 chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin mode=0555 owner=root path=bin/example_path pkg.csize=30 pkg.size=12'),
-            ('pkg:/[email protected]',
-             'path',
-             'file a686473102ba73bd7920fc0ab1d97e00a24ed704 chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin mode=0555 owner=root path=bin/example_path pkg.csize=30 pkg.size=12'),
-            ('pkg:/[email protected]',
-             'a686473102ba73bd7920fc0ab1d97e00a24ed704',
-             'file a686473102ba73bd7920fc0ab1d97e00a24ed704 chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin mode=0555 owner=root path=bin/example_path pkg.csize=30 pkg.size=12')
+            ("pkg:/[email protected]",
+             "basename",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12"),
+            ("pkg:/[email protected]",
+             "path",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12"),
+            ("pkg:/[email protected]",
+             "a686473102ba73bd7920fc0ab1d97e00a24ed704",
+             "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+             "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+             "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+             "pkg.size=12"),
+            ("pkg:/[email protected]",
+            "hash",
+            "file a686473102ba73bd7920fc0ab1d97e00a24ed704 "
+            "chash=f88920ce1f61db185d127ccb32dc8cf401ae7a83 group=bin "
+            "mode=0555 owner=root path=bin/example_path pkg.csize=30 "
+            "pkg.size=12")
         ])
 
         res_bad_pkg = set([
--- a/src/tests/api/t_elf.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/api/t_elf.py	Fri Oct 04 10:56:25 2013 +1300
@@ -20,7 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -31,7 +31,6 @@
 import pkg.elf as elf
 import os
 import re
-import sys
 import pkg.portable
 
 class TestElf(pkg5unittest.Pkg5TestCase):
@@ -82,5 +81,32 @@
                         elf.get_dynamic(p)
                         elf.get_info(p)
 
+        def test_get_dynamic_params(self):
+                """Test that get_dynamic(..) returns checksums according to the
+                parameters passed to the method."""
+
+                # Check that the hashes generated have the correct length
+                # for the algorithm used to generate them.
+                sha1_len = 40
+                sha256_len = 64
+
+                # the default is to return an SHA-1 elfhash only
+                d = elf.get_dynamic(self.elf_paths[0])
+                self.assert_(len(d["hash"]) == sha1_len)
+                self.assert_("pkg.content-type.sha256" not in d)
+
+                d = elf.get_dynamic(self.elf_paths[0], sha256=True)
+                self.assert_(len(d["hash"]) == sha1_len)
+                self.assert_(len(d["pkg.content-type.sha256"]) == sha256_len)
+
+                d = elf.get_dynamic(self.elf_paths[0], sha1=False, sha256=True)
+                self.assert_("hash" not in d)
+                self.assert_(len(d["pkg.content-type.sha256"]) == sha256_len)
+
+                d = elf.get_dynamic(self.elf_paths[0], sha1=False, sha256=False)
+                self.assert_("hash" not in d)
+                self.assert_("pkg.content-type.sha256" not in d)
+
+
 if __name__ == "__main__":
         unittest.main()
--- a/src/tests/api/t_manifest.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/api/t_manifest.py	Fri Oct 04 10:56:25 2013 +1300
@@ -32,6 +32,7 @@
 
 import pkg as pkg
 import pkg.client.api_errors as api_errors
+import pkg.digest as digest
 import pkg.manifest as manifest
 import pkg.misc as misc
 import pkg.actions as actions
@@ -415,15 +416,17 @@
         def test_store_to_disk(self):
                 """Verfies that a FactoredManifest gets force-loaded before it
                 gets stored to disk."""
- 
+
                 m1 = manifest.FactoredManifest("[email protected]", self.cache_dir,
                     pathname=self.foo_content_p5m)
 
                 tmpdir = tempfile.mkdtemp(dir=self.test_root)
                 path = os.path.join(tmpdir, "manifest.p5m")
                 m1.store(path)
-                self.assertEqual(misc.get_data_digest(path),
-                    misc.get_data_digest(self.foo_content_p5m))
+                self.assertEqual(misc.get_data_digest(path,
+                    hash_func=digest.DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(self.foo_content_p5m,
+                    hash_func=digest.DEFAULT_HASH_FUNC))
 
         def test_get_directories(self):
                 """Verifies that get_directories() works as expected."""
--- a/src/tests/api/t_p5p.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/api/t_p5p.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import testutils
@@ -31,10 +31,12 @@
 
 import difflib
 import errno
+import hashlib
 import unittest
 import os
 import pkg.catalog
 import pkg.client.progress
+import pkg.digest as digest
 import pkg.fmri
 import pkg.misc
 import pkg.p5p
@@ -632,18 +634,25 @@
                 arc = pkg.p5p.Archive(arc_path, mode="r",
                     archive_index=archive_index)
 
+                # We always store content using the least-preferred hash, so
+                # determine what that is so that we can verify it using
+                # gunzip_from_stream.
+                hash_func = digest.get_least_preferred_hash(None)[2]
+
                 # Test behaviour when specifying publisher.
                 nullf = open(os.devnull, "wb")
                 for h in hashes["test"]:
                         fobj = arc.get_package_file(h, pub="test")
-                        uchash = pkg.misc.gunzip_from_stream(fobj, nullf)
+                        uchash = pkg.misc.gunzip_from_stream(fobj, nullf,
+                            hash_func=hash_func)
                         self.assertEqual(uchash, h)
                         fobj.close()
 
                 # Test behaviour when not specifying publisher.
                 for h in hashes["test"]:
                         fobj = arc.get_package_file(h)
-                        uchash = pkg.misc.gunzip_from_stream(fobj, nullf)
+                        uchash = pkg.misc.gunzip_from_stream(fobj, nullf,
+                            hash_func=hash_func)
                         self.assertEqual(uchash, h)
                         fobj.close()
 
@@ -723,7 +732,8 @@
                 arc.add_repo_package(self.quux, repo)
                 arc.close()
 
-                # Get list of file hashes.
+                # Get list of file hashes. These will be the "least-preferred"
+                # hash for the actions being stored.
                 hashes = { "all": set() }
                 for rstore in repo.rstores:
                         for dirpath, dirnames, filenames in os.walk(
--- a/src/tests/cli/t_https.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_https.py	Fri Oct 04 10:56:25 2013 +1300
@@ -23,13 +23,13 @@
 #
 # Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
 #
-import sys
 
 import testutils
 if __name__ == "__main__":
         testutils.setup_environment("../../../proto")
 import pkg5unittest
 
+import hashlib
 import os
 import shutil
 import stat
@@ -165,14 +165,14 @@
                 """ Test that an expired cert for one publisher doesn't prevent
                making changes to other publishers due to certificate checks on
                 all configured publishers. (Bug 17018362)"""
-                
+
                 bad_cert_path = os.path.join(self.cs_dir,
                     "cs3_ch1_ta3_cert.pem")
                 good_cert_path = os.path.join(self.cs_dir,
                     self.get_cli_cert("test"))
                 self.ac.start()
                 self.image_create()
-                
+
                 # Set https-based publisher with correct cert.
                 self.seed_ta_dir("ta7")
                 self.pkg("set-publisher -k %(key)s -c %(cert)s -p %(url)s" % {
@@ -186,10 +186,12 @@
                 # Replace cert of first publisher with one that is expired.
                 # It doesn't need to match the key because we just want to
                 # test if the cert validation code works correctly so we are not
-                # actually using the cert. 
+                # actually using the cert.
 
-                # Cert is stored by content hash in the pkg config of the image.
-                ch = misc.get_data_digest(good_cert_path)[0]
+                # Cert is stored by content hash in the pkg config of the image,
+                # which must be a SHA-1 hash for backwards compatibility.
+                ch = misc.get_data_digest(good_cert_path,
+                    hash_func=hashlib.sha1)[0]
                 pkg_cert_path = os.path.join(self.get_img_path(), "var", "pkg",
                     "ssl", ch)
                 shutil.copy(bad_cert_path, pkg_cert_path)
--- a/src/tests/cli/t_pkg_install.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_install.py	Fri Oct 04 10:56:25 2013 +1300
@@ -1915,6 +1915,7 @@
             add dir mode=0755 owner=root group=bin path=etc
             add file tmp/config1 mode=0644 owner=root group=bin path=etc/foo
             add hardlink path=etc/foo.link target=foo
+            add license tmp/copyright1 license=copyright
             close
         """
         iron20 = """
@@ -1922,6 +1923,7 @@
             add dir mode=0755 owner=root group=bin path=etc
             add file tmp/config2 mode=0644 owner=root group=bin path=etc/foo
             add hardlink path=etc/foo.link target=foo
+            add license tmp/copyright2 license=copyright
             close
         """
 
@@ -3062,6 +3064,53 @@
                 self.pkg("update [email protected]")
                 self.file_contains(new_cfg_path, "preserve2")
 
+        def test_many_hashalgs(self):
+                """Test that when upgrading actions where the new action
+                contains more hash attributes than the old action, that the
+                upgrade works."""
+
+                self.pkgsend_bulk(self.rurl, (self.iron10))
+                self.image_create(self.rurl, destroy=True)
+                self.pkg("install [email protected]")
+                self.pkg("contents -m iron")
+                # We have not enabled SHA2 hash publication yet.
+                self.assert_("pkg.hash.sha256" not in self.output)
+
+                # publish with SHA1 and SHA2 hashes
+                self.pkgsend_bulk(self.rurl, self.iron20,
+                    debug_hash="sha1+sha256")
+
+                # verify that a non-SHA2 aware client can install these bits
+                self.pkg("-D hash=sha1 update")
+                self.image_create(self.rurl, destroy=True)
+
+                # This also tests package retrieval: we always retrieve packages
+                # with the least-preferred hash, but verify with the
+                # most-preferred hash.
+                self.pkg("install [email protected]")
+                self.pkg("contents -m iron")
+                self.assert_("pkg.hash.sha256" in self.output)
+
+                # publish with only SHA-2 hashes
+                self.pkgsend_bulk(self.rurl, self.iron20, debug_hash="sha256")
+
+                # verify that a non-SHA2 aware client cannot install these bits
+                # since there are no SHA1 hashes present
+                self.pkg("-D hash=sha1 update", exit=1)
+                self.assert_(
+                    "No file could be found for the specified hash name: "
+                    "'NOHASH'" in self.errout)
+
+                # Make sure we've been publishing only with sha256 by removing
+                # those known attributes, then checking that no SHA-1
+                # attributes remain.
+                self.pkg("-D hash=sha256 update")
+                self.pkg("contents -m iron")
+                for attr in ["pkg.hash.sha256", "pkg.chash.sha256"]:
+                        self.output = self.output.replace(attr, "")
+                self.assert_("hash" not in self.output)
+                self.assert_("chash" not in self.output)
+
 
 class TestPkgInstallActions(pkg5unittest.SingleDepotTestCase):
         # Only start/stop the depot once (instead of for every test)
--- a/src/tests/cli/t_pkg_publisher.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_publisher.py	Fri Oct 04 10:56:25 2013 +1300
@@ -29,6 +29,7 @@
         testutils.setup_environment("../../../proto")
 import pkg5unittest
 
+import hashlib
 import os
 import pkg.client.image as image
 import pkg.misc
@@ -160,8 +161,12 @@
                     exit=2)
 
                 # Listing publishers should succeed even if key file is gone.
+                # This test relies on using the same implementation used in
+                # image.py __store_publisher_ssl() which sets the paths to the
+                # SSL keys/certs.
                 img_key_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(key_path)[0])
+                    "ssl", pkg.misc.get_data_digest(key_path,
+                    hash_func=hashlib.sha1)[0])
                 os.unlink(img_key_path)
                 self.pkg("publisher test1")
 
@@ -187,7 +192,8 @@
 
                 # Listing publishers should be possible if cert file is gone.
                 img_cert_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(cert_path)[0])
+                    "ssl", pkg.misc.get_data_digest(cert_path,
+                    hash_func=hashlib.sha1)[0])
                 os.unlink(img_cert_path)
                 self.pkg("publisher test1", exit=3)
 
@@ -311,10 +317,15 @@
                 self.pkg("set-publisher --no-refresh -c %s test1" % cert_path)
                 self.pkg("set-publisher --no-refresh -k %s test1" % key_path)
 
+                # This test relies on using the same implementation used in
+                # image.py __store_publisher_ssl() which sets the paths to the
+                # SSL keys/certs.
                 img_key_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(key_path)[0])
+                    "ssl", pkg.misc.get_data_digest(key_path,
+                    hash_func=hashlib.sha1)[0])
                 img_cert_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(cert_path)[0])
+                    "ssl", pkg.misc.get_data_digest(cert_path,
+                    hash_func=hashlib.sha1)[0])
 
                 # Make the cert/key unreadable by unprivileged users.
                 os.chmod(img_key_path, 0000)
@@ -881,10 +892,15 @@
                     (key_path, cert_path))
                 self.pkg("publisher test1")
 
+                # This test relies on using the same implementation used in
+                # image.py __store_publisher_ssl() which sets the paths to the
+                # SSL keys/certs.
                 img_key_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(key_path)[0])
+                    "ssl", pkg.misc.get_data_digest(key_path,
+                    hash_func=hashlib.sha1)[0])
                 img_cert_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(cert_path)[0])
+                    "ssl", pkg.misc.get_data_digest(cert_path,
+                    hash_func=hashlib.sha1)[0])
                 self.assert_(img_key_path in self.output)
                 self.assert_(img_cert_path in self.output)
 
--- a/src/tests/cli/t_pkg_refresh.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_refresh.py	Fri Oct 04 10:56:25 2013 +1300
@@ -20,7 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -28,6 +28,7 @@
 import pkg5unittest
 
 import difflib
+import hashlib
 import os
 import re
 import shutil
@@ -331,10 +332,15 @@
                 self.pkg("set-publisher --no-refresh -k %s test1" % key_path)
 
 
+                # This test relies on using the same implementation used in
+                # image.py __store_publisher_ssl() which sets the paths to the
+                # SSL keys/certs.
                 img_key_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(key_path)[0])
+                    "ssl", pkg.misc.get_data_digest(key_path,
+                    hash_func=hashlib.sha1)[0])
                 img_cert_path = os.path.join(self.img_path(), "var", "pkg",
-                    "ssl", pkg.misc.get_data_digest(cert_path)[0])
+                    "ssl", pkg.misc.get_data_digest(cert_path,
+                    hash_func=hashlib.sha1)[0])
 
                 # Make the cert/key unreadable by unprivileged users.
                 os.chmod(img_key_path, 0000)
--- a/src/tests/cli/t_pkg_revert.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_revert.py	Fri Oct 04 10:56:25 2013 +1300
@@ -43,10 +43,7 @@
             add dir mode=0755 owner=root group=bin path=etc
             add file etc/file1 mode=0555 owner=root group=bin path=etc/file1
             close
-            open [email protected],5.11-0
-            add dir mode=0755 owner=root group=bin path=etc
-            add file etc/file2 mode=0555 owner=root group=bin path=etc/file2 revert-tag=bob
-            close
+            # [email protected] is published as part of pkgs2
             open [email protected],5.11-0
             add dir mode=0755 owner=root group=bin path=etc
             add file etc/file3 mode=0555 owner=root group=bin path=etc/file3 revert-tag=bob revert-tag=ted
@@ -80,6 +77,14 @@
             open [email protected],5.11-0
             add dir mode=0755 owner=root group=bin path=etc/y-dir revert-tag=bob=*
             close
+            """
+
+        # A set of packages that we publish with additional hash attributes
+        pkgs2 = """
+            open [email protected],5.11-0
+            add dir mode=0755 owner=root group=bin path=etc
+            add file etc/file2 mode=0555 owner=root group=bin path=etc/file2 revert-tag=bob
+            close
             open [email protected],5.11-0
             add dir mode=0755 owner=root group=bin path=dev revert-tag=init-dev=*
             add dir mode=0755 owner=root group=bin path=dev/cfg revert-tag=init-dev=*
@@ -174,6 +179,8 @@
                 self.make_misc_files(self.misc_files)
                 self.make_misc_files(self.additional_files)
                 self.plist = self.pkgsend_bulk(self.rurl, self.pkgs)
+                self.plist.extend(self.pkgsend_bulk(self.rurl, self.pkgs2,
+                    debug_hash="sha1+sha256"))
 
         def test_revert(self):
                 self.image_create(self.rurl)
@@ -184,7 +191,17 @@
                 self.damage_all_files()
                 # make sure we broke 'em
                 self.pkg("verify A", exit=1)
+
+                # We expect that the SHA-2 hash is used whenever there are SHA-2
+                # hashes on the action. Even though this client is run in
+                # "SHA-1" mode as well as "SHA-2" mode, we always verify with
+                # the most-preferred hash available.
+                self.pkg("-D hash=sha1+sha256 verify B", exit=1)
+                sha2 = "e3868252b2b2de64e85f5b221e46eb23c428fe5168848eb36d113c66628131ce"
+                self.assert_(sha2 in self.output)
                 self.pkg("verify B", exit=1)
+                self.assert_(sha2 in self.output)
+
                 self.pkg("verify C", exit=1)
                 self.pkg("verify D", exit=1)
 
@@ -214,11 +231,21 @@
 
                 # revert damage to B, C, D by tag and test the parsable output.
                 self.pkg("revert -n --parsable=0 --tagged bob")
+                self.debug("\n".join(self.plist))
                 self.assertEqualParsable(self.output,
-                    affect_packages=[self.plist[1], self.plist[2], self.plist[3]])
-                self.pkg("revert --parsable=0 --tagged bob")
+                    affect_packages=[self.plist[10], self.plist[1],
+                    self.plist[2]])
+                # When reverting damage, we always verify using the
+                # most-preferred hash, but retrieve content with the
+                # least-preferred hash: -D hash=sha1+sha256 should have no
+                # effect here whatsoever, but -D hash=sha256 should fail because
+                # our repository stores its files by the SHA1 hash.
+                self.pkg("-D hash=sha256 revert --parsable=0 --tagged bob",
+                    exit=1)
+                self.pkg("-D hash=sha1+sha256 revert --parsable=0 --tagged bob")
                 self.assertEqualParsable(self.output,
-                    affect_packages=[self.plist[1], self.plist[2], self.plist[3]])
+                    affect_packages=[self.plist[10], self.plist[1],
+                    self.plist[2]])
                 self.pkg("verify A", exit=1)
                 self.pkg("verify B")
                 self.pkg("verify C")
--- a/src/tests/cli/t_pkg_search.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_search.py	Fri Oct 04 10:56:25 2013 +1300
@@ -30,6 +30,7 @@
 import pkg5unittest
 
 import copy
+import hashlib
 import os
 import shutil
 import sys
@@ -65,7 +66,7 @@
             add dir mode=0755 owner=root group=bin path=/bin
             add file tmp/example_file mode=0555 owner=root group=bin path=/bin/example_path11
             close """
-        
+
         incorp_pkg10 = """
             open [email protected],5.11-0
             add depend [email protected],5.11-0 type=incorporate
@@ -295,7 +296,8 @@
 
         res_remote_file = set([
             'path       file      bin/example_path          pkg:/[email protected]\n',
-            'b40981aab75932c5b2f555f50769d878e44913d7 file      bin/example_path          pkg:/[email protected]\n'
+            'b40981aab75932c5b2f555f50769d878e44913d7 file      bin/example_path          pkg:/[email protected]\n',
+            'hash                                     file   bin/example_path pkg:/[email protected]\n'
         ]) | res_remote_path
 
 
@@ -308,7 +310,8 @@
              headers,
              'path       file      bin/example_path          pkg:/[email protected]\n',
              'basename   file      bin/example_path          pkg:/[email protected]\n',
-             'b40981aab75932c5b2f555f50769d878e44913d7 file      bin/example_path          pkg:/[email protected]\n'
+             'b40981aab75932c5b2f555f50769d878e44913d7 file      bin/example_path          pkg:/[email protected]\n',
+             'hash                                     file   bin/example_path pkg:/[email protected]\n'
         ])
 
         o_headers = \
@@ -992,6 +995,7 @@
                 self.assertEqualDiff(expected, actual)
                 self.pkg("search example_path", exit=1)
 
+
 class TestSearchMultiPublisher(pkg5unittest.ManyDepotTestCase):
 
         same_pub1 = """
@@ -1035,11 +1039,20 @@
         }
 
         def setUp(self):
-                pkg5unittest.ManyDepotTestCase.setUp(self,["samepub", "samepub"],
-                    start_depots=True)
+                pkg5unittest.ManyDepotTestCase.setUp(self, ["samepub",
+                    "samepub"], start_depots=True)
                 self.make_misc_files(self.misc_files)
                 self.durl1 = self.dcs[1].get_depot_url()
+                self.pkgsend_bulk(self.durl1, self.same_pub1, refresh_index=True)
                 self.durl2 = self.dcs[2].get_depot_url()
+                self.rurl2 = self.dcs[2].get_repo_url()
+                # Our 2nd depot gets the package published with multiple hash
+                # attributes, but served from a single-hash-aware depot.
+                # (The fact that it's single-hash-aware should make no
+                # difference to the content it serves, so long as the index
+                # was generated while we were aware of multiple hashes.)
+                self.pkgsend_bulk(self.rurl2, self.same_pub2,
+                    refresh_index=True, debug_hash="sha1+sha256")
 
         def test_7140657(self):
                 """ Check that pkg search with -s works as intended when there are
@@ -1097,6 +1110,54 @@
                 expected = self.reduceSpaces(expected_out2)
                 self.assertEqualDiff(expected, actual)
 
+        def test_search_multi_hash(self):
+                """Check that when searching a repository with multiple
+                hashes, all hash attributes are indexed and we can search
+                against all hash attributes.
+
+                This test depends on pkg.digest having DebugValue settings
+                that add sha256 hashes to the set of hashes we append to
+                actions at publication time."""
+
+                self.image_create(self.durl2, prefix="samepub")
+
+                # manually calculate the hashes, in case of bugs in
+                # pkg.misc.get_data_digest
+                sha1_hash = hashlib.sha1("magic").hexdigest()
+                sha2_hash = hashlib.sha256("magic").hexdigest()
+
+                self.pkg("search %s" % sha1_hash)
+                self.pkg("search %s" % sha2_hash)
+
+                # Check that we're matching on the correct index.
+                # For sha1 hashes, the 'index' returned is actually the
+                # hash itself - that seems unusual, but it's the way the
+                # index was built. We also emit a 2nd search result that shows
+                # 'hash', in order to be consistent with the way we print
+                # the pkg.hash.sha* attribute when dealing with other hashes.
+                self.pkg("search -H -o search.match_type %s" % sha1_hash)
+                self.assertEqualDiff(
+                    self.reduceSpaces(self.output), "%s\nhash\n" % sha1_hash)
+
+                self.pkg("search -H -o search.match_type %s" % sha2_hash)
+                self.assertEqualDiff(
+                    self.reduceSpaces(self.output), "pkg.hash.sha256\n")
+
+                # check that both searches match the same action
+                self.pkg("search -o action.raw %s" % sha1_hash)
+                sha1_action = self.reduceSpaces(self.output)
+
+                self.pkg("search -o action.raw %s" % sha2_hash)
+                sha2_action = self.reduceSpaces(self.output)
+                self.assertEqualDiff(sha1_action, sha2_action)
+
+                # check that the same searches in the non-multihash-aware
+                # repository only return a result for the sha-1 hash
+                # (which checks that we're only setting multiple hashes
+                # on actions when hash=sha1+sha256 is set)
+                self.pkg("search -s %s %s" % (self.durl1, sha1_hash))
+                self.pkg("search -s %s %s" % (self.durl1, sha2_hash), exit=1)
+
 
 if __name__ == "__main__":
         unittest.main()
--- a/src/tests/cli/t_pkg_sysrepo.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkg_sysrepo.py	Fri Oct 04 10:56:25 2013 +1300
@@ -30,16 +30,12 @@
 import pkg5unittest
 
 import copy
-import hashlib
 import os
 import shutil
-import signal
-import sys
-import time
 
-import pkg.client.api as api
 import pkg.client.api_errors as apx
 import pkg.client.transport.exception as tx
+import pkg.digest as digest
 import pkg.misc as misc
 
 class PC(object):
@@ -86,13 +82,22 @@
 
         bar10 = """
             open [email protected],5.11-0
+            add file tmp/example_two mode=0555 owner=root group=bin path=/usr/bin/example_path3
             close"""
 
         bar11 = """
             open [email protected],5.11-0
+            add file tmp/example_two mode=0555 owner=root group=bin path=/usr/bin/example_path3
+            add file tmp/example_two mode=0555 owner=root group=bin path=/usr/bin/example_path4
             close"""
 
-        misc_files = ["tmp/example_file"]
+        baz10 = """
+            open [email protected],5.11-0
+            add file tmp/example_three mode=0555 owner=root group=bin path=/usr/bin/another
+            close"""
+
+        misc_files = ["tmp/example_file", "tmp/example_two",
+            "tmp/example_three"]
 
         expected_all_access =  """\
 PUBLISHER\tSTICKY\tSYSPUB\tENABLED\tTYPE\tSTATUS\tURI\tPROXY
@@ -127,6 +132,14 @@
                 self.durl1 = self.dcs[1].get_depot_url()
                 self.durl2 = self.dcs[2].get_depot_url()
                 self.durl3 = self.dcs[3].get_depot_url()
+
+                # We make self.durl3 multi-hash aware to ensure that the
+                # system repository can serve packages published with
+                # multiple hashes.
+                self.dcs[3].stop()
+                self.dcs[3].set_debug_feature("hash=sha1+sha256")
+                self.dcs[3].start()
+
                 self.durl4 = self.dcs[4].get_depot_url()
                 self.durl5 = self.dcs[5].get_depot_url()
 
@@ -142,7 +155,11 @@
 
                 self.pkgsend_bulk(self.rurl1, self.example_pkg10)
                 self.pkgsend_bulk(self.rurl2, self.foo10)
-                self.pkgsend_bulk(self.rurl3, self.bar10)
+                # We send to rurl3 using multi-hash-aware publication
+                self.pkgsend_bulk(self.rurl3, self.bar10,
+                    debug_hash="sha1+sha256")
+                self.pkgsend_bulk(self.rurl3, self.baz10,
+                    debug_hash="sha1+sha256")
                 self.pkgsend_bulk(self.rurl4, self.bar10)
                 self.pkgsend_bulk(self.rurl5, self.foo11)
 
@@ -593,6 +610,11 @@
                 # Test that the current api object has the right catalog.
                 self._api_install(api_obj, ["foo", "bar"])
 
+                # Test that we can install a multi-hash package
+                self.pkg("install baz")
+                self.pkg("contents -m baz")
+                self.assert_("pkg.hash.sha256" in self.output)
+
         def test_02_communication(self):
                 """Test that the transport for communicating with the depots is
                 actually going through the proxy. This is done by
@@ -789,9 +811,9 @@
 
                 # Find the hashes that will be included in the urls of the
                 # proxied file repos.
-                hash1 = hashlib.sha1("file://" +
+                hash1 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[1].get_repodir().rstrip("/")).hexdigest()
-                hash3 = hashlib.sha1("file://" +
+                hash3 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[3].get_repodir().rstrip("/")).hexdigest()
 
                 # Check that a user can add and remove mirrors,
@@ -926,6 +948,7 @@
 
                 expected = """\
 bar (test3) 1.0-0 ---
+baz (test3) 1.0-0 ---
 example_pkg 1.0-0 ---
 """
                 self.__check_package_lists(expected)
@@ -953,6 +976,7 @@
 
                 expected = """\
 bar (test3) 1.0-0 ---
+baz (test3) 1.0-0 ---
 example_pkg 1.0-0 ---
 """
                 self.__check_package_lists(expected)
@@ -1281,11 +1305,11 @@
 
                 # Find the hashes that will be included in the urls of the
                 # proxied file repos.
-                hash1 = hashlib.sha1("file://" +
+                hash1 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[1].get_repodir().rstrip("/")).hexdigest()
-                hash2 = hashlib.sha1("file://" +
+                hash2 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[2].get_repodir().rstrip("/")).hexdigest()
-                hash3 = hashlib.sha1("file://" +
+                hash3 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[3].get_repodir().rstrip("/")).hexdigest()
 
                 expected = """\
@@ -1339,11 +1363,11 @@
 
                 # Find the hashes that will be included in the urls of the
                 # proxied file repos.
-                hash1 = hashlib.sha1("file://" +
+                hash1 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[1].get_repodir().rstrip("/")).hexdigest()
-                hash2 = hashlib.sha1("file://" +
+                hash2 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[2].get_repodir().rstrip("/")).hexdigest()
-                hash3 = hashlib.sha1("file://" +
+                hash3 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[3].get_repodir().rstrip("/")).hexdigest()
 
                 self.__set_responses("all-access-f")
@@ -1379,11 +1403,11 @@
 
                 # Find the hashes that will be included in the urls of the
                 # proxied file repos.
-                hash1 = hashlib.sha1("file://" +
+                hash1 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[1].get_repodir().rstrip("/")).hexdigest()
-                hash2 = hashlib.sha1("file://" +
+                hash2 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[2].get_repodir().rstrip("/")).hexdigest()
-                hash3 = hashlib.sha1("file://" +
+                hash3 = digest.DEFAULT_HASH_FUNC("file://" +
                     self.dcs[3].get_repodir().rstrip("/")).hexdigest()
 
                 expected = """\
--- a/src/tests/cli/t_pkgrecv.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkgrecv.py	Fri Oct 04 10:56:25 2013 +1300
@@ -48,6 +48,8 @@
 import unittest
 import zlib
 
+from pkg.digest import DEFAULT_HASH_FUNC
+
 class TestPkgrecvMulti(pkg5unittest.ManyDepotTestCase):
         # Cleanup after every test.
         persistent_setup = False
@@ -245,7 +247,8 @@
                                 # Since the file shouldn't be compressed, this
                                 # should return a zlib.error.
                                 self.assertRaises(zlib.error,
-                                    misc.gunzip_from_stream, ifile, ofile)
+                                    misc.gunzip_from_stream, ifile, ofile,
+                                    ignore_hash=True)
 
                 # Next, send it to another depot
                 self.pkgsend(self.durl2, "open [email protected]")
@@ -270,8 +273,9 @@
                 old = orepo.manifest(f)
                 new = os.path.join(self.tempdir, f.get_dir_path(), "manifest")
 
-                self.assertEqual(misc.get_data_digest(old),
-                    misc.get_data_digest(new))
+                self.assertEqual(
+                    misc.get_data_digest(old, hash_func=DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(new, hash_func=DEFAULT_HASH_FUNC))
 
                 # Next, load the manifest.
                 m = manifest.Manifest()
@@ -288,8 +292,10 @@
                                 new = os.path.join(self.tempdir,
                                     f.get_dir_path(), a.hash)
                                 self.assertNotEqual(old, new)
-                                self.assertEqual(misc.get_data_digest(old),
-                                    misc.get_data_digest(new))
+                                self.assertEqual(misc.get_data_digest(old,
+                                    hash_func=DEFAULT_HASH_FUNC),
+                                    misc.get_data_digest(new,
+                                    hash_func=DEFAULT_HASH_FUNC))
 
                 # Second, pkgrecv to the pkg to a file repository.
                 npath = tempfile.mkdtemp(dir=self.test_root)
@@ -305,8 +311,9 @@
 
                 self.debug(old)
                 self.debug(new)
-                self.assertEqual(misc.get_data_digest(old),
-                    misc.get_data_digest(new))
+                self.assertEqual(
+                    misc.get_data_digest(old, hash_func=DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(new, hash_func=DEFAULT_HASH_FUNC))
 
                 # Next, load the manifest.
                 m = manifest.Manifest()
@@ -322,8 +329,10 @@
                                 old = orepo.file(a.hash)
                                 new = nrepo.file(a.hash)
                                 self.assertNotEqual(old, new)
-                                self.assertEqual(misc.get_data_digest(old),
-                                    misc.get_data_digest(new))
+                                self.assertEqual(misc.get_data_digest(old,
+                                    hash_func=DEFAULT_HASH_FUNC),
+                                    misc.get_data_digest(new,
+                                    hash_func=DEFAULT_HASH_FUNC))
 
                 # Third, pkgrecv to the pkg to a http repository from the
                 # file repository from the last test.
@@ -336,8 +345,9 @@
                 old = orepo.manifest(f)
                 new = nrepo.manifest(f)
 
-                self.assertEqual(misc.get_data_digest(old),
-                    misc.get_data_digest(new))
+                self.assertEqual(
+                    misc.get_data_digest(old, hash_func=DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(new, hash_func=DEFAULT_HASH_FUNC))
 
                 # Next, load the manifest.
                 m = manifest.Manifest()
@@ -353,8 +363,11 @@
                                 old = orepo.file(a.hash)
                                 new = nrepo.file(a.hash)
                                 self.assertNotEqual(old, new)
-                                self.assertEqual(misc.get_data_digest(old),
-                                    misc.get_data_digest(new))
+                                self.assertEqual(
+                                    misc.get_data_digest(old,
+                                    hash_func=DEFAULT_HASH_FUNC),
+                                    misc.get_data_digest(new,
+                                    hash_func=DEFAULT_HASH_FUNC))
 
                 # Fourth, create an image and verify that the sent package is
                 # seen by the client.
@@ -377,8 +390,9 @@
                 old = orepo.manifest(f)
                 new = nrepo.manifest(f)
 
-                self.assertEqual(misc.get_data_digest(old),
-                    misc.get_data_digest(new))
+                self.assertEqual(
+                    misc.get_data_digest(old, hash_func=DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(new, hash_func=DEFAULT_HASH_FUNC))
 
         def test_3_recursive(self):
                 """Verify that retrieving a package recursively will retrieve
@@ -544,7 +558,8 @@
                                 # Since the file shouldn't be compressed, this
                                 # should return a zlib.error.
                                 self.assertRaises(zlib.error,
-                                    misc.gunzip_from_stream, ifile, ofile)
+                                    misc.gunzip_from_stream, ifile, ofile,
+                                    ignore_hash=True)
 
                 for var in ("PKG_SRC", "PKG_DEST"):
                         del os.environ[var]
@@ -847,7 +862,7 @@
                 # Test basic operation of cloning repo which contains one
                 # publisher to repo which contains same publisher
                 self.pkgrecv(self.durl1, "--clone -d %s" % self.dpath2)
-                
+
                 ret = subprocess.call(["/usr/bin/gdiff", "-Naur", "-x", 
                     "index", "-x", "trans", self.dpath1, self.dpath2])
                 self.assertTrue(ret==0)
@@ -884,7 +899,7 @@
                 # Test that clone fails if --raw is specified.
                 self.pkgrecv(self.durl1, "--raw --clone -d %s -p test2" %
                     self.dpath2, exit=2)
-                
+
                 # Test that clone fails if -c is specified.
                 self.pkgrecv(self.durl1, "-c /tmp/ --clone -d %s -p test2" %
                     self.dpath2, exit=2)
@@ -897,6 +912,54 @@
                 self.pkgrecv(self.durl1, "--newest --clone -d %s -p test2" %
                     self.dpath2, exit=2)
 
+        def test_12_multihash(self):
+                """Tests that we can recv to and from repositories with
+                multi-hash support, interoperating with repositories without
+                multi-hash support."""
+
+                f = fmri.PkgFmri(self.published[3], None)
+
+                # We create an image simply so we can use "contents -g" to
+                # inspect the repository.
+                self.image_create()
+
+                # First, recv the package and verify it has no extended hashes
+                self.pkgrecv(self.durl1, "-d %s %s" % (self.durl3, f))
+                self.pkg("contents -g %s -m %s" % (self.durl3, f))
+                self.assert_("pkg.hash.sha256" not in self.output)
+
+                # Now stop and start the repository as multi-hash aware, and
+                # recv it again, making sure that we do not get multiple hashes
+                # added (because modifying the manifest would break signatures)
+                self.dcs[3].stop()
+                self.dcs[3].set_debug_feature("hash=sha1+sha256")
+                self.dcs[3].start()
+                self.pkgrecv(self.durl1, "-d %s %s" % (self.durl3, f))
+                self.pkg("contents -g %s -m %s" % (self.durl3, f))
+                self.assert_("pkg.hash.sha256" not in self.output)
+
+                # Now check the reverse - that a package with multiple hashes
+                # can be received into a repository that is not multi-hash aware
+                b = "[email protected],5.11-0"
+                self.pkgsend_bulk(self.durl3, self.bronze10)
+                self.pkg("contents -g %s -m %s" % (self.durl3, b))
+                self.assert_("pkg.hash.sha256" in self.output)
+                self.pkgrecv(self.durl3, "-d %s %s" % (self.durl4, b))
+                self.pkg("contents -g %s -m %s" % (self.durl4, b))
+                self.assert_("pkg.hash.sha256" in self.output)
+
+                # Ensure that we can recv multi-hash packages into p5p files
+                p5p_path = os.path.join(self.test_root, "multi-hash.p5p")
+                self.pkgrecv(self.durl3, "-ad %s %s" % (p5p_path, b))
+                self.pkg("contents -g %s -m %s" % (p5p_path, b))
+                self.assert_("pkg.hash.sha256" in self.output)
+
+                # Finally, stop and start our scratch repository to clear the
+                # debug feature. If an earlier failure prevents this cleanup,
+                # the leftover setting is harmless.
+                self.dcs[3].stop()
+                self.dcs[3].unset_debug_feature("hash=sha1+sha256")
+                self.dcs[3].start()
 
 class TestPkgrecvHTTPS(pkg5unittest.HTTPSTestClass):
 
@@ -912,7 +975,7 @@
 
                 pkg5unittest.HTTPSTestClass.setUp(self, pubs,
                     start_depots=True)
-                
+
                 self.srurl = self.dcs[1].get_repo_url()
                 self.make_misc_files(self.misc_files)
                 self.pkgsend_bulk(self.srurl, self.example_pkg10)
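
A note on test_12_multihash above: pkgrecv must never rewrite a
received manifest, since adding hash attributes after the fact would
invalidate any signatures over it. The manifest comparisons in this
file reduce to a check like the following sketch, built on the
get_data_digest signature this changeset introduces:

    import pkg.misc as misc
    from pkg.digest import DEFAULT_HASH_FUNC

    def manifests_identical(old_path, new_path):
            """True if two manifest files have identical content,
            compared via their digests (sketch of the test pattern)."""
            return (misc.get_data_digest(old_path,
                hash_func=DEFAULT_HASH_FUNC) ==
                misc.get_data_digest(new_path,
                hash_func=DEFAULT_HASH_FUNC))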
--- a/src/tests/cli/t_pkgrepo.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkgrepo.py	Fri Oct 04 10:56:25 2013 +1300
@@ -111,6 +111,8 @@
             close
         """
 
+        # These hashes should remain SHA-1 until we bump the least-preferred
+        # hash for actions.
         fhashes = {
              "tmp/empty": "5f5fb715934e0fa2bfb5611fd941d33228027006",
              "tmp/truck1": "c9e257b659ace6c3fbc4d334f49326b3889fd109",
@@ -1354,6 +1356,38 @@
                                 continue
                         self.assert_(not os.listdir(rstore.file_root))
 
+                # Reset the src_repo for the rest of the test.
+                shutil.rmtree(src_repo)
+                self.create_repo(src_repo)
+                self.pkgrepo("set -s %s publisher/prefix=test" % src_repo)
+
+                published = self.pkgsend_bulk(src_repo, (self.tree10),
+                    debug_hash="sha1+sha256")
+
+                # Verify that files in the rstore are named only by SHA-1 hashes
+                repo = self.get_repo(src_repo)
+                known_hashes = self.fhashes.values()
+                for rstore in repo.rstores:
+                        if not rstore.publisher:
+                                continue
+                        for dirpath, dnames, fnames in os.walk(rstore.file_root):
+                                for f in fnames:
+                                        if f not in known_hashes:
+                                                self.assert_(False,
+                                                    "Unexpected content in "
+                                                    "repodir: %s" % f)
+
+                # Verify that when a repository has been published with multiple
+                # hashes, on removal, we only attempt to remove files using the
+                # least-preferred hash.
+                self.pkgrepo("remove -s %s tree" % src_repo)
+
+                # Verify repository file_root is empty.
+                for rstore in repo.rstores:
+                        if not rstore.publisher:
+                                continue
+                        self.assert_(not os.listdir(rstore.file_root))
+
                 # Cleanup.
                 shutil.rmtree(src_repo)
                 shutil.rmtree(dest_repo)
@@ -1831,8 +1865,8 @@
                 self.assert_("etc/truck1" in self.output)
                 self.assert_("etc/trailer" in self.output)
 
-                # finally, corrupt another file to see that we can also spot
-                # files that aren't gzipped.
+                # Corrupt another file to see that we can also spot files that
+                # aren't gzipped.
                 fmris += self.pkgsend_bulk(repo_path, (self.truck20))
                 bad_gzip_path = self.__inject_badhash("tmp/truck2",
                     valid_gzip=False)
@@ -1845,6 +1879,37 @@
                     self.output.count("ERROR: Corrupted gzip file") == 1)
                 self.assert_(bad_gzip_path in self.output)
 
+                # Check that when verifying content, we always use the
+                # most-preferred hash. Remove all existing packages first.
+                self.pkgrepo("-s %s remove %s" % (repo_path, " ".join(fmris)))
+                fmris = self.pkgsend_bulk(repo_path, (self.tree10),
+                    debug_hash="sha1+sha256")
+                self.pkgrepo("-s %s verify" % repo_path, exit=0)
+
+                # Break a file in the repository and ensure we spot it.
+                bad_hash_path = self.__inject_badhash("tmp/truck1")
+                bad_basename = os.path.basename(bad_hash_path)
+
+                self.pkgrepo("-s %s verify" % repo_path, exit=1)
+                self.assert_(
+                    self.output.count("ERROR: Invalid file hash") == 1)
+
+                # We should be verifying using the SHA-2 hash, and so we should
+                # only see the SHA-1 value in the output once, when printing
+                # the path to the file in the repository, not when reporting
+                # the computed or expected hash.
+                self.assert_(self.output.count(bad_basename) == 1)
+
+                # Verify that when we publish using SHA-1 only, we get
+                # the SHA-1 value printed twice: once when printing the path
+                # to the file in the repository, and once when printing the
+                # expected hash.
+                self.pkgrepo("-s %s remove %s" % (repo_path, " ".join(fmris)))
+                fmris = self.pkgsend_bulk(repo_path, (self.tree10))
+                self.__inject_badhash("tmp/truck1")
+
+                self.pkgrepo("-s %s verify" % repo_path, exit=1)
+                self.assert_(self.output.count(bad_basename) == 2)
 
         def test_12_verify_badmanifest(self):
                 """Test that verify finds bad manifests."""
--- a/src/tests/cli/t_pkgsend.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkgsend.py	Fri Oct 04 10:56:25 2013 +1300
@@ -39,6 +39,7 @@
 
 from pkg import misc
 from pkg.actions import fromstr
+from pkg.digest import DEFAULT_HASH_FUNC
 import pkg.portable as portable
 
 
@@ -689,10 +690,11 @@
                                 f = file(fpath, "wb")
                                 f.write("test" + entry)
                                 f.close()
-                                # compute a digest of the file we just created, which
-                                # we can use when validating later.
+                                # compute a digest of the file we just created,
+                                # which we can use when validating later.
                                 contents_dict[entry][4] = \
-                                    misc.get_data_digest(fpath)[0]
+                                    misc.get_data_digest(fpath,
+                                    hash_func=DEFAULT_HASH_FUNC)[0]
 
                         elif ftype == "d":
                                 try:
@@ -845,9 +847,16 @@
                                 continue
 
                         if digest:
-                                pkg5_digest, contents = misc.get_data_digest(name, return_content=True)
+                                # The hash_func used here just needs to
+                                # match the one used when creating the
+                                # svr4 package - it does not consult the
+                                # pkg(5) hash or chash attributes.
+                                pkg5_digest, contents = misc.get_data_digest(
+                                    name, return_content=True,
+                                    hash_func=DEFAULT_HASH_FUNC)
                                 self.assertEqual(digest, pkg5_digest,
-                                    "%s: %s != %s, '%s'" % (name, digest, pkg5_digest, contents))
+                                    "%s: %s != %s, '%s'" % (name, digest,
+                                    pkg5_digest, contents))
 
                         st = os.stat(os.path.join(self.img_path(), name))
                         if mode is not None:
@@ -1280,6 +1289,39 @@
                     add license license=copyright
                     close""", exit=1)
 
+        def test_26_pkgsend_multihash(self):
+                """Tests that when publishing packages with mutiple hashes,
+                we only overwrite those hashes if we're in multi-hash mode
+                and only if they match the hash attributes we know how to
+                compute, other attributes are left alone."""
+
+                # We use a file:// URI rather than the depot URI so we don't
+                # have to worry about starting the depot in SHA-2 mode. Other
+                # tests in the test suite ensure SHA-2 publication is working
+                # over HTTP.
+                furi = self.dc.get_repo_url()
+                mfpath = os.path.join(self.test_root, "pkgsend_multihash.mf")
+                payload = self.make_misc_files(["pkgsend_multihash"])[0]
+
+                with open(mfpath, "wb") as mf:
+                        mf.write("""
+set name=pkg.fmri value=pkg:/[email protected]
+file %s path=/foo owner=root group=sys mode=0644 pkg.hash.sha256=spaghetti \
+    pkg.hash.rot13=caesar
+""" % payload)
+                self.pkgsend("", "-s %s publish %s" % (furi, mfpath))
+                self.image_create(furi)
+                self.pkg("contents -rm multihash")
+                self.assert_("pkg.hash.sha256=spaghetti" in self.output)
+
+                self.pkgsend("", "-s %s publish %s" % (furi, mfpath),
+                    debug_hash="sha1+sha256")
+                self.pkg("refresh")
+
+                self.pkg("contents -rm multihash")
+                self.assert_("pkg.hash.sha256=spaghetti" not in self.output)
+                self.assert_("pkg.hash.rot13=caesar" in self.output)
+
 
 class TestPkgsendHardlinks(pkg5unittest.CliTestCase):
 
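
The policy test_26_pkgsend_multihash checks can be summarised as: when
publishing in multi-hash mode, recompute only the hash attributes we
know how to compute and leave anything else (here pkg.hash.rot13)
untouched. A self-contained sketch of that policy; the helper and the
KNOWN_HASH_ATTRS table are illustrative, not the pkg(5) publication
code:

    import hashlib

    # Hash attributes this sketch knows how to compute; stands in for
    # the table pkg.digest would consult.
    KNOWN_HASH_ATTRS = {
        "pkg.hash.sha256": hashlib.sha256,
    }

    def refresh_hash_attrs(attrs, payload, multi_hash=False):
            """Recompute known hash attributes, preserving unknown ones
            such as pkg.hash.rot13 (illustrative only)."""
            out = dict(attrs)
            if not multi_hash:
                    return out
            for attr, alg in KNOWN_HASH_ATTRS.items():
                    out[attr] = alg(payload).hexdigest()
            return out

Given attrs of {"pkg.hash.sha256": "spaghetti", "pkg.hash.rot13":
"caesar"}, calling refresh_hash_attrs(attrs, data, multi_hash=True)
replaces the sha256 value and leaves the rot13 value alone, matching
the assertions in the test.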
--- a/src/tests/cli/t_pkgsign.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkgsign.py	Fri Oct 04 10:56:25 2013 +1300
@@ -21,7 +21,7 @@
 #
 
 #
-# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 #
 
 import testutils
@@ -39,6 +39,7 @@
 import pkg.actions as action
 import pkg.actions.signature as signature
 import pkg.client.api_errors as apx
+import pkg.digest as digest
 import pkg.facet as facet
 import pkg.fmri as fmri
 import pkg.misc as misc
@@ -551,7 +552,7 @@
                             "ch5_ta1_cert.pem"),
                         "pkg": plist[0]
                     }
-                self.pkgsign(self.rurl1, sign_args)
+                self.pkgsign(self.rurl1, sign_args, debug_hash="sha1+sha256")
 
                 sign_args = "-k %(key)s -c %(cert)s %(name)s" % {
                     "name": plist[0],
@@ -565,6 +566,15 @@
                 self.pkg("set-property signature-policy verify")
                 api_obj = self.get_img_api_obj()
                 self._api_install(api_obj, ["example_pkg"])
+
+                # Make sure we've got exactly 1 signature with SHA-2 hashes
+                self.pkg("contents -m")
+                self.assert_(self.output.count("pkg.chain.sha256") == 1)
+                self.assert_(self.output.count("pkg.chain.chashes") == 1)
+                # and SHA-1 hashes on both signatures
+                self.assert_(self.output.count("chain=") == 2)
+                self.assert_(self.output.count("chain.chashes=") == 2)
+
                 self._api_uninstall(api_obj, ["example_pkg"])
                 self.pkg("set-property signature-policy require-signatures")
                 api_obj = self.get_img_api_obj()
@@ -969,7 +979,7 @@
                 self.write_img_manifest(pfmri, s)
 
                 DebugValues["manifest_validate"] = "Never"
-                
+
                 self.pkg("set-property signature-policy verify")
                 # This should fail because the text of manifest has changed
                 # so the hash should no longer validate.
@@ -2372,6 +2382,11 @@
                 # signature actions in it.
                 self.pkgsign_simple(self.rurl1, plist[0], exit=1)
 
+                # Even with SHA-256 hashes added, the signatures should
+                # still be considered identical
+                self.pkgsign_simple(self.rurl1, plist[0], exit=1,
+                    debug_hash="sha1+sha256")
+
                 self.pkg_image_create(self.rurl1)
                 self.seed_ta_dir("ta3")
                 self.pkg("set-property signature-policy verify")
@@ -2943,7 +2958,12 @@
                 fd, new_cert = tempfile.mkstemp(dir=self.test_root)
                 with os.fdopen(fd, "wb") as fh:
                         fh.write(cert.as_pem())
-                file_name = misc.get_data_digest(new_cert)[0]
+
+                # The file-store uses the least-preferred hash when storing
+                # content
+                alg = digest.HASH_ALGS[digest.REVERSE_RANKED_HASH_ATTRS[0]]
+                file_name = misc.get_data_digest(new_cert,
+                    hash_func=alg)[0]
                 subdir = os.path.join(cache_dir, file_name[:2])
                 os.mkdir(subdir)
                 fp = os.path.join(subdir, file_name)
@@ -2986,13 +3006,16 @@
                 fd, new_cert = tempfile.mkstemp(dir=self.test_root)
                 with os.fdopen(fd, "wb") as fh:
                         fh.write(cert.as_pem())
-                file_name = misc.get_data_digest(new_cert)[0]
-                subdir = os.path.join(cache_dir, file_name[:2])
-                os.mkdir(subdir)
-                fp = os.path.join(subdir, file_name)
-                fh = PkgGzipFile(fp, "wb")
-                fh.write(cert.as_pem())
-                fh.close()
+                for attr in digest.DEFAULT_HASH_ATTRS:
+                        alg = digest.HASH_ALGS[attr]
+                        file_name = misc.get_data_digest(new_cert,
+                            hash_func=alg)[0]
+                        subdir = os.path.join(cache_dir, file_name[:2])
+                        os.mkdir(subdir)
+                        fp = os.path.join(subdir, file_name)
+                        fh = PkgGzipFile(fp, "wb")
+                        fh.write(cert.as_pem())
+                        fh.close()
 
                 self.pkgrecv(self.rurl2, "-c %s -d %s '*'" %
                     (cache_dir, self.rurl1))
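
The cache-priming loop added above stores the certificate once per
default hash attribute, so that a lookup by any of the file's hashes
succeeds. Extracted as a sketch (the helper name is hypothetical; the
digest and misc calls are those used in the test):

    import os

    import pkg.digest as digest
    import pkg.misc as misc

    def cache_paths(cache_dir, path):
            """Yield the cache location a file occupies for each
            default hash attribute, as in the test above."""
            for attr in digest.DEFAULT_HASH_ATTRS:
                    alg = digest.HASH_ALGS[attr]
                    name = misc.get_data_digest(path, hash_func=alg)[0]
                    yield os.path.join(cache_dir, name[:2], name)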
--- a/src/tests/cli/t_pkgsurf.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_pkgsurf.py	Fri Oct 04 10:56:25 2013 +1300
@@ -30,23 +30,14 @@
 import pkg5unittest
 
 import os
-import pkg.catalog as catalog
-import pkg.config as cfg
-import pkg.client.pkgdefs as pkgdefs
+import pkg.digest as digest
 import pkg.fmri as fmri
 import pkg.manifest as manifest
 import pkg.misc as misc
-import pkg.p5p as p5p
-import pkg.portable as portable
-import pkg.server.repository as repo
 import shutil
 import subprocess
 import tempfile
-import time
-import urllib
-import urlparse
 import unittest
-import zlib
 
 class TestPkgsurf(pkg5unittest.ManyDepotTestCase):
         # Cleanup after every test.
@@ -57,7 +48,7 @@
         # Since we publish the expected package to an additional repo, we have
         # to set the timestamps to make sure the target and expected packages
         # are equal.
-        
+
         # The test cases are mainly in the different types of packages we
         # have in the repo.
 
@@ -122,7 +113,7 @@
 
         hammerhead_exp = hammerhead_targ
 
-        # Package has only dep change but dependency package changed, 
+        # Package has only dep change but dependency package changed,
         # should not be reversioned.
         blue_ref = """
             open [email protected],5.11-0:20000101T000000Z
@@ -152,7 +143,7 @@
         """
 
         bull_exp = bull_targ
-        
+
         # Package has only dep change and dependency package didn't change,
         # should be reversioned.
         mako_ref = """
@@ -188,7 +179,7 @@
         # dependencies should be fixed.
         # Pkg has all sorts of actions to make sure everything gets moved
         # correctly.
-        
+
         angel_ref = """
             open [email protected],5.11-0:20000101T000000Z
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/angel
@@ -236,7 +227,7 @@
 
         # Package has content change and depends on package which didn't get
         # reversioned, shouldn't be touched.
-        
+
         horn_ref = """
             open [email protected],5.11-0:20000101T000000Z
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/horn
@@ -254,9 +245,9 @@
         horn_exp = horn_targ
 
 
-        # Package has content change but has require-any dep on package which 
+        # Package has content change but has require-any dep on package which
         # got reversioned, dependencies should be fixed.
-        
+
         lemon_ref = """
             open [email protected],5.11-0:20000101T000000Z
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/lemon
@@ -283,7 +274,7 @@
         # version. The version of the pkg in the ref repo should be substituted
         # for tiger but not for sandtiger (since dep pkg is still successor of
         # dep FMRI).
-        
+
         leopard_ref = """
             open [email protected],5.11-0:20000101T000000Z
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/leopard
@@ -347,18 +338,18 @@
         # Package has no content change but a change in an attribute,
         # should be treated as content change by default but reversioned if
         # proper CLI options are given (goblin_exp is just for the default
-        # behavior, gets modified in actual test case) 
+        # behavior, gets modified in actual test case)
 
         goblin_ref = """
             open [email protected],5.11-0:20000101T000000Z
-            add set name=info.home value="deep sea" 
+            add set name=info.home value="deep sea"
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/goblin
             close
         """
 
         goblin_targ = """
             open [email protected],5.11-0:20000101T000000Z
-            add set name=info.home value="deeper sea" 
+            add set name=info.home value="deeper sea"
             add file tmp/bat mode=0444 owner=root group=bin path=/etc/goblin
             close
         """
@@ -422,7 +413,7 @@
             close
         """
 
-        sleeper_exp = sleeper_ref    
+        sleeper_exp = sleeper_ref
 
 
         # Check for correct handling of Varcets. Pkg contains same dep FMRI stem
@@ -456,7 +447,7 @@
         """
 
         # Pkg in ref repo is newer than the one in target.
-        # Should not be reversioned. 
+        # Should not be reversioned.
         thresher_ref = """
             open [email protected],5.11-0:20000101T000000Z
             close
@@ -470,7 +461,7 @@
         thresher_exp = thresher_targ
 
         # Package only found in target, not in ref.
-        # Package has a dep on a reversioned pkg, but the reversioned pkg is 
+        # Package has a dep on a reversioned pkg, but the reversioned pkg is
         # still a successor of the dep FMRI.
         # The dep should not be changed.
         bamboo_targ = """
@@ -480,7 +471,7 @@
         """
 
         bamboo_exp = bamboo_targ
-        
+
 
         # Create some packages for an additional publisher
         humpback_targ = """
@@ -488,10 +479,10 @@
             close
         """
 
-        humpback_ref = """                                                     
-            open pkg://cetacea/[email protected],5.11-0:20000101T000000Z             
-            close                                                               
-        """                                                                     
+        humpback_ref = """
+            open pkg://cetacea/[email protected],5.11-0:20000101T000000Z
+            close
+        """
 
         humpback_exp = humpback_targ
 
@@ -522,7 +513,7 @@
                                 pass
                         self.targ_pkgs.append(getattr(self, targ))
                         self.exp_pkgs.append(getattr(self, exp))
-                        
+
                 pkg5unittest.ManyDepotTestCase.setUp(self, ["selachii",
                     "selachii", "selachii", "selachii"], start_depots=True)
 
@@ -543,9 +534,9 @@
                 self.published_exp = self.pkgsend_bulk(self.dpath3,
                     self.exp_pkgs)
 
-                # keep a tmp repo to copy the target into for each new test 
+                # keep a tmp repo to copy the target into for each new test
                 self.dpath_tmp = self.dcs[4].get_repodir()
-                
+
         def test_0_options(self):
                 """Check for correct input handling."""
                 self.pkgsurf("-x", exit=2)
@@ -584,7 +575,7 @@
                 self.pkgsurf("-s %s -r %s" % (tempdir, self.dpath1), exit=1)
                 self.pkgsurf("-s %s -r %s" % (self.dpath1, tempdir), exit=1)
 
-                # Repo empty 
+                # Repo empty
                 self.pkgrepo("create -s %s" % tempdir)
                 self.pkgsurf("-s %s -r %s" % (tempdir, self.dpath1), exit=1)
                 self.pkgsurf("-s %s -r %s" % (self.dpath1, tempdir), exit=1)
@@ -595,7 +586,7 @@
                 self.assertTrue("No packages to reversion." in self.output)
                 self.pkgsurf("-s %s -r %s" % (self.dpath1, tempdir))
                 self.assertTrue("No packages to reversion." in self.output)
-                shutil.rmtree(tempdir)             
+                shutil.rmtree(tempdir)
 
                 # Now check if it actually works.
                 self.pkgsurf("-s %s -r %s" % (self.dpath_tmp, self.dpath1))
@@ -632,7 +623,7 @@
                 # Just run again and see if goblin pkg now gets reversioned.
                 self.pkgsurf("-s %s -r %s -i info.home" % (self.dpath_tmp,
                     self.dpath1))
-                
+
                 # Find goblin package
                 for s in self.published_ref:
                         if "goblin" in s:
@@ -640,8 +631,10 @@
                 f = fmri.PkgFmri(s, None)
                 targ = targ_repo.manifest(f)
                 ref = ref_repo.manifest(f)
-                self.assertEqual(misc.get_data_digest(targ),
-                    misc.get_data_digest(ref))
+                self.assertEqual(misc.get_data_digest(targ,
+                    hash_func=digest.DEFAULT_HASH_FUNC),
+                    misc.get_data_digest(ref,
+                    hash_func=digest.DEFAULT_HASH_FUNC))
 
                 # Check that running the tool again doesn't find any pkgs
                 # to reversion. Use http for accessing reference repo this time.
--- a/src/tests/cli/t_sysrepo.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/cli/t_sysrepo.py	Fri Oct 04 10:56:25 2013 +1300
@@ -49,6 +49,8 @@
 import pkg.misc as misc
 import pkg.portable as portable
 
+from pkg.digest import DEFAULT_HASH_FUNC
+
 SYSREPO_USER = "pkg5srv"
 
 class TestBasicSysrepoCli(pkg5unittest.ApacheDepotTestCase):
@@ -849,8 +851,10 @@
                 os.rename(repo_dir, repo_dir + ".new")
                 try:
                         self.sysrepo("", stderr=True)
-                        self.assert_(misc.get_data_digest(sysrepo_conf)[0] ==
-                            misc.get_data_digest(saved_sysrepo_conf)[0],
+                        self.assert_(misc.get_data_digest(sysrepo_conf,
+                            hash_func=DEFAULT_HASH_FUNC)[0] ==
+                            misc.get_data_digest(saved_sysrepo_conf,
+                            hash_func=DEFAULT_HASH_FUNC)[0],
                             "system repository configuration changed "
                             "unexpectedly.")
                 finally:
--- a/src/tests/pkg5unittest.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/tests/pkg5unittest.py	Fri Oct 04 10:56:25 2013 +1300
@@ -2447,8 +2447,14 @@
                     su_wrap=su_wrap, env_arg=env_arg)
 
         def pkgrepo(self, command, comment="", exit=0, su_wrap=False,
-            env_arg=None, stderr=False, out=False):
-                cmdline = "%s/usr/bin/pkgrepo %s" % (g_proto_area, command)
+            env_arg=None, stderr=False, out=False, debug_hash=None):
+                if debug_hash:
+                        debug_arg = "-D hash=%s " % debug_hash
+                else:
+                        debug_arg = ""
+
+                cmdline = "%s/usr/bin/pkgrepo %s%s" % (g_proto_area, debug_arg,
+                    command)
                 return self.cmdline_run(cmdline, comment=comment, exit=exit,
                     su_wrap=su_wrap, env_arg=env_arg, out=out, stderr=stderr)
 
@@ -2459,11 +2465,14 @@
                     su_wrap=su_wrap, env_arg=env_arg, out=out, stderr=stderr)
 
         def pkgsign(self, depot_url, command, exit=0, comment="",
-            env_arg=None):
+            env_arg=None, debug_hash=None):
                 args = []
                 if depot_url:
                         args.append("-s %s" % depot_url)
 
+                if debug_hash:
+                        args.append("-D hash=%s" % debug_hash)
+
                 if command:
                         args.append(command)
 
@@ -2472,7 +2481,8 @@
                 return self.cmdline_run(cmdline, comment=comment, exit=exit,
                     env_arg=env_arg)
 
-        def pkgsign_simple(self, depot_url, pkg_name, exit=0, env_arg=None):
+        def pkgsign_simple(self, depot_url, pkg_name, exit=0, env_arg=None,
+            debug_hash=None):
                 chain_cert_path = os.path.join(self.chain_certs_dir,
                     "ch1_ta3_cert.pem")
                 sign_args = "-k %(key)s -c %(cert)s -i %(ch1)s %(name)s" % {
@@ -2482,16 +2492,23 @@
                     "ch1": chain_cert_path,
                 }
                 return self.pkgsign(depot_url, sign_args, exit=exit,
-                    env_arg=env_arg)
+                    env_arg=env_arg, debug_hash=debug_hash)
 
         def pkgsend(self, depot_url="", command="", exit=0, comment="",
-            allow_timestamp=False, env_arg=None, su_wrap=False):
+            allow_timestamp=False, env_arg=None, su_wrap=False,
+            debug_hash=None):
                 args = []
                 if allow_timestamp:
                         args.append("-D allow-timestamp")
                 if depot_url:
                         args.append("-s " + depot_url)
 
+                # debug_hash lets us choose the type of hash attributes that
+                # should be added to this package on publication. Valid values
+                # are: sha1, sha1+sha256, sha256
+                if debug_hash:
+                        args.append("-D hash=%s" % debug_hash)
+
                 if command:
                         args.append(command)
 
@@ -2536,7 +2553,8 @@
                 return retcode, published
 
         def pkgsend_bulk(self, depot_url, commands, exit=0, comment="",
-            no_catalog=False, refresh_index=False, su_wrap=False):
+            no_catalog=False, refresh_index=False, su_wrap=False,
+            debug_hash=None):
                 """ Send a series of packaging commands; useful  for quickly
                     doing a bulk-load of stuff into the repo.  All commands are
                     expected to work; if not, the transaction is abandoned.  If
@@ -2603,7 +2621,8 @@
                                                 retcode, published = \
                                                     self.pkgsend(depot_url, cmd,
                                                     allow_timestamp=True,
-                                                    su_wrap=su_wrap)
+                                                    su_wrap=su_wrap,
+                                                    debug_hash=debug_hash)
                                                 if retcode == 0 and published:
                                                         plist.append(published)
                                         except:
@@ -2621,7 +2640,8 @@
 
                         if exit == 0 and refresh_index:
                                 self.pkgrepo("-s %s refresh --no-catalog" %
-                                    depot_url, su_wrap=su_wrap)
+                                    depot_url, su_wrap=su_wrap,
+                                    debug_hash=debug_hash)
                 except UnexpectedExitCodeException, e:
                         if e.exitcode != exit:
                                 raise
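
All of the wrapper changes above follow one pattern: an optional
debug_hash keyword that expands to a -D hash=... flag on the resulting
command line. A hypothetical helper capturing the expansion the
wrappers inline:

    def debug_hash_args(debug_hash=None):
            """Extra CLI arguments to request a debug hash mode. Valid
            values are: sha1, sha1+sha256, sha256 (hypothetical helper;
            the wrappers above inline this logic)."""
            if debug_hash:
                    return ["-D", "hash=%s" % debug_hash]
            return []

So, for example, pkgsend_bulk(..., debug_hash="sha1+sha256") runs
pkgsend with -D hash=sha1+sha256 for each package it publishes.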
--- a/src/util/apache2/depot/depot_index.py	Thu Oct 03 09:25:02 2013 +0530
+++ b/src/util/apache2/depot/depot_index.py	Fri Oct 04 10:56:25 2013 +1300
@@ -22,7 +22,6 @@
 # Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 
 import cherrypy
-import hashlib
 import httplib
 import logging
 import mako
@@ -35,6 +34,7 @@
 import urllib
 import Queue
 
+import pkg.digest as digest
 import pkg.p5i
 import pkg.server.api
 import pkg.server.repository as sr
@@ -295,7 +295,7 @@
 
                 for prefix in repo_paths:
                         path = repo_paths[prefix]
-                        repo_hash = hashlib.sha1(path).hexdigest()
+                        repo_hash = digest.DEFAULT_HASH_FUNC(path).hexdigest()
                         index_dir = os.path.sep.join(
                             [self.cache_dir, "indexes", repo_hash])