17649505 pkgrepo should offer a way to compare repositories s12b75
authorXiaobo Shen <xiaobo.shen@oracle.com>
Wed, 27 May 2015 11:41:38 -0700
changeset 3210 2bcfa8dfa03c
parent 3209 917d0dd21063
child 3211 622c5b8a6b93
17649505 pkgrepo should offer a way to compare repositories
src/man/pkgrepo.1
src/pkg/external_deps.txt
src/pkg/manifests/developer:opensolaris:pkg5.p5m
src/pkgrepo.py
src/tests/cli/t_pkgrepo.py
--- a/src/man/pkgrepo.1	Fri May 22 11:23:28 2015 -0700
+++ b/src/man/pkgrepo.1	Wed May 27 11:41:38 2015 -0700
@@ -7,11 +7,11 @@
 
 <refentry id="pkgrepo-1">
 <refmeta><refentrytitle>pkgrepo</refentrytitle><manvolnum>1</manvolnum>
-<refmiscinfo class="date">4 Nov 2014</refmiscinfo>
+<refmiscinfo class="date">26 May 2015</refmiscinfo>
 <refmiscinfo class="sectdesc">&man1;</refmiscinfo>
 <refmiscinfo class="software">&release;</refmiscinfo>
 <refmiscinfo class="arch">generic</refmiscinfo>
-<refmiscinfo class="copyright">Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.</refmiscinfo>
+<refmiscinfo class="copyright">Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.</refmiscinfo>
 </refmeta>
 <refnamediv>
 <refname>pkgrepo</refname><refpurpose>Image Packaging System repository management utility</refpurpose></refnamediv>
@@ -49,6 +49,9 @@
     -s <replaceable>repo_uri_or_path</replaceable></synopsis>
 <synopsis>/usr/bin/pkgrepo fix [-v] [-p <replaceable>publisher</replaceable>]...
     -s <replaceable>repo_uri_or_path</replaceable></synopsis>
+<synopsis>/usr/bin/pkgrepo diff [-vq] [--strict] [--parsable] [-p <replaceable>publisher</replaceable>]...
+    -s <replaceable>first_repo_uri_or_path</replaceable> [--key <replaceable>ssl_key</replaceable> --cert <replaceable>ssl_cert</replaceable>]...
+    -s <replaceable>second_repo_uri_or_path</replaceable> [--key <replaceable>ssl_key</replaceable> --cert <replaceable>ssl_cert</replaceable>]...</synopsis>
 <synopsis>/usr/bin/pkgrepo help</synopsis>
 <synopsis>/usr/bin/pkgrepo version</synopsis>
 </refsynopsisdiv>
@@ -585,6 +588,58 @@
 </variablelist>
 </listitem>
 </varlistentry>
+<varlistentry><term><command>pkgrepo diff</command> [<option>vq</option>] [<option>-strict</option>] [<option>-parsable</option>] [<option>p</option> <replaceable>publisher</replaceable>]... <option>s</option> <replaceable>first_repo_uri_or_path</replaceable>... [<option>-key</option> <replaceable>ssl_key</replaceable> <option>-cert</option> <replaceable>ssl_cert</replaceable>]... <option>s</option> <replaceable>second_repo_uri_or_path</replaceable>... [<option>-key</option> <replaceable>ssl_key</replaceable> <option>-cert</option> <replaceable>ssl_cert</replaceable>]...</term>
+<listitem><para>Compare two repositories and show the differences.</para>
+<para>A <literal>-</literal> symbol at the beginning of an output
+line indicates an item found only in the first repository, while a
+<literal>+</literal> symbol indicates an item found only in the second
+repository. No symbol at the beginning means the item is common to both
+repositories.</para>
+<variablelist termlength="wholeline">
+<varlistentry><term><option>v</option></term>
+<listitem><para>Include output detailing the comparison including per-fmri
+output.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>q</option></term>
+<listitem><para>Perform quiet comparison; no output will be generated.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>-strict</option></term>
+<listitem><para>Compare catalog last modified time stamp. This is useful to
+determine whether one repository is the exact clone of the other one.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>-parsable</option></term>
+<listitem><para>Generate parsable output in JSON format.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>p</option> <replaceable>publisher</replaceable></term>
+<listitem><para>Perform the operation only for the specified publisher. If
+no publisher is specified, or if the special value <literal>all</literal> is
+specified, the operation is performed for all publishers. This option can
+be specified multiple times.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>s</option> <replaceable>repo_uri_or_path</replaceable></term>
+<listitem><para>Operate on the repository located at the given URI or file
+system path.</para>
+</listitem>
+</varlistentry>
+<varlistentry><term><option>-key</option> <replaceable>ssl_key</replaceable> <option>-cert</option> <replaceable>ssl_cert</replaceable></term>
+<listitem><para>Use the <option>-key</option> option to specify a client SSL
+key file to use for package retrieval from an HTTPS repository. Use the
+<option>-cert</option> option to specify a client SSL certificate file to use
+for package retrieval from an HTTPS repository. This option pair can be
+specified multiple times. The pair specified after the first <option>s</option>
+but before the second <option>s</option> will be applied to the first
+repository. The pair specified after the second <option>s</option> will be
+applied to the second repository.</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</listitem>
+</varlistentry>
 <varlistentry><term><command>pkgrepo help</command></term>
 <listitem><para>Display a usage message.</para>
 </listitem>
@@ -663,7 +718,8 @@
 <para>The following exit values are returned:</para>
 <variablelist>
 <varlistentry><term><returnvalue>0</returnvalue></term>
-<listitem><para>Command succeeded.</para>
+<listitem><para>Command succeeded (or no differences encountered
+for pkgrepo diff).</para>
 </listitem>
 </varlistentry>
 <varlistentry><term><returnvalue>1</returnvalue></term>
@@ -683,6 +739,10 @@
 <listitem><para>No changes were made, nothing to do.</para>
 </listitem>
 </varlistentry>
+<varlistentry><term><returnvalue>10</returnvalue></term>
+<listitem><para>Differences found for pkgrepo diff.</para>
+</listitem>
+</varlistentry>
 <varlistentry><term><returnvalue>99</returnvalue></term>
 <listitem><para>An unanticipated exception occurred.</para>
 </listitem>
--- a/src/pkg/external_deps.txt	Fri May 22 11:23:28 2015 -0700
+++ b/src/pkg/external_deps.txt	Wed May 27 11:41:38 2015 -0700
@@ -17,6 +17,7 @@
     pkg:/library/python/m2crypto-27
     pkg:/library/python/mako-27
     pkg:/library/python/ply-27
+    pkg:/library/python/prettytable-27
     pkg:/library/python/pybonjour-27
     pkg:/library/python/pycurl-27
     pkg:/library/python/pyopenssl-27
--- a/src/pkg/manifests/developer:opensolaris:pkg5.p5m	Fri May 22 11:23:28 2015 -0700
+++ b/src/pkg/manifests/developer:opensolaris:pkg5.p5m	Wed May 27 11:41:38 2015 -0700
@@ -40,6 +40,7 @@
 depend type=require fmri=pkg:/library/python/jsonrpclib-27
 depend type=require fmri=pkg:/library/python/jsonschema-27
 depend type=require fmri=pkg:/library/python/locale-services
+depend type=require fmri=pkg:/library/python/prettytable-27
 depend type=require fmri=pkg:/package/svr4
 depend type=require fmri=pkg:/runtime/python-27
 depend type=require fmri=pkg:/runtime/python-34
--- a/src/pkgrepo.py	Fri May 22 11:23:28 2015 -0700
+++ b/src/pkgrepo.py	Wed May 27 11:41:38 2015 -0700
@@ -31,10 +31,17 @@
 EXIT_OOPS    = 1
 EXIT_BADOPT  = 2
 EXIT_PARTIAL = 3
+EXIT_DIFF = 10
 
 # listing constants
 LISTING_FORMATS = ("default", "json", "json-formatted", "tsv")
 
+# diff type
+MINUS = -1
+PLUS = 1
+COMMON = 0
+diff_type_f = {MINUS: "- ", PLUS: "+ ", COMMON: ""}
+
 # globals
 tmpdirs = []
 
@@ -47,6 +54,7 @@
 import locale
 import logging
 import os
+import operator
 import shlex
 import shutil
 import sys
@@ -59,6 +67,7 @@
 from pkg.client import global_settings
 from pkg.client.debugvalues import DebugValues
 from pkg.misc import msg, PipeError
+from prettytable import PrettyTable
 import pkg
 import pkg.catalog
 import pkg.client.api_errors as apx
@@ -66,8 +75,10 @@
 import pkg.client.progress
 import pkg.client.publisher as publisher
 import pkg.client.transport.transport as transport
+import pkg.fmri as fmri
 import pkg.misc as misc
 import pkg.server.repository as sr
+import simplejson as json
 
 logger = global_settings.logger
 
@@ -170,6 +181,10 @@
 
      pkgrepo fix [-v] [-p publisher ...] -s repo_uri_or_path
 
+     pkgrepo diff [-vq] [--strict] [--parsable] [-p publisher ...]
+         -s first_repo_uri_or_path [--key ssl_key ... --cert ssl_cert ...]
+         -s second_repo_uri_or_path [--key ssl_key ... --cert ssl_cert ...]
+
      pkgrepo help
      pkgrepo version
 
@@ -279,9 +294,8 @@
             root=path)
 
 
-def setup_transport(conf, subcommand=None, prefix=None, verbose=False,
+def setup_transport(repo_uri, subcommand=None, prefix=None, verbose=False,
     remote_prefix=True, ssl_key=None, ssl_cert=None):
-        repo_uri = conf.get("repo_uri", None)
         if not repo_uri:
                 usage(_("No repository location specified."), cmd=subcommand)
 
@@ -315,7 +329,6 @@
 
         return xport, src_pub, tmp_dir
 
-
 def subcmd_add_publisher(conf, args):
         """Add publisher(s) to the specified repository."""
 
@@ -529,8 +542,8 @@
         if not conf.get("repo_uri", None):
                 usage(_("A package repository location must be provided "
                     "using -s."), cmd=subcommand)
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
 
         # Get properties.
         if pubs:
@@ -612,7 +625,7 @@
 
 
 def _get_matching_pubs(subcommand, pubs, xport, xpub, out_format="default",
-    use_transport=False):
+    use_transport=False, repo_uri=None):
 
         # Retrieve publisher information.
         pub_data = xport.get_publisherdata(xpub)
@@ -637,8 +650,11 @@
         elif pubs and not found:
                 if out_format == "default":
                         # Don't pollute other output formats.
-                        error(_("no matching publishers found"),
-                            cmd=subcommand)
+                        err_msg = _("no matching publishers found")
+                        if repo_uri:
+                                err_msg = _("no matching publishers found in "
+                                    "repository: {0}").format(repo_uri)
+                        error(err_msg, cmd=subcommand)
                 return EXIT_OOPS, None, None
         return rval, found, pub_data
 
@@ -799,8 +815,8 @@
         if not conf.get("repo_uri", None):
                 usage(_("A package repository location must be provided "
                     "using -s."), cmd=subcommand)
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
 
         # Retrieve repository status information.
         stat_idx = xport.get_status(xpub)
@@ -902,8 +918,8 @@
         if not conf.get("repo_uri", None):
                 usage(_("A package repository location must be provided "
                     "using -s."), cmd=subcommand)
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
 
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub, out_format=out_format, use_transport=True)
@@ -1073,8 +1089,8 @@
                 usage(_("A package repository location must be provided "
                     "using -s."), cmd=subcommand)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
 
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub, use_transport=True)
@@ -1202,8 +1218,8 @@
                 elif build_index:
                         xport.publish_rebuild_indexes(xpub)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub)
         if rval == EXIT_OOPS:
@@ -1315,8 +1331,8 @@
                 elif refresh_index:
                         xport.publish_refresh_indexes(xpub)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand,
-            ssl_key=key, ssl_cert=cert)
+        xport, xpub, tmp_dir = setup_transport(conf.get("repo_uri"),
+            subcommand=subcommand, ssl_key=key, ssl_cert=cert)
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub)
         if rval == EXIT_OOPS:
@@ -1674,7 +1690,7 @@
                 usage(_("-d or -i option cannot be used when dependency "
                     "verification is disabled."), cmd=subcommand)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand)
+        xport, xpub, tmp_dir = setup_transport(repo_uri, subcommand=subcommand)
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub)
 
@@ -1698,7 +1714,7 @@
 
         found_pubs = []
         for pfx in found:
-                xport, xpub, tmp_dir = setup_transport(conf, prefix=pfx,
+                xport, xpub, tmp_dir = setup_transport(repo_uri, prefix=pfx,
                     remote_prefix=False,
                     subcommand=subcommand)
                 xpub.transport = xport
@@ -1752,14 +1768,13 @@
                 usage(_("Network repositories are not currently supported "
                     "for this operation."), cmd=subcommand)
 
-        xport, xpub, tmp_dir = setup_transport(conf, subcommand=subcommand)
+        xport, xpub, tmp_dir = setup_transport(repo_uri, subcommand=subcommand)
         rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
             xpub)
         if rval == EXIT_OOPS:
                 return rval
 
         logger.info("Initiating repository fix.")
-        progtrack = get_tracker()
 
         def verify_cb(tracker, verify_tuple):
                 """A method passed to sr.Repository.fix(..) to emit verify
@@ -1778,7 +1793,7 @@
 
         found_pubs = []
         for pfx in found:
-                xport, xpub, tmp_dir = setup_transport(conf, prefix=pfx,
+                xport, xpub, tmp_dir = setup_transport(repo_uri, prefix=pfx,
                     remote_prefix=False,
                     subcommand=subcommand)
                 xpub.transport = xport
@@ -1829,6 +1844,410 @@
                 return EXIT_OOPS
         return EXIT_OK
 
+def __get_pub_fmris(pub, xport, tmp_dir):
+        """Return the package FMRIs recorded in a publisher's catalog.
+
+        pub: the publisher object whose catalog is queried.  If it has no
+        'meta_root' yet, a temporary catalog directory is created under
+        'tmp_dir', 'xport' is attached as the publisher's transport and a
+        full, immediate refresh is performed before the catalog is read.
+
+        xport: transport assigned to 'pub' when its catalog has to be
+        retrieved.
+
+        tmp_dir: parent directory for the temporary catalog directory.
+
+        Returns a (fmris, pkgs) tuple built from the result of
+        catalog.get_matching_fmris("*"): 'fmris' is a list made from its
+        second value and 'pkgs' is its first value (callers in this file
+        use len(pkgs) and pkgs.keys()).
+        """
+
+        if not pub.meta_root:
+                # Create a temporary directory for catalog.
+                cat_dir = tempfile.mkdtemp(prefix="pkgrepo-diff.", dir=tmp_dir)
+                pub.meta_root = cat_dir
+                pub.transport = xport
+                pub.refresh(full_refresh=True, immediate=True)
+
+        # The third value returned by get_matching_fmris() is not needed.
+        pkgs, fmris, _unmatched = pub.catalog.get_matching_fmris("*")
+        return list(fmris), pkgs
+
+def __format_diff(diff_type, subject):
+        """Build one line of diff output for the given subject.
+
+        diff_type: one of MINUS, PLUS or COMMON; selects the leading
+        marker through the diff_type_f table.
+
+        subject: a publisher (rendered by its prefix, without
+        indentation), a package FMRI or any other value (both rendered
+        after an eight-space indent).
+        """
+
+        marker = diff_type_f[diff_type]
+        if isinstance(subject, publisher.Publisher):
+                return "{0}{1}".format(marker, subject.prefix)
+
+        indented = "        {0}{1}"
+        if isinstance(subject, fmri.PkgFmri):
+                return indented.format(marker, str(subject))
+        return indented.format(marker, subject)
+
+def __sorted(subject, stype=None):
+        """Return a new sorted list built from 'subject'.
+
+        stype: when "pub", the items are ordered by their 'prefix'
+        attribute; for any other value the items' own ordering is used.
+        """
+
+        sort_key = None
+        if stype == "pub":
+                sort_key = operator.attrgetter("prefix")
+        return sorted(subject, key=sort_key)
+
+def __emit_msg(diff_type, subject):
+        """Format 'subject' as a diff line of the given type and print it
+        with msg()."""
+
+        msg(__format_diff(diff_type, subject))
+
+def __repo_diff(conf, pubs, xport, rpubs, rxport, tmp_dir, verbose, quiet,
+    compare_ts, compare_cat, parsable):
+        """Determine the differences between two repositories.
+
+        conf: configuration dict; its "repo_uri" and "com_repo_uri"
+        entries identify the first and the second repository.  Entries
+        using the "file" scheme are replaced in place by their pathname.
+
+        pubs, xport: publisher objects of the first repository and the
+        transport used to retrieve their catalogs.
+
+        rpubs, rxport: the same for the second repository.
+
+        tmp_dir: directory under which temporary catalog directories are
+        created.
+
+        verbose: if true, emit per-publisher and per-FMRI differences
+        instead of the summary table.
+
+        quiet: if true, produce no output and return as soon as the first
+        difference is found.
+
+        compare_ts: currently not used by this function.
+
+        compare_cat: if true, also compare the catalog last modified time
+        of the publishers found in both repositories.
+
+        parsable: if true, emit JSON instead of human readable text.
+
+        Returns EXIT_OK if no difference was found, EXIT_DIFF otherwise.
+        """
+
+        same_repo = True
+        if conf["repo_uri"].scheme == "file":
+                conf["repo_uri"] = conf["repo_uri"].get_pathname()
+        if conf["com_repo_uri"].scheme == "file":
+                conf["com_repo_uri"] = conf["com_repo_uri"].get_pathname()
+
+        foundpfx = set([pub.prefix for pub in pubs])
+        rfoundpfx = set([pub.prefix for pub in rpubs])
+
+        # Publishers found in only one of the two repositories.
+        minus_pfx = __sorted(foundpfx - rfoundpfx)
+        minus_pubs = __sorted([pub for pub in pubs if pub.prefix in minus_pfx],
+            stype="pub")
+        plus_pfx = __sorted(rfoundpfx - foundpfx)
+        plus_pubs = __sorted([pub for pub in rpubs if pub.prefix in plus_pfx],
+            stype="pub")
+
+        if minus_pubs or plus_pubs:
+                same_repo = False
+                if quiet:
+                        return EXIT_DIFF
+
+        # Publishers found in both repositories; both lists are sorted by
+        # prefix so that they can be walked pairwise below.
+        pcommon_set = foundpfx & rfoundpfx
+        common_pubs = __sorted([p for p in pubs if p.prefix in pcommon_set],
+            stype="pub")
+        common_rpubs = __sorted([p for p in rpubs if p.prefix in pcommon_set],
+            stype="pub")
+
+        res_dict = {"table_legend": [["Repo1", str(conf["repo_uri"])],
+                ["Repo2", str(conf["com_repo_uri"])]],
+            "table_header": [_("Publisher"),
+                # This is a table column header which tells that this
+                # row shows number of packages found in specific
+                # repository only.
+                # Use terse translation to avoid too-wide header.
+                _("{repo} only").format(repo="Repo1"),
+                _("{repo} only").format(repo="Repo2"),
+                # This is a table column header which tells that this
+                # row shows number of packages found in both
+                # repositories being compared together.
+                # Use terse translation to avoid too-wide header.
+                _("In both"), _("Total")],
+            # Row based table contents.
+            "table_data": []
+            }
+
+        verbose_res_dict = {"plus_pubs": [], "minus_pubs": [],
+            "common_pubs": []}
+
+        def __diff_pub_helper(pub, symbol):
+                """Record or emit a publisher found in only one of the
+                repositories; 'symbol' is MINUS (first repository only)
+                or PLUS (second repository only)."""
+
+                # A publisher found only in the second repository is
+                # retrieved through that repository's transport, the
+                # same way it is done for common publishers below.
+                pub_xport = xport
+                if symbol == PLUS:
+                        pub_xport = rxport
+                fmris, pkgs = __get_pub_fmris(pub, pub_xport, tmp_dir)
+                # Summary level.
+                if not verbose:
+                        td_row = [pub.prefix,
+                            {"packages": len(pkgs), "versions": len(fmris)},
+                            None, {"packages": 0, "versions": 0},
+                            {"packages": len(pkgs), "versions": len(fmris)}]
+                        if symbol == PLUS:
+                                # The counts belong in the "Repo2 only"
+                                # column.
+                                td_row[1], td_row[2] = td_row[2], td_row[1]
+                        res_dict["table_data"].append(td_row)
+                        return
+
+                if parsable:
+                        key_name = "minus_pubs"
+                        if symbol == PLUS:
+                                key_name = "plus_pubs"
+                        verbose_res_dict[key_name].append(
+                            {"publisher": pub.prefix, "packages": len(pkgs),
+                            "versions": len(fmris)})
+                        return
+
+                __emit_msg(symbol, pub)
+                __emit_msg(symbol, _("({0:d} package(s) with "
+                    "{1:d} different version(s))").format(len(pkgs),
+                    len(fmris)))
+
+        for pub in minus_pubs:
+                __diff_pub_helper(pub, MINUS)
+
+        for pub in plus_pubs:
+                __diff_pub_helper(pub, PLUS)
+
+        for pub, rpub in zip(common_pubs, common_rpubs):
+                # Indicates whether those two pubs have same pkgs.
+                same_pkgs = True
+                same_cat = True
+                fmris, pkgs = __get_pub_fmris(pub, xport, tmp_dir)
+                rfmris, rpkgs = __get_pub_fmris(rpub, rxport, tmp_dir)
+                fmris_str = set([str(f) for f in fmris])
+                rfmris_str = set([str(f) for f in rfmris])
+                del fmris, rfmris
+
+                minus_fmris = __sorted(fmris_str - rfmris_str)
+                plus_fmris = __sorted(rfmris_str - fmris_str)
+                if minus_fmris or plus_fmris:
+                        same_repo = False
+                        same_pkgs = False
+                        if quiet:
+                                return EXIT_DIFF
+
+                cat_lm_pub = None
+                cat_lm_rpub = None
+                if compare_cat:
+                        cat_lm_pub = pub.catalog.last_modified.isoformat()
+                        cat_lm_rpub = rpub.catalog.last_modified.isoformat()
+                        same_cat = cat_lm_pub == cat_lm_rpub
+                        if not same_cat:
+                                # same_repo is only ever cleared here; a
+                                # matching catalog must not hide the
+                                # differences found earlier.
+                                same_repo = False
+                                if quiet:
+                                        return EXIT_DIFF
+
+                common_fmris = fmris_str & rfmris_str
+                pkg_set = set(pkgs.keys())
+                rpkg_set = set(rpkgs.keys())
+                del pkgs, rpkgs
+                common_pkgs = pkg_set & rpkg_set
+
+                # Print summary.
+                if not verbose:
+                        if not same_cat:
+                                # Common publishers with different catalog
+                                # modification time.
+                                res_dict.setdefault("nonstrict_pubs", []
+                                    ).append(pub.prefix)
+
+                        # Add to the table only if there are differences
+                        # for this publisher.
+                        if not same_pkgs:
+                                minus_pkgs = pkg_set - rpkg_set
+                                minus_pkg_vers = {"packages": len(minus_pkgs),
+                                    "versions": len(minus_fmris)}
+                                del minus_pkgs, minus_fmris
+
+                                plus_pkgs = rpkg_set - pkg_set
+                                plus_pkg_vers = {"packages": len(plus_pkgs),
+                                    "versions": len(plus_fmris)}
+                                del plus_pkgs, plus_fmris
+
+                                total_pkgs = pkg_set | rpkg_set
+                                total_fmris = fmris_str | rfmris_str
+                                total_pkg_vers = {"packages": len(total_pkgs),
+                                    "versions": len(total_fmris)}
+                                del total_pkgs, total_fmris
+
+                                com_pkg_vers = {"packages": len(common_pkgs),
+                                    "versions": len(common_fmris)}
+
+                                res_dict["table_data"].append([pub.prefix,
+                                    minus_pkg_vers, plus_pkg_vers,
+                                    com_pkg_vers,
+                                    total_pkg_vers])
+                        del common_pkgs, common_fmris, pkg_set, rpkg_set
+                        continue
+
+                com_pub_info = {}
+                # Emit publisher name if there are differences.
+                if not same_pkgs or not same_cat:
+                        if parsable:
+                                com_pub_info["publisher"] = pub.prefix
+                                com_pub_info["+"] = []
+                                com_pub_info["-"] = []
+                        else:
+                                __emit_msg(COMMON, pub)
+
+                # Emit catalog differences.
+                if not same_cat:
+                        omsg = _("catalog last modified: {0}")
+                        minus_cat = omsg.format(cat_lm_pub)
+                        plus_cat = omsg.format(cat_lm_rpub)
+                        if parsable:
+                                com_pub_info["catalog"] = {"-": minus_cat,
+                                    "+": plus_cat}
+                        else:
+                                __emit_msg(MINUS, minus_cat)
+                                __emit_msg(PLUS, plus_cat)
+
+                for f in minus_fmris:
+                        if parsable:
+                                com_pub_info["-"].append(str(f))
+                        else:
+                                __emit_msg(MINUS, f)
+                del minus_fmris
+
+                for f in plus_fmris:
+                        if parsable:
+                                com_pub_info["+"].append(str(f))
+                        else:
+                                __emit_msg(PLUS, f)
+                del plus_fmris
+
+                if not same_pkgs:
+                        if parsable:
+                                com_pub_info["common"] = {
+                                    "packages": len(common_pkgs),
+                                    "versions": len(common_fmris)}
+                        else:
+                                msg(_("        ({0:d} pkg(s) with {1:d} "
+                                    "version(s) are in both repositories.)"
+                                    ).format(len(common_pkgs),
+                                    len(common_fmris)))
+                del common_pkgs, common_fmris, pkg_set, rpkg_set
+
+                if com_pub_info:
+                        verbose_res_dict["common_pubs"].append(com_pub_info)
+
+        if same_repo:
+                # Same repo. Will use EXIT_OK to represent.
+                return EXIT_OK
+
+        if verbose:
+                # Human readable verbose output has already been emitted
+                # above; only the JSON form is emitted at the end.
+                if parsable:
+                        msg(json.dumps(verbose_res_dict))
+                return EXIT_DIFF
+
+        if not parsable:
+                # Cell template: number of packages followed by the number
+                # of versions, right-aligned to the column's widest value.
+                ftemp = "{0:d} [{1:{2}d}]"
+                if "nonstrict_pubs" in res_dict and res_dict["nonstrict_pubs"]:
+                        msg("")
+                        msg(_("The catalog for the following publisher(s) "
+                            "in repository {0} is not an exact copy of the "
+                            "one for the same publisher in repository {1}:"
+                            "\n    {2}").format(conf["repo_uri"],
+                            conf["com_repo_uri"],
+                            ", ".join(res_dict["nonstrict_pubs"])))
+                if res_dict["table_data"]:
+                        info_table = PrettyTable(res_dict["table_header"],
+                            encoding=locale.getpreferredencoding())
+                        info_table.align = "r"
+                        info_table.align[unicode(_("Publisher"),
+                            locale.getpreferredencoding())] = "l"
+                        # Calculate column wise maximum number for formatting.
+                        col_maxs = 4 * [0]
+                        for td in res_dict["table_data"]:
+                                for idx, cell in enumerate(td):
+                                        if idx > 0 and isinstance(cell, dict):
+                                                col_maxs[idx-1] = max(
+                                                    col_maxs[idx-1],
+                                                    cell["versions"])
+
+                        for td in res_dict["table_data"]:
+                                t_row = []
+                                for idx, cell in enumerate(td):
+                                        if not cell:
+                                                # No data for this column.
+                                                t_row.append("-")
+                                        elif isinstance(cell, basestring):
+                                                t_row.append(cell)
+                                        elif isinstance(cell, dict):
+                                                t_row.append(ftemp.format(
+                                                    cell["packages"],
+                                                    cell["versions"], len(str(
+                                                    col_maxs[idx-1]))))
+                                info_table.add_row(t_row)
+
+                        # This message explains that each cell of the table
+                        # shows two numbers in a format e.g. "4870 [10227]".
+                        # Here "number of packages" and "total distinct
+                        # versions" are shown outside and inside of square
+                        # brackets respectively.
+                        msg(_("""
+The table below shows the number of packages [total distinct versions]
+by publisher in the specified repositories.
+"""))
+                        for leg in res_dict["table_legend"]:
+                                msg("* " + leg[0] + ": " + leg[1])
+                        msg("")
+                        msg(info_table)
+        else:
+                msg(json.dumps(res_dict))
+
+        return EXIT_DIFF
+
+
+def subcmd_diff(conf, args):
+        """Compare two repositories.
+
+        conf: configuration dict.  The first and second repository given
+        with -s are stored as "repo_uri" and "com_repo_uri"; --key/--cert
+        values are stored as "repo_key"/"repo_cert" or
+        "com_repo_key"/"com_repo_cert" depending on which -s they follow.
+
+        args: the subcommand's command line arguments.
+
+        Returns EXIT_OOPS if an invalid publisher prefix is given or no
+        matching publisher can be found in one of the repositories;
+        otherwise the result of __repo_diff().  Invalid usage is reported
+        through usage().
+        """
+
+        opts, pargs = getopt.getopt(args, "vqp:s:", ["strict", "parsable",
+            "key=", "cert="])
+        subcommand = "diff"
+        pubs = set()
+        verbose = 0
+        quiet = False
+        compare_ts = True
+        compare_cat = False
+        parsable = False
+
+        def key_cert_conf_helper(conf_type, arg):
+                """Helper function for collecting key and cert."""
+
+                # The value applies to the most recently specified
+                # repository.
+                if conf.get("repo_uri") and not conf.get("com_repo_uri"):
+                        conf["repo_" + conf_type] = arg
+                elif conf.get("com_repo_uri"):
+                        conf["com_repo_" + conf_type] = arg
+                else:
+                        usage(_("--{0} must be specified following a "
+                            "-s").format(conf_type), cmd=subcommand)
+
+        for opt, arg in opts:
+                if opt == "-s":
+                        if "repo_uri" not in conf:
+                                conf["repo_uri"] = parse_uri(arg)
+                        elif "com_repo_uri" not in conf:
+                                conf["com_repo_uri"] = parse_uri(arg)
+                        else:
+                                usage(_("only two repositories can be "
+                                    "specified"), cmd=subcommand)
+                elif opt == "-v":
+                        verbose += 1
+                elif opt == "-q":
+                        quiet = True
+                elif opt == "--strict":
+                        compare_cat = True
+                elif opt == "--parsable":
+                        parsable = True
+                elif opt == "-p":
+                        if not misc.valid_pub_prefix(arg):
+                                error(_("Invalid publisher prefix '{0}'").format(
+                                    arg), cmd=subcommand)
+                                return EXIT_OOPS
+                        pubs.add(arg)
+                elif opt == "--key":
+                        key_cert_conf_helper("key", arg)
+                elif opt == "--cert":
+                        key_cert_conf_helper("cert", arg)
+
+        if len(pargs) > 0:
+                usage(_("command does not take any operands"), cmd=subcommand)
+
+        if quiet and verbose:
+                usage(_("-q and -v can not be combined"), cmd=subcommand)
+
+        repo_uri = conf.get("repo_uri")
+        if not repo_uri:
+                usage(_("Two package repository locations must be provided "
+                    "using -s."), cmd=subcommand)
+
+        com_repo_uri = conf.get("com_repo_uri")
+        if not com_repo_uri:
+                usage(_("A second package repository location must also be "
+                    "provided using -s."), cmd=subcommand)
+
+        # One transport per repository, each with its own key/cert pair.
+        xport, xpub, tmp_dir = setup_transport(repo_uri, subcommand=subcommand,
+            ssl_key=conf.get("repo_key"), ssl_cert=conf.get("repo_cert"))
+        cxport, cxpub, c_tmp_dir = setup_transport(com_repo_uri,
+            subcommand=subcommand, prefix="com",
+            ssl_key=conf.get("com_repo_key"),
+            ssl_cert=conf.get("com_repo_cert"))
+        rval, found, pub_data = _get_matching_pubs(subcommand, pubs, xport,
+            xpub, use_transport=True, repo_uri=repo_uri)
+        if rval == EXIT_OOPS:
+                return rval
+
+        rval, cfound, cpub_data = _get_matching_pubs(subcommand, pubs, cxport,
+            cxpub, use_transport=True, repo_uri=com_repo_uri)
+        if rval == EXIT_OOPS:
+                return rval
+
+        return __repo_diff(conf, pub_data, xport, cpub_data, cxport, tmp_dir,
+            verbose, quiet, compare_ts, compare_cat, parsable)
+
 
 def main_func():
         global_settings.client_name = PKG_CLIENT_NAME
--- a/src/tests/cli/t_pkgrepo.py	Fri May 22 11:23:28 2015 -0700
+++ b/src/tests/cli/t_pkgrepo.py	Wed May 27 11:41:38 2015 -0700
@@ -42,6 +42,8 @@
 import pkg.client.api_errors as apx
 import pkg.p5p
 import shutil
+import simplejson as json
+import subprocess
 import tempfile
 import time
 import urllib
@@ -3549,6 +3551,297 @@
                 self.pkgrepo("contents -s {0} zoo".format(repo_path))
 
 
+class TestPkgrepoMultiRepo(pkg5unittest.ManyDepotTestCase):
+        """Tests for 'pkgrepo diff' run against multiple repositories."""
+        # Only start/stop the depot once (instead of for every test)
+        persistent_setup = True
+
+        foo10 = """
+            open foo@1.0,5.11-0:20110804T203458Z
+            close"""
+
+        foo20t1 = """
+            open foo@2.0,5.11-0:20120804T203458Z
+            close"""
+
+        foo20t2 = """
+            open foo@2.0,5.11-0:20130804T203458Z
+            close"""
+
+        bar10 = """
+            open bar@1.0,5.11-0:20130804T203458Z
+            close"""
+
+        moo10 = """
+            open moo@1.0,5.11-0:20130804T203458Z
+            close"""
+
+        noo10 = """
+            open noo@1.0,5.11-0:20130804T203458Z
+            close"""
+
+        def setUp(self):
+                """Create five repositories: four publishing as "test1"
+                (depots 1, 3, 4, 5) and one publishing as "test2" (depot 2).
+                """
+
+                pkg5unittest.ManyDepotTestCase.setUp(self, ["test1", "test2",
+                    "test1", "test1", "test1"])
+
+                self.rurl1 = self.dcs[1].get_repo_url()
+                self.durl1 = self.dcs[1].get_depot_url()
+
+                self.rurl2 = self.dcs[2].get_repo_url()
+                self.durl2 = self.dcs[2].get_depot_url()
+
+                self.rurl3 = self.dcs[3].get_repo_url()
+                self.durl3 = self.dcs[3].get_depot_url()
+
+                self.rurl4 = self.dcs[4].get_repo_url()
+                self.rdir4 = self.dcs[4].get_repodir()
+                self.pkgsend_bulk(self.rurl4, (self.moo10, self.noo10))
+
+                self.rurl5 = self.dcs[5].get_repo_url()
+                self.rdir5 = self.dcs[5].get_repodir()
+
+        def test_01_diff(self):
+                """Verify that diff subcommand works as expected."""
+
+                # Verify invalid input will cause failure.
+                self.pkgrepo("diff", exit=2)
+                self.pkgrepo("diff -s {0}".format(self.rurl1), exit=2)
+                self.pkgrepo("diff {0}".format(self.rurl1), exit=2)
+                self.pkgrepo("diff --unknown -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=2)
+                self.pkgrepo("diff --unknown -s {0} -s {1} -s {2}".format(
+                    self.rurl1, self.rurl2, self.rurl3), exit=2)
+                self.pkgrepo("diff --!invalid -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=2)
+                self.pkgrepo("diff -s {0} -s {1} invalidarg".format(self.rurl1,
+                    self.rurl2), exit=2)
+                self.pkgrepo("diff -p +faf -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=1)
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    "+++1a"), exit=1)
+                self.pkgrepo("diff -qv -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=2)
+
+                self.dcs[1].start()
+                self.dcs[2].start()
+                self.dcs[3].start()
+                # Verify empty repos comparison with just publisher names.
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_("test1" in self.output and "test2" in self.output)
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.durl1,
+                    self.durl3))
+                self.assert_(not self.output)
+                self.pkgrepo("diff -p test1 -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=1)
+                self.pkgrepo("diff -s {0} -s {1}".format(self.durl1,
+                    self.durl2), exit=10)
+                self.pkgrepo("diff -p test2 -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=1)
+                self.pkgrepo("diff -p test2 -s {0} -s {1}".format(self.durl1,
+                    self.durl2), exit=1)
+                self.pkgrepo("diff -p test1 -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3))
+
+                # Publish some pkgs.
+                self.pkgsend_bulk(self.rurl1, (self.foo10))
+                self.pkgsend_bulk(self.rurl2, (self.foo10))
+                self.pkgsend_bulk(self.rurl3, (self.foo10))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3))
+                self.assert_(not self.output)
+                self.pkgrepo("diff -v -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3))
+                self.assert_(not self.output)
+                self.pkgrepo("diff -v -s {0} -s {1}".format(self.durl1,
+                    self.durl3))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_("test1" in self.output and "test2" in
+                    self.output)
+
+                # Test -q option.
+                self.pkgrepo("diff -q -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_(not self.output)
+                self.pkgrepo("diff -q -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3))
+                self.assert_(not self.output)
+
+                self.pkgsend_bulk(self.rurl1, (self.foo20t1))
+                self.pkgsend_bulk(self.rurl2, (self.foo20t1))
+                self.pkgsend_bulk(self.rurl3, (self.foo20t2))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("test1" in self.output)
+                self.pkgrepo("diff -v -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("- pkg://test1/foo@2.0,5.11-0:20120804T203458Z" in
+                    self.output)
+                self.assert_("+ pkg://test1/foo@2.0,5.11-0:20130804T203458Z" in
+                    self.output)
+                self.assert_("(1 pkg(s) with 1 version(s) are in both "
+                    "repositories.)" in self.output)
+                self.assert_("test1" in self.output)
+
+                # Test --strict option.
+                self.pkgrepo("diff --strict -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("catalog" in self.output)
+
+                self.pkgrepo("diff --strict -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+
+                # Arrange publishers so that repo1 has test1, test2;
+                # repo2 has test2, test3;
+                # repo3 has test1, test2.
+                self.pkgrepo("-s {0} add-publisher test2".format(
+                    self.rurl1))
+                self.pkgrepo("-s {0} add-publisher test2".format(
+                    self.rurl3))
+                self.pkgrepo("-s {0} add-publisher test3".format(
+                    self.rurl2))
+                self.pkgrepo("set -s {0} publisher/prefix=test2".format(
+                    self.rurl1))
+                self.pkgrepo("set -s {0} publisher/prefix=test3".format(
+                    self.rurl2))
+                self.pkgrepo("set -s {0} publisher/prefix=test2".format(
+                    self.rurl3))
+                # Make repo1 test2 the same as repo2 test2
+                self.pkgsend_bulk(self.rurl1, (self.foo10, self.foo20t1))
+                # Make repo3 test2 the same as repo2 test2
+                self.pkgsend_bulk(self.rurl3, (self.foo10, self.foo20t1))
+
+                self.pkgsend_bulk(self.rurl2, (self.bar10, self.moo10))
+                # repo1 and repo3 contain same pkgs, but one pkg has different
+                # timestamps.
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("test1" in self.output)
+                self.assert_("test2" not in self.output)
+                self.pkgrepo("diff -q -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_(not self.output)
+
+                self.pkgrepo("diff -v -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_("- test1" in self.output and "test2" not in
+                    self.output and "+ test3" in self.output)
+                self.pkgrepo("diff -q -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2), exit=10)
+                self.assert_(not self.output)
+                self.pkgrepo("diff --parsable --strict -s {0} -s {1}".format(
+                    self.rurl1, self.rurl2), exit=10)
+                expected = {
+"table_header": ["Publisher", "Repo1 only", "Repo2 only", "In both", "Total"],
+"table_data": [["test1", {"packages": 1, "versions": 2},
+                None, {"packages": 0, "versions": 0},
+                {"packages": 1, "versions": 2}],
+                ["test3", None, {"packages": 2, "versions": 2},
+    {"packages": 0, "versions": 0}, {"packages": 2, "versions": 2}]],
+"table_legend": [["Repo1", self.rurl1],
+                 ["Repo2", self.rurl2]],
+"nonstrict_pubs": ["test2"]}
+                self.assertEqualJSON(json.dumps(expected), self.output)
+                self.pkgrepo("diff --parsable --strict -vs {0} -s {1}".format(
+                    self.rurl1, self.rurl2), exit=10)
+                expected = {
+"common_pubs": [{"publisher": "test2", "+": [], "-": [],
+    "catalog": {"+": "replaced",
+                "-": "replaced"}}],
+"minus_pubs": [{"publisher": "test1", "packages": 1, "versions": 2}],
+"plus_pubs": [{"publisher": "test3", "packages": 2, "versions": 2}]}
+                output = json.loads(self.output)
+                self.assert_("common_pubs" in output)
+                output["common_pubs"][0]["catalog"]["+"] = "replaced"
+                output["common_pubs"][0]["catalog"]["-"] = "replaced"
+                self.assertEqualJSON(json.dumps(expected),
+                    json.dumps(output))
+                # Test -p option.
+                self.pkgrepo("diff -vp test2 -s {0} -s {1}".format(self.rurl1,
+                    self.rurl2))
+                self.assert_(not self.output)
+                # Enable strict check.
+                self.pkgrepo("diff -vp test2 --strict -s {0} -s {1}".format(
+                    self.rurl1, self.rurl2), exit=10)
+                self.assert_("test2" in self.output)
+                self.pkgrepo("diff -p test2 --strict -s {0} -s {1}".format(
+                    self.rurl1, self.rurl2), exit=10)
+                self.assert_("test2" in self.output)
+                self.assert_("Repo1:" not in self.output)
+
+                # Test set relationship.
+                self.pkgsend_bulk(self.rurl1, (self.bar10))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("test1" in self.output)
+                self.assert_("test2" in self.output and "0 [0]" in \
+                    self.output)
+                self.pkgrepo("diff --parsable -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                output = json.loads(self.output)
+                # test2 in repo1 is a superset of test2 in repo3.
+                self.assert_(output["table_data"][1][2]["packages"] == 0)
+                self.assert_(output["table_data"][1][2]["versions"] == 0)
+
+                self.pkgrepo("diff --parsable -v -s {0} -s {1}".format(
+                    self.rurl1, self.rurl3), exit=10)
+                output = json.loads(self.output)
+                # test2 in repo1 is a superset of test2 in repo3.
+                self.assert_(output["common_pubs"][1]["-"])
+                self.assert_(not output["common_pubs"][1]["+"])
+                self.assert_("common" in output["common_pubs"][1])
+
+                self.pkgsend_bulk(self.rurl3, (self.bar10, self.moo10))
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                self.assert_("test1" in self.output)
+                self.assert_("test2" in self.output and "0 [0]" in \
+                    self.output)
+                self.pkgrepo("diff --parsable -s {0} -s {1}".format(self.rurl1,
+                    self.rurl3), exit=10)
+                output = json.loads(self.output)
+                # test2 in repo1 is now a subset of test2 in repo3.
+                self.assert_(output["table_data"][1][1]["packages"] == 0)
+                self.assert_(output["table_data"][1][1]["versions"] == 0)
+
+                self.pkgrepo("diff --parsable -v -s {0} -s {1}".format(
+                    self.rurl1, self.rurl3), exit=10)
+                output = json.loads(self.output)
+                # test2 in repo1 is now a subset of test2 in repo3.
+                self.assert_(not output["common_pubs"][1]["-"])
+                self.assert_(output["common_pubs"][1]["+"])
+
+                self.pkgrepo("diff -s {0} -s {1}".format(self.rurl1,
+                    self.rurl4), exit=10)
+                self.assert_("test2" in self.output)
+                self.assert_("test1" in self.output and "-" in \
+                    self.output and "0 [0]" in self.output)
+                self.pkgrepo("diff --parsable -s {0} -s {1}".format(self.rurl1,
+                    self.rurl4), exit=10)
+                output = json.loads(self.output)
+                # test1 in repo4 contains completely different fmris from
+                # the one in repo1.
+                self.assert_(output["table_data"][0][3]["packages"] == 0)
+                self.assert_(output["table_data"][0][3]["versions"] == 0)
+
+                # Test clone repositories are exactly the same as the
+                # originals.
+                self.pkgrecv(self.rurl4, "--clone -d {0}".format(self.rdir5))
+                ret = subprocess.call(["/usr/bin/gdiff", "-Naur", "-x",
+                    "index", "-x", "trans", self.rdir4, self.rdir5])
+                self.assertEqual(ret, 0)
+                self.pkgrepo("diff -v --strict -s {0} -s {1}".format(
+                    self.rurl4, self.rurl5))
+                self.assert_(not self.output)
+
+
 class TestPkgrepoHTTPS(pkg5unittest.HTTPSTestClass):
 
         example_pkg10 = """
@@ -3588,6 +3881,7 @@
                     "key": os.path.join(self.keys_dir,
                     self.get_cli_key("test")),
                     "url": self.url,
+                    "srurl": self.srurl,
                     "empty": os.path.join(self.test_root, "tmp/empty"),
                     "noexist": os.path.join(self.test_root, "octopus"),
                     "verboten": self.verboten,
@@ -3624,6 +3918,28 @@
                 self.pkgrepo("-s {url} contents --key {key} --cert {cert}"
                    .format(**arg_dict))
 
+                # pkgrepo diff.
+                self.pkgrepo("-s {url} diff --key {key} --cert {cert}"
+                   " -s {url} --key {key} --cert {cert}".format(**arg_dict))
+
+                self.pkgrepo("diff --key {key} --cert {cert} -s {url}"
+                   " -s {url} --key {key} --cert {cert}".format(**arg_dict),
+                   exit=2)
+
+                # Test only provides key and cert to the first repo.
+                self.pkgrepo("-s {url} diff --key {key} --cert {cert} "
+                    "-s {srurl}".format(**arg_dict))
+
+                self.pkgrepo("-s {url} diff --key {key} --cert {cert} "
+                    "-s {url}".format(**arg_dict), exit=1)
+
+                # Test only provides key and cert to the second repo.
+                self.pkgrepo("-s {srurl} diff -s {url} --key {key} "
+                    "--cert {cert}".format(**arg_dict))
+
+                self.pkgrepo("-s {url} diff -s {url} --key {key} "
+                    "--cert {cert}".format(**arg_dict), exit=1)
+
                 # Try without key and cert (should fail)
                 self.pkgrepo("-s {url} rebuild".format(**arg_dict), exit=1)