7213 pkgrepo should support package removal
author Shawn Walker <shawn.walker@oracle.com>
Tue, 19 Apr 2011 13:29:05 -0700
changeset 2301 d4b6490d2c24
parent 2300 85f6dc1ef6f9
child 2302 2310564590c7
7213 pkgrepo should support package removal 18188 pkgrepo usage message erroneously shows -s as optional
src/man/pkgrepo.1.txt
src/modules/catalog.py
src/modules/client/api_errors.py
src/modules/server/repository.py
src/pkgrepo.py
src/tests/cli/t_pkgrepo.py
--- a/src/man/pkgrepo.1.txt	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/man/pkgrepo.1.txt	Tue Apr 19 13:29:05 2011 -0700
@@ -7,21 +7,24 @@
 SYNOPSIS
      pkgrepo create [--version] uri_or_path
 
-     pkgrepo add-publisher [-s repo_uri_or_path] publisher ...
+     pkgrepo add-publisher -s repo_uri_or_path publisher ...
 
-     pkgrepo get [-F format] [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo get [-F format] [-p publisher ...] -s repo_uri_or_path
          [section/property ...]
 
      pkgrepo info [-F format] [-H] [-p publisher ...]
-         [-s repo_uri_or_path]
+         -s repo_uri_or_path
 
-     pkgrepo rebuild [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo rebuild [-p publisher ...] -s repo_uri_or_path
          [--no-catalog] [--no-index]
 
-     pkgrepo refresh [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo refresh [-p publisher ...] -s repo_uri_or_path
          [--no-catalog] [--no-index]
 
-     pkgrepo set [-p publisher] [-s repo_uri_or_path]
+     pkgrepo remove [-n] [-p publisher ...] -s repo_uri_or_path
+         pkg_fmri_pattern ...
+
+     pkgrepo set [-p publisher] -s repo_uri_or_path
          section/property=[value] ... or
          section/property=([value]) ...
 
@@ -63,13 +66,13 @@
                     publishers, catalog version 1, and search
                     version 1.
 
-     add-publisher [-s repo_uri_or_path] publisher ...
+     add-publisher -s repo_uri_or_path publisher ...
           May only be used with version 4 filesystem-based repositories.
 
           Add the specified publisher(s) to the repository.  The new
           publisher(s) will not have any packages or content.
 
-     get [-F format] [-H] [-p publisher ...] [-s repo_uri_or_path]
+     get [-F format] [-H] [-p publisher ...] -s repo_uri_or_path
        [section/property ...]
           Displays the property information for the repository or
           its publishers.
@@ -97,7 +100,7 @@
           With -s, operate on the repository located at the given URI
           or filesystem path.
 
-     info [-F format] [-H] [-p publisher ...] [-s repo_uri_or_path]
+     info [-F format] [-H] [-p publisher ...] -s repo_uri_or_path
           Displays a listing of the package publishers known by the
           repository.  The listing includes the number of packages
           for each publisher, when the publisher's package data was
@@ -116,7 +119,7 @@
           With -s, operate on the repository located at the given URI
           or filesystem path.
 
-     rebuild [-p publisher ...] [-s repo_uri_or_path] [--no-catalog]
+     rebuild [-p publisher ...] -s repo_uri_or_path [--no-catalog]
        [--no-index]
           Discards all catalog, search, and other cached information
           found in the repository and then re-creates it based on the
@@ -134,7 +137,7 @@
 
           With --no-index, don't rebuild search indexes.
 
-     refresh [-p publisher ...] [-s repo_uri_or_path] [--no-catalog]
+     refresh [-p publisher ...] -s repo_uri_or_path [--no-catalog]
        [--no-index]
           Catalogs any new packages found in the repository and
           updates all search indexes.  This is intended for use with
@@ -153,8 +156,31 @@
 
           With --no-index, don't update search indexes.
 
-     set [-p publisher] [-s repo_uri_or_path]
-       [section/property=value ...]
+     remove [-n] [-p publisher ...] -s repo_uri_or_path
+       pkg_fmri_pattern ...
+          May only be used with filesystem-based repositories.
+
+          WARNING: This operation is not reversible and should not be used
+          while other clients are accessing the repository as it may cause
+          them to fail during retrieval operations.
+
+          Remove the packages matching the specified patterns from the
+          repository (including any files they reference that are not in
+          use by any other package).  Please note that all search index
+          data for related publishers will be removed.
+
+          With -n, perform a trial run of the operation with no package
+          changes made.  A list of the packages to be removed will be
+          displayed before exiting.
+
+          With -p, only remove matching packages for the given publisher.
+          If not provided, any matching packages will be removed for all
+          publishers.  This option may be specified multiple times.
+
+          With -s, operate on the repository located at the given URI
+          or filesystem path.
+
+     set [-p publisher] -s repo_uri_or_path [section/property=value ...]
           May only be used with filesystem-based repositories.
 
           Sets the value of the specified properties for the repository
--- a/src/modules/catalog.py	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/modules/catalog.py	Tue Apr 19 13:29:05 2011 -0700
@@ -19,7 +19,7 @@
 #
 # CDDL HEADER END
 #
-# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
 
 """Interfaces and implementation for the Catalog object, as well as functions
 that operate on lists of package FMRIs."""
@@ -2755,6 +2755,213 @@
                                 return values
                 return None
 
+        def get_matching_fmris(self, patterns, raise_unmatched=True):
+                """Given a user-specified list of FMRI pattern strings, return
+                a tuple of ('matching', 'references', 'unmatched'), where
+                matching is a dict of matching fmris, references is a dict of
+                the patterns indexed by matching FMRI, and unmatched is a set of
+                the patterns that did not match any FMRIs respectively:
+
+                {
+                 pkgname: [fmri1, fmri2, ...],
+                 pkgname: [fmri1, fmri2, ...],
+                 ...
+                }
+
+                {
+                 fmri1: [pat1, pat2, ...],
+                 fmri2: [pat1, pat2, ...],
+                 ...
+                }
+
+                set(['unmatched1', 'unmatchedN'])
+
+                'patterns' is the list of package patterns to match.
+
+                'raise_unmatched' is an optional boolean indicating that an
+                exception should be raised if any patterns are not matched.
+
+                Constraint used is always AUTO as per expected UI behavior when
+                determining successor versions.
+
+                Note that patterns starting w/ pkg:/ require an exact match;
+                patterns containing '*' will using fnmatch rules; the default
+                trailing match rules are used for remaining patterns.
+
+                Exactly duplicated patterns are ignored.
+
+                Routine raises PackageMatchErrors if errors occur: it is
+                illegal to specify multiple different patterns that match the
+                same package name.  Only patterns that contain wildcards are
+                allowed to match multiple packages.
+                """
+
+                # problems we check for
+                illegals = []
+                unmatched = set()
+                multimatch = []
+                multispec = []
+                pat_data = []
+                wildcard_patterns = set()
+
+                # ignore dups
+                patterns = list(set(patterns))
+
+                # figure out which kind of matching rules to employ
+                brelease = "5.11" # XXX a better way to default this?
+                latest_pats = set()
+                for pat in patterns:
+                        try:
+                                parts = pat.split("@", 1)
+                                pat_stem = parts[0]
+                                pat_ver = None
+                                if len(parts) > 1:
+                                        pat_ver = parts[1]
+
+                                if "*" in pat_stem or "?" in pat_stem:
+                                        matcher = fmri.glob_match
+                                        wildcard_patterns.add(pat)
+                                elif pat_stem.startswith("pkg:/") or \
+                                    pat_stem.startswith("/"):
+                                        matcher = fmri.exact_name_match
+                                else:
+                                        matcher = fmri.fmri_match
+
+                                if matcher == fmri.glob_match:
+                                        pfmri = fmri.MatchingPkgFmri(
+                                            pat_stem, brelease)
+                                else:
+                                        pfmri = fmri.PkgFmri(
+                                            pat_stem, brelease)
+
+                                if not pat_ver:
+                                        # Do nothing.
+                                        pass
+                                elif "*" in pat_ver or "?" in pat_ver or \
+                                    pat_ver == "latest":
+                                        pfmri.version = \
+                                            pkg.version.MatchingVersion(pat_ver,
+                                                brelease)
+                                else:
+                                        pfmri.version = \
+                                            pkg.version.Version(pat_ver,
+                                                brelease)
+
+                                if pat_ver and getattr(pfmri.version,
+                                    "match_latest", None):
+                                        latest_pats.add(pat)
+
+                                pat_data.append((matcher, pfmri))
+                        except (fmri.FmriError,
+                            pkg.version.VersionError), e:
+                                illegals.append(e)
+
+                if illegals:
+                        raise api_errors.PackageMatchErrors(illegal=illegals)
+
+                # Create a dictionary of patterns, with each value being a
+                # dictionary of pkg names & fmris that match that pattern.
+                ret = dict(zip(patterns, [dict() for i in patterns]))
+
+                for name in self.names():
+                        for pat, (matcher, pfmri) in zip(patterns, pat_data):
+                                pub = pfmri.publisher
+                                version = pfmri.version
+                                if not matcher(name, pfmri.pkg_name):
+                                        continue # name doesn't match
+                                for ver, entries in self.entries_by_version(name):
+                                        if version and not ver.is_successor(
+                                            version, pkg.version.CONSTRAINT_AUTO):
+                                                continue # version doesn't match
+                                        for f, metadata in entries:
+                                                fpub = f.publisher
+                                                if pub and pub != fpub:
+                                                        # specified pubs
+                                                        # conflict
+                                                        continue 
+                                                ret[pat].setdefault(f.pkg_name,
+                                                    []).append(f)
+
+                # Discard all but the newest version of each match.
+                if latest_pats:
+                        # Rebuild ret based on latest version of every package.
+                        latest = {}
+                        nret = {}
+                        for p in patterns:
+                                if p not in latest_pats or not ret[p]:
+                                        nret[p] = ret[p]
+                                        continue
+
+                                nret[p] = {}
+                                for pkg_name in ret[p]:
+                                        nret[p].setdefault(pkg_name, [])
+                                        for f in ret[p][pkg_name]:
+                                                nver = latest.get(f.pkg_name,
+                                                    None)
+                                                if nver > f.version:
+                                                        # Not the newest.
+                                                        continue
+                                                if nver == f.version:
+                                                        # Allow for multiple
+                                                        # FMRIs of the same
+                                                        # latest version.
+                                                        nret[p][pkg_name].append(
+                                                            f)
+                                                        continue
+
+                                                latest[f.pkg_name] = f.version
+                                                nret[p][pkg_name] = [f]
+
+                        # Assign new version of ret and discard latest list.
+                        ret = nret
+                        del latest
+
+                # Determine match failures.
+                matchdict = {}
+                for p in patterns:
+                        l = len(ret[p])
+                        if l == 0: # no matches at all
+                                unmatched.add(p)
+                        elif l > 1 and p not in wildcard_patterns:
+                                # multiple matches
+                                multimatch.append((p, [
+                                    ret[p][n][0].get_pkg_stem()
+                                    for n in ret[p]
+                                ]))
+                        else:
+                                # single match or wildcard
+                                for k in ret[p].keys():
+                                        # for each matching package name
+                                        matchdict.setdefault(k, []).append(p)
+
+                for name in matchdict:
+                        if len(matchdict[name]) > 1:
+                                # different pats, same pkg
+                                multispec.append(tuple([name] +
+                                    matchdict[name]))
+
+                if (raise_unmatched and unmatched) or multimatch or multispec:
+                        raise api_errors.PackageMatchErrors(
+                            multiple_matches=multimatch,
+                            multispec=multispec,
+                            unmatched_fmris=unmatched)
+
+                # merge patterns together now that there are no conflicts
+                proposed_dict = {}
+                for d in ret.values():
+                        proposed_dict.update(d)
+
+                # construct references so that we can know which pattern
+                # generated which fmris...
+                references = dict([
+                    (f, p)
+                    for p in ret.keys()
+                    for flist in ret[p].values()
+                    for f in flist
+                ])
+
+                return proposed_dict, references, unmatched
+
         def get_package_counts_by_pub(self):
                 """Returns a generator of tuples of the form (pub,
                 package_count, package_version_count).  'pub' is the publisher
--- a/src/modules/client/api_errors.py	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/modules/client/api_errors.py	Tue Apr 19 13:29:05 2011 -0700
@@ -339,6 +339,48 @@
                         "filesystem.")
 
 
+class PackageMatchErrors(ApiException):
+        def __init__(self, unmatched_fmris=EmptyI, multiple_matches=EmptyI,
+            illegal=EmptyI, multispec=EmptyI):
+                ApiException.__init__(self)
+                self.unmatched_fmris = unmatched_fmris
+                self.multiple_matches = multiple_matches
+                self.illegal = illegal
+                self.multispec = multispec
+
+        def __str__(self):
+                res = []
+                if self.unmatched_fmris:
+                        s = _("The following pattern(s) did not match any "
+                            "packages:")
+
+                        res += [s]
+                        res += ["\t%s" % p for p in self.unmatched_fmris]
+
+                if self.multiple_matches:
+                        s = _("'%s' matches multiple packages")
+                        for p, lst in self.multiple_matches:
+                                res.append(s % p)
+                                for pfmri in lst:
+                                        res.append("\t%s" % pfmri)
+
+                if self.illegal:
+                        s = _("'%s' is an illegal FMRI")
+                        res += [ s % p for p in self.illegal ]
+
+                if self.multispec:
+                        s = _("The following different patterns specify the "
+                          "same package(s):")
+                        res += [s]
+                        for t in self.multispec:
+                                res += [
+                                    ", ".join([t[i] for i in range(1, len(t))])
+                                    + ": %s" % t[0]
+                                ]
+
+                return "\n".join(res)
+
+
 class PlanCreationException(ApiException):
         def __init__(self, unmatched_fmris=EmptyI, multiple_matches=EmptyI,
             missing_matches=EmptyI, illegal=EmptyI,
--- a/src/modules/server/repository.py	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/modules/server/repository.py	Tue Apr 19 13:29:05 2011 -0700
@@ -38,6 +38,7 @@
 import pkg.actions as actions
 import pkg.catalog as catalog
 import pkg.client.api_errors as apx
+import pkg.client.progress as progress
 import pkg.client.publisher as publisher
 import pkg.config as cfg
 import pkg.file_layout.file_manager as file_manager
@@ -574,6 +575,44 @@
                         self.log_obj.log(msg=msg, context=context,
                             severity=severity)
 
+        def __purge_search_index(self):
+                """Private helper function to dump repository search data."""
+
+                if not self.index_root or not os.path.exists(self.index_root):
+                        return
+
+                ind = indexer.Indexer(self.index_root,
+                    self._get_manifest,
+                    self.manifest,
+                    log=self.__index_log,
+                    sort_file_max_size=self.__sort_file_max_size)
+
+                # To prevent issues with NFS consumers, attempt to lock the
+                # index first, but don't hold the lock as holding a lock while
+                # removing the directory containing it can cause rmtree() to
+                # fail.
+                ind.lock(blocking=False)
+                ind.unlock()
+
+                # Since the lock succeeded, immediately try to rename the index
+                # directory to prevent other consumers from using the index
+                # while it is being removed since a lock can't be held.
+                portable.rename(self.index_root, self.index_root + ".old")
+                try:
+                        shutil.rmtree(self.index_root + ".old")
+                except EnvironmentError, e:
+                        if e.errno == errno.EACCES:
+                                raise apx.PermissionsException(
+                                    e.filename)
+                        if e.errno == errno.EROFS:
+                                raise apx.ReadOnlyFileSystemException(
+                                    e.filename)
+                        if e.errno != errno.ENOENT:
+                                raise
+
+                # Discard in-memory search data.
+                self.reset_search()
+
         def __rebuild(self, build_catalog=True, build_index=False, lm=None,
             incremental=False):
                 """Private version; caller responsible for repository
@@ -643,42 +682,10 @@
                         self.catalog.finalize()
                         self.__save_catalog(lm=lm)
 
-                if not incremental and self.index_root and \
-                    os.path.exists(self.index_root):
+                if not incremental:
                         # Only discard search data if this isn't an incremental
-                        # rebuild, and there's an index to destroy.
-                        ind = indexer.Indexer(self.index_root,
-                            self._get_manifest,
-                            self.manifest,
-                            log=self.__index_log,
-                            sort_file_max_size=self.__sort_file_max_size)
-
-                        # To prevent issues with NFS consumers, attempt to lock
-                        # the index first, but don't hold the lock as holding
-                        # a lock while removing the directory containing it
-                        # can cause rmtree() to fail.
-                        ind.lock(blocking=False)
-                        ind.unlock()
-
-                        # Since the lock succeeded, immediately try to rename
-                        # the index directory to prevent other consumers from
-                        # using the index while it is being removed since
-                        # a lock can't be held.
-                        portable.rename(self.index_root, self.index_root + ".old")
-                        try:
-                                shutil.rmtree(self.index_root + ".old")
-                        except EnvironmentError, e:
-                                if e.errno == errno.EACCES:
-                                        raise apx.PermissionsException(
-                                            e.filename)
-                                if e.errno == errno.EROFS:
-                                        raise apx.ReadOnlyFileSystemException(
-                                            e.filename)
-                                if e.errno != errno.ENOENT:
-                                        raise
-
-                        # Discard in-memory search data.
-                        self.reset_search()
+                        # rebuild.
+                        self.__purge_search_index()
 
                 if build_index:
                         self.__refresh_index()
@@ -1398,6 +1405,218 @@
                 finally:
                         self.__unlock_rstore()
 
+        def remove_packages(self, packages, progtrack=None):
+                """Removes the specified packages from the repository store.  No
+                other modifying operations may be performed until complete.
+
+                'packages' is a list of FMRIs of packages to remove.
+
+                'progtrack' is an optional ProgressTracker object.
+                """
+
+                if self.mirror:
+                        raise RepositoryMirrorError()
+                if self.read_only:
+                        raise RepositoryReadOnlyError()
+                if not self.catalog_root or self.catalog_version < 1:
+                        raise RepositoryUnsupportedOperationError()
+                if not self.manifest_root:
+                        raise RepositoryUnsupportedOperationError()
+                if not progtrack:
+                        progtrack = progress.QuietProgressTracker()
+
+                def get_hashes(pfmri):
+                        """Given an FMRI, return a set containing all of the
+                        hashes of the files its manifest references."""
+
+                        m = self._get_manifest(pfmri)
+                        hashes = set()
+                        for a in m.gen_actions():
+                                if not a.has_payload or not a.hash:
+                                        # Nothing to archive.
+                                        continue
+
+                                # Action payload.
+                                hashes.add(a.hash)
+
+                                # Signature actions have additional payloads.
+                                if a.name == "signature":
+                                        hashes.update(a.attrs.get("chain",
+                                            "").split())
+                        return hashes
+
+                self.__lock_rstore()
+                c = self.catalog
+                try:
+                        # First, dump all search data as it will be invalidated
+                        # as soon as the catalog is updated.
+                        progtrack.actions_set_goal(_("Delete search index"), 1)
+                        self.__purge_search_index()
+                        progtrack.actions_add_progress()
+                        progtrack.actions_done()
+
+                        # Next, remove all of the packages to be removed
+                        # from the catalog (if they are present).  That way
+                        # any active clients are less likely to be surprised
+                        # when files for packages start disappearing.
+                        progtrack.actions_set_goal(_("Update catalog"), 1)
+                        c.batch_mode = True
+                        save_catalog = False
+                        for pfmri in packages:
+                                try:
+                                        c.remove_package(pfmri)
+                                except apx.UnknownCatalogEntry:
+                                        # Assume already removed from catalog or
+                                        # not yet added to it.
+                                        continue
+                                save_catalog = True
+
+                        c.batch_mode = False
+                        if save_catalog:
+                                # Only need to re-write catalog if at least one
+                                # package had to be removed from it.
+                                c.finalize(pfmris=packages)
+                                c.save()
+
+                        progtrack.actions_add_progress()
+                        progtrack.actions_done()
+
+                        # Next, build a list of all of the hashes for the files
+                        # that can potentially be removed from the repository.
+                        # This will also indirectly abort the operation should
+                        # any of the packages not actually have a manifest in
+                        # the repository.
+                        pfiles = set()
+                        progtrack.actions_set_goal(
+                            _("Analyze removed packages"), len(packages))
+                        for pfmri in packages:
+                                pfiles.update(get_hashes(pfmri))
+                                progtrack.actions_add_progress()
+                        progtrack.actions_done()
+
+                        # Now for the slow part; iterate over every manifest in
+                        # the repository (excluding the ones being removed) and
+                        # remove any hashes in use from the list to be removed.
+                        # However, if the package being removed doesn't have any
+                        # payloads, then we can skip checking all of the
+                        # packages in the repository for files in use.
+                        if pfiles:
+                                # Number of packages to check is total found in
+                                # repo minus number to be removed.
+                                slist = os.listdir(self.manifest_root)
+                                remaining = sum(
+                                    1
+                                    for s in slist
+                                    for v in os.listdir(os.path.join(
+                                        self.manifest_root, s))
+                                )
+
+                                progtrack.actions_set_goal(
+                                    _("Analyze repository packages"), remaining)
+                                for name in slist:
+                                        # Stem must be decoded before use.
+                                        try:
+                                                pname = urllib.unquote(name)
+                                        except Exception:
+                                                # Assume error is result of
+                                                # unexpected file in directory;
+                                                # just skip it and drive on.
+                                                continue
+
+                                        pdir = os.path.join(self.manifest_root,
+                                            name)
+                                        for ver in os.listdir(pdir):
+                                                if not pfiles:
+                                                        # Skip remaining entries
+                                                        # since no files are
+                                                        # safe to remove, but
+                                                        # update progress.
+                                                        progtrack.actions_add_progress()
+                                                        continue
+
+                                                # Version must be decoded before
+                                                # use.
+                                                pver = urllib.unquote(ver)
+                                                try:
+                                                        pfmri = fmri.PkgFmri(
+                                                            "@".join((pname,
+                                                            pver)), publisher=self.publisher)
+                                                except Exception:
+                                                        # Assume error is result
+                                                        # of unexpected file in
+                                                        # directory; just skip
+                                                        # it and drive on.
+                                                        progtrack.actions_add_progress()
+                                                        continue
+
+                                                if pfmri in packages:
+                                                        # Package is one of
+                                                        # those queued for
+                                                        # removal.
+                                                        progtrack.actions_add_progress()
+                                                        continue
+
+                                                # Any files in use by another
+                                                # package can't be removed.
+                                                pfiles -= get_hashes(pfmri)
+                                                progtrack.actions_add_progress()
+                                progtrack.actions_done()
+
+                        # Next, remove the manifests of the packages to be
+                        # removed.  (This is done before removing the files
+                        # so that clients won't have a chance to retrieve a
+                        # manifest which has missing files.)
+                        progtrack.actions_set_goal(
+                            _("Remove package manifests"), len(packages))
+                        for pfmri in packages:
+                                mpath = self.manifest(pfmri)
+                                portable.remove(mpath)
+                                progtrack.actions_add_progress()
+                        progtrack.actions_done()
+
+                        # Next, remove any package files that are not
+                        # referenced by other packages.
+                        progtrack.actions_set_goal(
+                            _("Remove package files"), len(pfiles))
+                        for h in pfiles:
+                                # File might already be gone (don't care if
+                                # it is).
+                                fpath = self.cache_store.lookup(h)
+                                if fpath is not None:
+                                        portable.remove(fpath)
+                                        progtrack.actions_add_progress()
+                        progtrack.actions_done()
+
+                        # Finally, tidy up repository structure by discarding
+                        # unused package data directories for any packages
+                        # removed.
+                        def rmdir(d):
+                                """rmdir; but ignores non-empty directories."""
+                                try:
+                                        os.rmdir(d)
+                                except OSError, e:
+                                        if e.errno not in (
+                                            errno.ENOTEMPTY,
+                                            errno.EEXIST):
+                                                raise
+
+                        for name in set(
+                            f.get_dir_path(stemonly=True)
+                            for f in packages):
+                                rmdir(os.path.join(self.manifest_root, name))
+
+                        if self.file_root:
+                                for entry in os.listdir(self.file_root):
+                                        rmdir(os.path.join(self.file_root,
+                                            entry))
+                except EnvironmentError, e:
+                        raise apx._convert_error(e)
+                finally:
+                        # This ensures batch_mode is reset in the event of an
+                        # error.
+                        c.batch_mode = False
+                        self.__unlock_rstore()
+
         def rebuild(self, build_catalog=True, build_index=False):
                 """Rebuilds the repository catalog and search indexes using the
                 package manifests currently in the repository.
@@ -2309,6 +2528,89 @@
                         rstore = self.__new_rstore(pfmri.publisher)
                 return rstore.open(client_release, pfmri)
 
+        def get_matching_fmris(self, patterns, pubs=misc.EmptyI):
+                """Given a user-specified list of FMRI pattern strings, return
+                a tuple of ('matching', 'references'), where matching is a dict
+                of matching fmris and references is a dict of the patterns
+                indexed by matching FMRI respectively:
+
+                {
+                 pkgname: [fmri1, fmri2, ...],
+                 pkgname: [fmri1, fmri2, ...],
+                 ...
+                }
+
+                {
+                 fmri1: [pat1, pat2, ...],
+                 fmri2: [pat1, pat2, ...],
+                 ...
+                }
+
+                'patterns' is the list of package patterns to match.
+
+                'pubs' is an optional set of publisher prefixes to restrict the
+                results to.
+
+                Constraint used is always AUTO as per expected UI behavior when
+                determining successor versions.
+
+                Note that patterns starting w/ pkg:/ require an exact match;
+                patterns containing '*' will using fnmatch rules; the default
+                trailing match rules are used for remaining patterns.
+
+                Exactly duplicated patterns are ignored.
+
+                Routine raises PackageMatchErrors if errors occur: it is illegal
+                to specify multiple different patterns that match the same
+                package name.  Only patterns that contain wildcards are allowed
+                to match multiple packages.
+                """
+
+                def merge(src, dest):
+                        for k, v in src.iteritems():
+                                if k in dest:
+                                        dest[k].extend(v)
+                                else:
+                                        dest[k] = v
+
+                matching = {}
+                references = {}
+                unmatched = None
+                for rstore in self.rstores:
+                        if not rstore.catalog_root or not rstore.publisher:
+                                # No catalog to aggregate matches from.
+                                continue
+                        if pubs and rstore.publisher not in pubs:
+                                # Doesn't match specified publisher.
+                                continue
+
+                        # Get matching items from target catalog and then
+                        # merge the result.
+                        mdict, mrefs, munmatched = \
+                            rstore.catalog.get_matching_fmris(patterns,
+                                raise_unmatched=False)
+                        merge(mdict, matching)
+                        merge(mrefs, references)
+                        if unmatched is None:
+                                unmatched = munmatched
+                        else:
+                                # The only unmatched entries that are
+                                # interesting are the ones that have no
+                                # matches for any publisher.
+                                unmatched.intersection_update(munmatched)
+
+                        del mdict, mrefs, munmatched
+
+                if unmatched:
+                        # One or more patterns didn't match a package from any
+                        # publisher.
+                        raise apx.PackageMatchErrors(unmatched_fmris=unmatched)
+                if not matching:
+                        # No packages or no publishers matching 'pubs'.
+                        raise apx.PackageMatchErrors(unmatched_fmris=patterns)
+
+                return matching, references
+
         @property
         def publishers(self):
                 """A set containing the list of publishers known to the
@@ -2336,6 +2638,63 @@
                                 continue
                         rstore.refresh_index()
 
+        def remove_packages(self, packages, progtrack=None, pub=None):
+                """Removes the specified packages from the repository.
+
+                'packages' is a list of FMRIs of packages to remove.
+
+                'progtrack' is an optional ProgressTracker object.
+
+                'pub' is an optional publisher prefix to limit the operation to.
+                """
+
+                plist = set()
+                pubs = set()
+                for p in packages:
+                        try:
+                                pfmri = p
+                                if not isinstance(pfmri, fmri.PkgFmri):
+                                        pfmri = fmri.PkgFmri(pfmri)
+                                if pub and not pfmri.publisher:
+                                        pfmri.publisher = pub
+                                if pfmri.publisher:
+                                        pubs.add(pfmri.publisher)
+                                plist.add(pfmri)
+                        except fmri.FmriError, e:
+                                raise RepositoryInvalidFMRIError(e)
+
+                if len(pubs) > 1:
+                        # Don't allow removal of packages from different
+                        # publishers at the same time.  Current transaction
+                        # model relies on a single publisher at a time and
+                        # transport is mapped the same way.
+                        raise RepositoryUnsupportedOperationError()
+
+                if not pub and pubs:
+                        # Use publisher specified in one of the FMRIs instead
+                        # of default publisher.
+                        pub = list(pubs)[0]
+
+                try:
+                        rstore = self.get_pub_rstore(pub)
+                except RepositoryUnknownPublisher, e:
+                        for p in plist:
+                                if not pfmri.publisher:
+                                        # No publisher given in FMRI and no
+                                        # default publisher so treat as
+                                        # invalid FMRI.
+                                        raise RepositoryUnqualifiedFMRIError(
+                                            pfmri)
+                        raise
+
+                # Before moving on, assign publisher for every FMRI that doesn't
+                # have one already.
+                for p in plist:
+                        if not pfmri.publisher:
+                                pfmri.publisher = rstore.publisher
+
+                rstore.remove_packages(packages, progtrack=progtrack)
+
         def add_content(self, pub=None, refresh_index=False):
                 """Looks for packages added to the repository that are not in
                 the catalog, adds them, and then updates search data by default.
--- a/src/pkgrepo.py	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/pkgrepo.py	Tue Apr 19 13:29:05 2011 -0700
@@ -39,6 +39,7 @@
 tmpdirs = []
 
 import atexit
+import collections
 import copy
 import errno
 import getopt
@@ -58,6 +59,7 @@
 import pkg
 import pkg.catalog
 import pkg.client.api_errors as apx
+import pkg.client.progress
 import pkg.client.publisher as publisher
 import pkg.client.transport.transport as transport
 import pkg.misc as misc
@@ -72,6 +74,7 @@
         for d in tmpdirs:
                 shutil.rmtree(d, True)
 
+
 def error(text, cmd=None):
         """Emit an error message prefixed by the command name """
 
@@ -95,6 +98,19 @@
         logger.error(ws + pkg_cmd + text_nows)
 
 
def get_tracker(quiet=False):
        """Return a progress tracker appropriate for the session.

        'quiet' requests a tracker that produces no output; otherwise a
        fancy terminal tracker is preferred, falling back to the plain
        command-line tracker when the terminal can't support it.
        """

        if quiet:
                return pkg.client.progress.QuietProgressTracker()
        try:
                return pkg.client.progress.FancyUNIXProgressTracker()
        except pkg.client.progress.ProgressTrackerException:
                return pkg.client.progress.CommandLineProgressTracker()
+
+
 def usage(usage_error=None, cmd=None, retcode=2, full=False):
         """Emit a usage message and optionally prefix it with a more
         specific error message.  Causes program to exit.
@@ -116,21 +132,24 @@
 Subcommands:
      pkgrepo create [--version] uri_or_path
 
-     pkgrepo add-publisher [-s repo_uri_or_path] publisher ...
+     pkgrepo add-publisher -s repo_uri_or_path publisher ...
 
-     pkgrepo get [-F format] [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo get [-F format] [-p publisher ...] -s repo_uri_or_path
          [section/property ...]
 
      pkgrepo info [-F format] [-H] [-p publisher ...]
-         [-s repo_uri_or_path]
+         -s repo_uri_or_path
 
-     pkgrepo rebuild [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo rebuild [-p publisher ...] -s repo_uri_or_path
          [--no-catalog] [--no-index]
 
-     pkgrepo refresh [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo refresh [-p publisher ...] -s repo_uri_or_path
          [--no-catalog] [--no-index]
 
-     pkgrepo set [-p publisher ...] [-s repo_uri_or_path]
+     pkgrepo remove [-n] [-p publisher ...] -s repo_uri_or_path
+         pkg_fmri_pattern ...
+
+     pkgrepo set [-p publisher ...] -s repo_uri_or_path
          section/property[+|-]=[value] ... or
          section/property[+|-]=([value]) ...
 
@@ -158,6 +177,68 @@
         return publisher.RepositoryURI(misc.parse_uri(uri))
 
 
+def subcmd_remove(conf, args):
+        subcommand = "remove"
+
+        opts, pargs = getopt.getopt(args, "np:s:")
+
+        dry_run = False
+        pubs = set()
+        for opt, arg in opts:
+                if opt == "-n":
+                        dry_run = True
+                elif opt == "-p":
+                        pubs.add(arg)
+                elif opt == "-s":
+                        conf["repo_uri"] = parse_uri(arg)
+
+        if not pargs:
+                usage(_("At least one package pattern must be provided."),
+                    cmd=subcommand)
+
+        # Get repository object.
+        if not conf.get("repo_uri", None):
+                usage(_("A package repository location must be provided "
+                    "using -s."), cmd=subcommand)
+        repo = get_repo(conf, read_only=False, subcommand=subcommand)
+
+        if "all" in pubs:
+                pubs = set()
+
+        # Find matching packages.
+        try:
+                matching, refs = repo.get_matching_fmris(pargs, pubs=pubs)
+        except apx.PackageMatchErrors, e:
+                error(str(e), cmd=subcommand)
+                return EXIT_OOPS
+
+        if dry_run:
+                # Don't make any changes; display list of packages to be
+                # removed and exit.
+                packages = set(f for m in matching.values() for f in m)
+                count = len(packages)
+                plist = "\n".join("\t%s" % p for p in sorted(packages))
+                logger.info(_("%(count)d package(s) will be removed:\n"
+                    "%(plist)s") % locals())
+                return EXIT_OK
+
+        progtrack = get_tracker()
+        packages = collections.defaultdict(list)
+        for m in matching.values():
+                for f in m:
+                        packages[f.publisher].append(f)
+
+        for pub in packages:
+                logger.info(_("Removing packages for publisher %s ...") % pub)
+                repo.remove_packages(packages[pub], progtrack=progtrack,
+                    pub=pub)
+                if len(packages) > 1:
+                        # Add a newline between each publisher.
+                        logger.info("")
+
+        return EXIT_OK
+
+
 def print_col_listing(desired_field_order, field_data, field_values, out_format,
     def_fmt, omit_headers):
         """Print a columnar listing defined by provided values."""
--- a/src/tests/cli/t_pkgrepo.py	Tue Apr 19 10:22:24 2011 -0700
+++ b/src/tests/cli/t_pkgrepo.py	Tue Apr 19 13:29:05 2011 -0700
@@ -52,6 +52,8 @@
 
         tree10 = """
             open [email protected],5.11-0
+            add file tmp/empty mode=0555 owner=root group=bin path=/etc/empty
+            add file tmp/truck1 mode=0444 owner=root group=bin path=/etc/trailer
             close
         """
 
@@ -89,10 +91,15 @@
             close
         """
 
+        fhashes = {
+             "tmp/empty": "5f5fb715934e0fa2bfb5611fd941d33228027006",
+             "tmp/truck1": "c9e257b659ace6c3fbc4d334f49326b3889fd109",
+             "tmp/truck2": "c07fd27b5b57f8131f42e5f2c719a469d9fc71c5",
+        }
+
         def setUp(self):
                 pkg5unittest.SingleDepotTestCase.setUp(self)
-                self.make_misc_files(["tmp/empty", "tmp/truck1",
-                    "tmp/truck2"])
+                self.make_misc_files(["tmp/empty", "tmp/truck1", "tmp/truck2"])
 
         def test_00_base(self):
                 """Verify pkgrepo handles basic option and subcommand parsing
@@ -1143,6 +1150,155 @@
 """
                 self.assertEqualDiff(expected, self.output)
 
        def test_09_remove_packages(self):
                """Verify that remove subcommand works as expected: matching
                packages are removed, package files are deleted only once no
                remaining package references them, empty manifest directories
                are discarded, and catalog updatelogs record the removals."""

                # Create a repository and then copy it somewhere for testing
                # to make it easy to restore the original as needed.
                src_repo = os.path.join(self.test_root, "remove-repo")
                self.create_repo(src_repo)

                # Publish packages under two publishers; zoo and tree exist
                # for both, so publisher-scoped removal can be exercised.
                self.pkgrepo("set -s %s publisher/prefix=test" % src_repo)
                published = self.pkgsend_bulk(src_repo, (self.tree10,
                    self.amber10, self.amber20, self.truck10, self.truck20,
                    self.zoo10))
                self.pkgrepo("set -s %s publisher/prefix=test2" % src_repo)
                published += self.pkgsend_bulk(src_repo, (self.tree10,
                    self.zoo10))

                dest_repo = os.path.join(self.test_root, "test-repo")
                shutil.copytree(src_repo, dest_repo)

                # Verify that specifying something other than a filesystem-
                # based repository fails.
                self.pkgrepo("remove -s %s tree" % self.durl, exit=2)

                # Verify that non-matching patterns result in error.
                self.pkgrepo("remove -s %s nosuchpackage" % dest_repo, exit=1)
                self.pkgrepo("remove -s %s tree nosuchpackage" % dest_repo,
                    exit=1)

                # Verify that -n works as expected.
                self.pkgrepo("remove -n -s %s zoo" % dest_repo)
                # Since package was not removed, this succeeds.
                self.pkgrepo("remove -n -s %s zoo" % dest_repo)

                # Verify that -p works as expected.
                self.pkgrepo("remove -s %s -p nosuchpub zoo" % dest_repo,
                    exit=1)
                self.pkgrepo("remove -s %s -p test -p test2 zoo" % dest_repo)
                # zoo is gone from both publishers, so each scoped retry fails.
                self.pkgrepo("remove -s %s -p test zoo" % dest_repo, exit=1)
                self.pkgrepo("remove -s %s -p test2 zoo" % dest_repo, exit=1)

                # Restore repository for next test.
                shutil.rmtree(dest_repo)
                shutil.copytree(src_repo, dest_repo)

                # Verify a single version of a package can be removed and that
                # package files will not be removed since older versions still
                # reference them.  (This also tests that packages that only
                # exist for one publisher in a multi-publisher repository can
                # be removed.)
                repo = self.get_repo(dest_repo)
                mpath = repo.manifest(published[4])
                self.pkgrepo("remove -s %s [email protected]" % dest_repo)

                # The manifest should no longer exist.
                self.assert_(not os.path.exists(mpath))

                # These two files are in use by other packages so should still
                # exist.
                repo.file(self.fhashes["tmp/empty"])
                repo.file(self.fhashes["tmp/truck1"])

                # This file was only referenced by [email protected] so should be gone.
                self.assertRaises(sr.RepositoryFileNotFoundError, repo.file,
                    self.fhashes["tmp/truck2"])

                # Restore repository for next test.
                shutil.rmtree(dest_repo)
                shutil.copytree(src_repo, dest_repo)

                # Verify that all versions of a specific package can be removed
                # and that only files not referenced by other packages are
                # removed.
                self.pkgrepo("remove -s %s truck" % dest_repo)

                # These files are still in use by other packages.
                repo = self.get_repo(dest_repo)
                repo.file(self.fhashes["tmp/empty"])
                repo.file(self.fhashes["tmp/truck1"])

                # This file should have been removed since other packages
                # don't reference it.
                self.assertRaises(sr.RepositoryFileNotFoundError, repo.file,
                    self.fhashes["tmp/truck2"])

                # Restore repository for next test.
                shutil.rmtree(dest_repo)
                shutil.copytree(src_repo, dest_repo)

                # Verify that removing all packages that reference files
                # results in all files being removed and an empty file_root
                # for the repository.
                repo = self.get_repo(dest_repo)
                mpaths = []
                for f in published:
                        if "tree" in f or "truck" in f:
                                mpaths.append(repo.manifest(f))

                self.pkgrepo("remove -s %s tree truck" % dest_repo)

                self.assertRaises(sr.RepositoryFileNotFoundError, repo.file,
                    self.fhashes["tmp/empty"])
                self.assertRaises(sr.RepositoryFileNotFoundError, repo.file,
                    self.fhashes["tmp/truck1"])
                self.assertRaises(sr.RepositoryFileNotFoundError, repo.file,
                    self.fhashes["tmp/truck2"])

                # Verify that directories for manifests no longer exist since
                # all versions were removed.
                for mpath in mpaths:
                        pdir = os.path.dirname(mpath)
                        self.assert_(not os.path.exists(pdir))

                # Verify that entries for each package that was removed no
                # longer exist in the catalog, but do exist in the catalog's
                # updatelog.
                repo = self.get_repo(dest_repo)
                for pfx in ("test", "test2"):
                        c = repo.get_catalog(pub=pfx)
                        for f in c.fmris():
                                self.assert_(f.pkg_name not in ("tree",
                                    "truck"))

                        # Collect every REMOVE operation recorded in this
                        # publisher's catalog updatelogs.
                        removed = set()
                        for name in c.updates:
                                ulog = pkg.catalog.CatalogUpdate(name,
                                    meta_root=c.meta_root, sign=False)

                                for pfmri, op_type, op_time, md in ulog.updates():
                                        if op_type == ulog.REMOVE:
                                                removed.add(str(pfmri))

                        expected = set(
                            f
                            for f in published
                            if ("tree" in f or "truck" in f) and \
                                f.startswith("pkg://%s/" % pfx)
                        )
                        self.assertEqualDiff(expected, removed)

                # Verify repository file_root is empty.
                for rstore in repo.rstores:
                        if not rstore.publisher:
                                continue
                        self.assert_(not os.listdir(rstore.file_root))

                # Cleanup.
                shutil.rmtree(src_repo)
                shutil.rmtree(dest_repo)
+
 
 if __name__ == "__main__":
         unittest.main()