15037 action parsing can't handle unicode objects which don't decode to ascii
author Shawn Walker <shawn.walker@oracle.com>
Fri, 07 May 2010 16:26:13 -0500
changeset 1890 011afb71c52a
parent 1889 e670eae1a5f4
child 1891 14057bf2a87d
15037 action parsing can't handle unicode objects which don't decode to ascii
13739 manifest set_content fails when provided unicode strings
src/modules/actions/_actions.c
src/modules/manifest.py
src/tests/api/t_catalog.py
src/tests/api/t_manifest.py
src/tests/gui/t_pm_addrepo.py
src/tests/gui/t_pm_helpabout.py
src/tests/gui/t_pm_install_py
src/tests/gui/t_pm_rmrepo.py
src/tests/gui/t_pm_start.py
src/tests/gui/t_pm_uninstall.py
src/tests/gui/testutils.py
src/tests/pkg5unittest.py
--- a/src/modules/actions/_actions.c	Mon May 03 21:54:24 2010 -0400
+++ b/src/modules/actions/_actions.c	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <Python.h>
@@ -92,7 +91,10 @@
 static PyObject *
 _fromstr(PyObject *self, PyObject *args)
 {
-	char *s, *str, *keystr, *slashmap = NULL;
+	char *s = NULL;
+	char *str = NULL;
+	char *keystr = NULL;
+	char *slashmap = NULL;
 	int strl;
 	int i, ks, vs, keysize;
 	char quote;
@@ -112,13 +114,20 @@
 #define malformed(msg) set_malformederr(str, i, (msg))
 #define invalid(msg) set_invaliderr(str, (msg))
 #define CLEANUP_REFS \
+	PyMem_Free(str);\
 	Py_XDECREF(key);\
 	Py_XDECREF(type);\
 	Py_XDECREF(attr);\
 	Py_XDECREF(attrs);\
 	Py_XDECREF(hash);
 
-	if (PyArg_ParseTuple(args, "s#", &str, &strl) == 0) {
+	/*
+	 * The action string is currently assumed to be a stream of bytes that
+	 * are valid UTF-8.  This method works regardless of whether the string
+	 * object provided is a Unicode object, string object, or a character
+	 * buffer.
+	 */
+	if (PyArg_ParseTuple(args, "et#", "utf-8", &str, &strl) == 0) {
 		PyErr_SetString(PyExc_ValueError, "could not parse argument");
 		return (NULL);
 	}
@@ -126,15 +135,20 @@
 	s = strpbrk(str, " \t");
 
 	i = strl;
-	if (s == NULL)
+	if (s == NULL) {
+		PyMem_Free(str);
 		return (malformed("no attributes"));
+	}
 
-	if ((type = PyString_FromStringAndSize(str, s - str)) == NULL)
+	if ((type = PyString_FromStringAndSize(str, s - str)) == NULL) {
+		PyMem_Free(str);
 		return (NULL);
+	}
 
 	ks = vs = s - str;
 	state = WS;
 	if ((attrs = PyDict_New()) == NULL) {
+		PyMem_Free(str);
 		Py_DECREF(type);
 		return (NULL);
 	}
@@ -212,8 +226,10 @@
 				if (slashmap == NULL) {
 					int smlen = strl - (i - vs);
 					slashmap = calloc(1, smlen + 1);
-					if (slashmap == NULL)
+					if (slashmap == NULL) {
+						PyMem_Free(str);
 						return (PyErr_NoMemory());
+					}
 				}
 				i++;
 				if (str[i] == '\\' || str[i] == quote) {
@@ -228,6 +244,7 @@
 					attrlen = i - vs;
 					sattr = calloc(1, attrlen + 1);
 					if (sattr == NULL) {
+						PyMem_Free(str);
 						free(slashmap);
 						return (PyErr_NoMemory());
 					}
@@ -311,6 +328,7 @@
 		}
 	}
 
+	PyMem_Free(str);
 	if (hash == NULL)
 		hash = Py_None;
 
--- a/src/modules/manifest.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/modules/manifest.py	Fri May 07 16:26:13 2010 -0500
@@ -21,8 +21,7 @@
 #
 
 #
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 #
 
 from collections import namedtuple
@@ -37,7 +36,7 @@
 import pkg.portable as portable
 import pkg.variant as variant
 
-from pkg.misc import EmptyI, expanddirs, PKG_FILE_MODE, PKG_DIR_MODE
+from pkg.misc import EmptyDict, EmptyI, expanddirs, PKG_FILE_MODE, PKG_DIR_MODE
 from pkg.actions.attribute import AttributeAction
 
 ManifestDifference = namedtuple("ManifestDifference", "added changed removed")
@@ -88,6 +87,7 @@
                 self.variants = {}   # variants seen in package
                 self.facets = {}     # facets seen in package
                 self.attributes = {} # package-wide attributes
+                self.signatures = EmptyDict
 
         def __str__(self):
                 r = ""
@@ -341,7 +341,7 @@
                 # can't be in a manifest twice.  (The problem of having the same
                 # action more than once in packages that can be installed
                 # together has to be solved somewhere else, though.)
-                if isinstance(content, str):
+                if isinstance(content, basestring):
                         if signatures:
                                 # Generate manifest signature based upon input
                                 # content, but only if signatures were
@@ -513,7 +513,11 @@
                 manifest content, and returns a hash value."""
 
                 sha_1 = hashlib.sha1()
-                sha_1.update(mfstcontent)
+                if isinstance(mfstcontent, unicode):
+                        # Byte stream expected, so pass encoded.
+                        sha_1.update(mfstcontent.encode("utf-8"))
+                else:
+                        sha_1.update(mfstcontent)
 
                 return sha_1.hexdigest()
 
--- a/src/tests/api/t_catalog.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/api/t_catalog.py	Fri May 07 16:26:13 2010 -0500
@@ -21,8 +21,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -108,7 +107,7 @@
 
         def __gen_manifest(self, f):
                 m = manifest.Manifest()
-                lines = (
+                lines = unicode(
                     "depend [email protected] type=require\n"
                     "set name=facet.devel value=true\n"
                     "set name=info.classification "
@@ -124,8 +123,8 @@
                     "set name=pkg.summary value=\"Sparc Summary %s\""
                     " variant.arch=sparc\n"
                     "set name=pkg.summary:th value=\"ซอฟต์แวร์ %s\"\n"
-                    "set name=pkg.description value=\"Desc %s\"\n" % \
-                    (f, f, f, f, f))
+                    "set name=pkg.description value=\"Desc %s\"\n", "utf-8") % \
+                    (f, f, f, f, f)
 
                 if f.pkg_name == "zpkg":
                         lines += "set name=pkg.depend.install-hold value=test\n"
--- a/src/tests/api/t_manifest.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/api/t_manifest.py	Fri May 07 16:26:13 2010 -0500
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+# -*- coding: utf-8 -*-
 #
 # CDDL HEADER START
 #
@@ -20,8 +21,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import unittest
 import tempfile
@@ -46,13 +46,13 @@
         def setUp(self):
                 pkg5unittest.Pkg5TestCase.setUp(self)
 
-                self.m1 = manifest.Manifest();
+                self.m1 = manifest.Manifest()
                 self.m1_contents = """\
 set com.sun,test=true
 depend type=require fmri=pkg:/library/libc
 file fff555fff mode=0555 owner=sch group=staff path=/usr/bin/i386/sort isa=i386
 """
-                self.m2 = manifest.Manifest();
+                self.m2 = manifest.Manifest()
                 self.m2_contents = """\
 set com.sun,test=false
 set com.sun,data=true
@@ -118,6 +118,23 @@
                 str(self.m1).index("group=staff")
                 str(self.m1).index("isa=i386")
 
+                # Verify set_content with a byte string with unicode data
+                # works.
+                bstr = "set name=pkg.summary:th value=\"ซอฟต์แวร์ \""
+                m = manifest.Manifest()
+                m.set_content(bstr)
+                output = list(m.as_lines())[0].rstrip()
+                self.assertEqual(bstr, output)
+                self.assert_(isinstance(output, str))
+
+                # Verify set_content with a Unicode string results in a
+                # byte string (for now).
+                m = manifest.Manifest()
+                m.set_content(unicode(bstr, "utf-8"))
+                output = list(m.as_lines())[0].rstrip()
+                self.assertEqual(bstr, output)
+                self.assert_(isinstance(output, str))
+
         def test_diffs1(self):
                 """ humanized_differences runs to completion """
 
@@ -342,6 +359,20 @@
                 self.assertRaises(api_errors.BadManifestSignatures,
                     self.m2.validate, signatures=self.m2_signatures)
 
+                # Verify a manifest that has its content set using a byte string
+                # has the same signature as that of one set with a Unicode
+                # string when the content is the same.
+                bstr = "set name=pkg.summary:th value=\"ซอฟต์แวร์ \""
+                m1 = manifest.Manifest()
+                m1.set_content(bstr, signatures=True)
+                output1 = "".join(m1.as_lines())
+
+                m2 = manifest.Manifest()
+                m2.set_content(unicode(bstr, "utf-8"), signatures=True)
+                output2 = "".join(m2.as_lines())
+                self.assertEqualDiff(output1, output2)
+                self.assertEqualDiff(m1.signatures, m2.signatures)
+
 
 if __name__ == "__main__":
         unittest.main()
--- a/src/tests/gui/t_pm_addrepo.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_addrepo.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -36,6 +35,9 @@
 
 class TestPkgGuiAddRepoBasics(pkg5unittest.SingleDepotTestCase):
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo10 = """
             open [email protected],5.11-0
             add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_helpabout.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_helpabout.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -38,6 +37,9 @@
         # Only start/stop the depot once (instead of for every test)
         persistent_setup = True
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo10 = """
             open [email protected],5.11-0
             add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_install_py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_install_py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 from cli import testutils
 
@@ -37,6 +36,9 @@
         # Only start/stop the depot once (instead of for every test)
         persistent_depot = True
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo10 = """
             open [email protected],5.11-0
             add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_rmrepo.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_rmrepo.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -36,6 +35,9 @@
 
 class TestPkgGuiRmRepoBasics(pkg5unittest.ManyDepotTestCase):
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo1 = """
             open foo@1,5.11-0
             close """
--- a/src/tests/gui/t_pm_start.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_start.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -36,6 +35,9 @@
 
 class TestPkgGuiStartBasics(pkg5unittest.SingleDepotTestCase):
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo10 = """
             open [email protected],5.11-0
             add set name="description" value="Some package description"
--- a/src/tests/gui/t_pm_uninstall.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_uninstall.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import testutils
 if __name__ == "__main__":
@@ -36,6 +35,9 @@
 
 class TestPkgGuiUninstallBasics(pkg5unittest.SingleDepotTestCase):
 
+        # pygtk requires unicode as the default encoding.
+        default_utf8 = True
+
         foo10 = """
             open [email protected],5.11-0
             add set name="description" value="Some package1 description"
--- a/src/tests/gui/testutils.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/testutils.py	Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
 # CDDL HEADER END
 #
 
-# Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 
 import os
 import sys
--- a/src/tests/pkg5unittest.py	Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/pkg5unittest.py	Fri May 07 16:26:13 2010 -0500
@@ -991,6 +991,11 @@
                 unittest.TestSuite.__init__(self, tests)
                 self.timing = {}
 
+                # The site module deletes the function to change the
+                # default encoding so a forced reload of sys has to
+                # be done at least once.
+                reload(sys)
+
         def cleanup_and_die(self, inst, info):
                 print >> sys.stderr, \
                     "\nCtrl-C: Attempting cleanup during %s" % info
@@ -1009,9 +1014,22 @@
                         persistent_setup = getattr(self._tests[0],
                             "persistent_setup", False)
                 except IndexError:
-                        # No tests, thats ok.
+                        # No tests; that's ok.
                         return
 
+                # This is needed because the import of some modules (such as
+                # pygtk or pango) causes the default encoding for Python to be
+                # changed which can cause tests to succeed when they should
+                # fail due to unicode issues:
+                #     https://bugzilla.gnome.org/show_bug.cgi?id=132040
+                default_utf8 = getattr(self._tests[0], "default_utf8", False)
+                if not default_utf8:
+                        # Now reset to the default a standard Python
+                        # distribution uses.
+                        sys.setdefaultencoding("ascii")
+                else:
+                        sys.setdefaultencoding("utf-8")
+
                 def setUp_donothing():
                         pass