15037 action parsing can't handle unicode objects which don't decode to ascii
13739 manifest set_content fails when provided unicode strings
--- a/src/modules/actions/_actions.c Mon May 03 21:54:24 2010 -0400
+++ b/src/modules/actions/_actions.c Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <Python.h>
@@ -92,7 +91,10 @@
static PyObject *
_fromstr(PyObject *self, PyObject *args)
{
- char *s, *str, *keystr, *slashmap = NULL;
+ char *s = NULL;
+ char *str = NULL;
+ char *keystr = NULL;
+ char *slashmap = NULL;
int strl;
int i, ks, vs, keysize;
char quote;
@@ -112,13 +114,20 @@
#define malformed(msg) set_malformederr(str, i, (msg))
#define invalid(msg) set_invaliderr(str, (msg))
#define CLEANUP_REFS \
+ PyMem_Free(str);\
Py_XDECREF(key);\
Py_XDECREF(type);\
Py_XDECREF(attr);\
Py_XDECREF(attrs);\
Py_XDECREF(hash);
- if (PyArg_ParseTuple(args, "s#", &str, &strl) == 0) {
+ /*
+ * The action string is currently assumed to be a stream of bytes that
+ * are valid UTF-8. This method works regardless of whether the string
+ * object provided is a Unicode object, string object, or a character
+ * buffer.
+ */
+ if (PyArg_ParseTuple(args, "et#", "utf-8", &str, &strl) == 0) {
PyErr_SetString(PyExc_ValueError, "could not parse argument");
return (NULL);
}
@@ -126,15 +135,20 @@
s = strpbrk(str, " \t");
i = strl;
- if (s == NULL)
+ if (s == NULL) {
+ PyMem_Free(str);
return (malformed("no attributes"));
+ }
- if ((type = PyString_FromStringAndSize(str, s - str)) == NULL)
+ if ((type = PyString_FromStringAndSize(str, s - str)) == NULL) {
+ PyMem_Free(str);
return (NULL);
+ }
ks = vs = s - str;
state = WS;
if ((attrs = PyDict_New()) == NULL) {
+ PyMem_Free(str);
Py_DECREF(type);
return (NULL);
}
@@ -212,8 +226,10 @@
if (slashmap == NULL) {
int smlen = strl - (i - vs);
slashmap = calloc(1, smlen + 1);
- if (slashmap == NULL)
+ if (slashmap == NULL) {
+ PyMem_Free(str);
return (PyErr_NoMemory());
+ }
}
i++;
if (str[i] == '\\' || str[i] == quote) {
@@ -228,6 +244,7 @@
attrlen = i - vs;
sattr = calloc(1, attrlen + 1);
if (sattr == NULL) {
+ PyMem_Free(str);
free(slashmap);
return (PyErr_NoMemory());
}
@@ -311,6 +328,7 @@
}
}
+ PyMem_Free(str);
if (hash == NULL)
hash = Py_None;
--- a/src/modules/manifest.py Mon May 03 21:54:24 2010 -0400
+++ b/src/modules/manifest.py Fri May 07 16:26:13 2010 -0500
@@ -21,8 +21,7 @@
#
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
#
from collections import namedtuple
@@ -37,7 +36,7 @@
import pkg.portable as portable
import pkg.variant as variant
-from pkg.misc import EmptyI, expanddirs, PKG_FILE_MODE, PKG_DIR_MODE
+from pkg.misc import EmptyDict, EmptyI, expanddirs, PKG_FILE_MODE, PKG_DIR_MODE
from pkg.actions.attribute import AttributeAction
ManifestDifference = namedtuple("ManifestDifference", "added changed removed")
@@ -88,6 +87,7 @@
self.variants = {} # variants seen in package
self.facets = {} # facets seen in package
self.attributes = {} # package-wide attributes
+ self.signatures = EmptyDict
def __str__(self):
r = ""
@@ -341,7 +341,7 @@
# can't be in a manifest twice. (The problem of having the same
# action more than once in packages that can be installed
# together has to be solved somewhere else, though.)
- if isinstance(content, str):
+ if isinstance(content, basestring):
if signatures:
# Generate manifest signature based upon input
# content, but only if signatures were
@@ -513,7 +513,11 @@
manifest content, and returns a hash value."""
sha_1 = hashlib.sha1()
- sha_1.update(mfstcontent)
+ if isinstance(mfstcontent, unicode):
+ # Byte stream expected, so pass encoded.
+ sha_1.update(mfstcontent.encode("utf-8"))
+ else:
+ sha_1.update(mfstcontent)
return sha_1.hexdigest()
--- a/src/tests/api/t_catalog.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/api/t_catalog.py Fri May 07 16:26:13 2010 -0500
@@ -21,8 +21,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -108,7 +107,7 @@
def __gen_manifest(self, f):
m = manifest.Manifest()
- lines = (
+ lines = unicode(
"depend [email protected] type=require\n"
"set name=facet.devel value=true\n"
"set name=info.classification "
@@ -124,8 +123,8 @@
"set name=pkg.summary value=\"Sparc Summary %s\""
" variant.arch=sparc\n"
"set name=pkg.summary:th value=\"ซอฟต์แวร์ %s\"\n"
- "set name=pkg.description value=\"Desc %s\"\n" % \
- (f, f, f, f, f))
+ "set name=pkg.description value=\"Desc %s\"\n", "utf-8") % \
+ (f, f, f, f, f)
if f.pkg_name == "zpkg":
lines += "set name=pkg.depend.install-hold value=test\n"
--- a/src/tests/api/t_manifest.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/api/t_manifest.py Fri May 07 16:26:13 2010 -0500
@@ -1,4 +1,5 @@
#!/usr/bin/python
+# -*- coding: utf-8 -*-
#
# CDDL HEADER START
#
@@ -20,8 +21,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
import unittest
import tempfile
@@ -46,13 +46,13 @@
def setUp(self):
pkg5unittest.Pkg5TestCase.setUp(self)
- self.m1 = manifest.Manifest();
+ self.m1 = manifest.Manifest()
self.m1_contents = """\
set com.sun,test=true
depend type=require fmri=pkg:/library/libc
file fff555fff mode=0555 owner=sch group=staff path=/usr/bin/i386/sort isa=i386
"""
- self.m2 = manifest.Manifest();
+ self.m2 = manifest.Manifest()
self.m2_contents = """\
set com.sun,test=false
set com.sun,data=true
@@ -118,6 +118,23 @@
str(self.m1).index("group=staff")
str(self.m1).index("isa=i386")
+ # Verify set_content with a byte string with unicode data
+ # works.
+ bstr = "set name=pkg.summary:th value=\"ซอฟต์แวร์ \""
+ m = manifest.Manifest()
+ m.set_content(bstr)
+ output = list(m.as_lines())[0].rstrip()
+ self.assertEqual(bstr, output)
+ self.assert_(isinstance(output, str))
+
+ # Verify set_content with a Unicode string results in a
+ # byte string (for now).
+ m = manifest.Manifest()
+ m.set_content(unicode(bstr, "utf-8"))
+ output = list(m.as_lines())[0].rstrip()
+ self.assertEqual(bstr, output)
+ self.assert_(isinstance(output, str))
+
def test_diffs1(self):
""" humanized_differences runs to completion """
@@ -342,6 +359,20 @@
self.assertRaises(api_errors.BadManifestSignatures,
self.m2.validate, signatures=self.m2_signatures)
+ # Verify a manifest that has its content set using a byte string
+ # has the same signature as that of one set with a Unicode
+ # string when the content is the same.
+ bstr = "set name=pkg.summary:th value=\"ซอฟต์แวร์ \""
+ m1 = manifest.Manifest()
+ m1.set_content(bstr, signatures=True)
+ output1 = "".join(m1.as_lines())
+
+ m2 = manifest.Manifest()
+ m2.set_content(unicode(bstr, "utf-8"), signatures=True)
+ output2 = "".join(m2.as_lines())
+ self.assertEqualDiff(output1, output2)
+ self.assertEqualDiff(m1.signatures, m2.signatures)
+
if __name__ == "__main__":
unittest.main()
--- a/src/tests/gui/t_pm_addrepo.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_addrepo.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -36,6 +35,9 @@
class TestPkgGuiAddRepoBasics(pkg5unittest.SingleDepotTestCase):
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo10 = """
open [email protected],5.11-0
add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_helpabout.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_helpabout.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -38,6 +37,9 @@
# Only start/stop the depot once (instead of for every test)
persistent_setup = True
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo10 = """
open [email protected],5.11-0
add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_install_py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_install_py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
from cli import testutils
@@ -37,6 +36,9 @@
# Only start/stop the depot once (instead of for every test)
persistent_depot = True
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo10 = """
open [email protected],5.11-0
add set name="description" value="Some package1 description"
--- a/src/tests/gui/t_pm_rmrepo.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_rmrepo.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -36,6 +35,9 @@
class TestPkgGuiRmRepoBasics(pkg5unittest.ManyDepotTestCase):
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo1 = """
open foo@1,5.11-0
close """
--- a/src/tests/gui/t_pm_start.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_start.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -36,6 +35,9 @@
class TestPkgGuiStartBasics(pkg5unittest.SingleDepotTestCase):
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo10 = """
open [email protected],5.11-0
add set name="description" value="Some package description"
--- a/src/tests/gui/t_pm_uninstall.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/t_pm_uninstall.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import testutils
if __name__ == "__main__":
@@ -36,6 +35,9 @@
class TestPkgGuiUninstallBasics(pkg5unittest.SingleDepotTestCase):
+ # pygtk requires UTF-8 as the default encoding.
+ default_utf8 = True
+
foo10 = """
open [email protected],5.11-0
add set name="description" value="Some package1 description"
--- a/src/tests/gui/testutils.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/gui/testutils.py Fri May 07 16:26:13 2010 -0500
@@ -20,8 +20,7 @@
# CDDL HEADER END
#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
import os
import sys
--- a/src/tests/pkg5unittest.py Mon May 03 21:54:24 2010 -0400
+++ b/src/tests/pkg5unittest.py Fri May 07 16:26:13 2010 -0500
@@ -991,6 +991,11 @@
unittest.TestSuite.__init__(self, tests)
self.timing = {}
+ # The site module deletes the function to change the
+ # default encoding so a forced reload of sys has to
+ # be done at least once.
+ reload(sys)
+
def cleanup_and_die(self, inst, info):
print >> sys.stderr, \
"\nCtrl-C: Attempting cleanup during %s" % info
@@ -1009,9 +1014,22 @@
persistent_setup = getattr(self._tests[0],
"persistent_setup", False)
except IndexError:
- # No tests, thats ok.
+ # No tests; that's ok.
return
+ # This is needed because the import of some modules (such as
+ # pygtk or pango) causes the default encoding for Python to be
+ # changed which can cause tests to succeed when they should
+ # fail due to unicode issues:
+ # https://bugzilla.gnome.org/show_bug.cgi?id=132040
+ default_utf8 = getattr(self._tests[0], "default_utf8", False)
+ if not default_utf8:
+ # Now reset to the default a standard Python
+ # distribution uses.
+ sys.setdefaultencoding("ascii")
+ else:
+ sys.setdefaultencoding("utf-8")
+
def setUp_donothing():
pass