Skip to content

Commit 7ab75e4

Browse files
committed
Merge remote-tracking branch 'origin/GT-3333_dev747368_JVM_utf8_strings'
(fixes NationalSecurityAgency#1255)
2 parents 9556e6e + 09ba78b commit 7ab75e4

File tree

3 files changed

+157
-13
lines changed

3 files changed

+157
-13
lines changed

Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
*/
1616
package ghidra.program.model.data;
1717

18-
import static ghidra.program.model.data.EndianSettingsDefinition.ENDIAN;
19-
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER;
18+
import static ghidra.program.model.data.EndianSettingsDefinition.*;
19+
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*;
2020
import static ghidra.program.model.data.StringLayoutEnum.*;
21-
import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION;
21+
import static ghidra.program.model.data.TranslationSettingsDefinition.*;
2222

2323
import java.nio.charset.Charset;
2424
import java.util.HashMap;
@@ -155,10 +155,10 @@ public static StringDataInstance getStringDataInstance(DataType dataType, MemBuf
155155
private final String translatedValue;
156156
private final Endian endianSetting;
157157

158-
private boolean showTranslation;
159-
private RENDER_ENUM renderSetting;
158+
private final boolean showTranslation;
159+
private final RENDER_ENUM renderSetting;
160160

161-
private int length;
161+
private final int length;
162162
private final MemBuffer buf;
163163

164164
protected StringDataInstance() {
@@ -171,13 +171,16 @@ protected StringDataInstance() {
171171
stringLayout = StringLayoutEnum.FIXED_LEN;
172172
endianSetting = null;
173173
renderSetting = RENDER_ENUM.ALL;
174+
length = 0;
175+
showTranslation = false;
174176
}
175177

176178
/**
177179
* Creates a string instance using the data in the {@link MemBuffer} and the settings
178180
* pulled from the {@link AbstractStringDataType string data type}.
179-
*
180-
* @param stringDataType {@link AbstractStringDataType} common string base data type.
181+
*
182+
* @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type
183+
* or an {@link ArrayStringable} element-of-char-array type.
181184
* @param settings {@link Settings} attached to the data location.
182185
* @param buf {@link MemBuffer} containing the data.
183186
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
@@ -189,9 +192,10 @@ public StringDataInstance(DataType dataType, Settings settings, MemBuffer buf, i
189192
this.buf = buf;
190193
this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
191194
this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName);
192-
// NOTE: for now only handle padding for charSize == 1
193-
this.paddedCharSize =
194-
charSize == 1 ? getDataOrganization(dataType).getCharSize() : charSize;
195+
// NOTE: for now only handle padding for charSize == 1 and the data type is an array of elements, not a "string"
196+
this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) //
197+
? getDataOrganization(dataType).getCharSize()
198+
: charSize;
195199
this.stringLayout = getLayoutFromDataType(dataType);
196200
this.showTranslation = TRANSLATION.isShowTranslated(settings);
197201
this.translatedValue = TRANSLATION.getTranslatedValue(settings);
@@ -507,7 +511,7 @@ private StringLayoutEnum getOffcutLayout() {
507511

508512
private byte[] getBytesFromMemBuff(MemBuffer memBuffer, int copyLen) {
509513
// round copyLen down to multiple of paddedCharSize
510-
copyLen &= ~(paddedCharSize - 1);
514+
copyLen = (copyLen / paddedCharSize) * paddedCharSize;
511515

512516
byte[] bytes = new byte[copyLen];
513517
if (memBuffer.getBytes(bytes, 0) != bytes.length) {
@@ -787,7 +791,7 @@ public boolean isShowTranslation() {
787791
* @return String containing the representation of the single char.
788792
*/
789793
public String getCharRepresentation() {
790-
if (length < charSize) {
794+
if (length < charSize /* also covers case of isProbe() */ ) {
791795
return UNKNOWN_DOT_DOT_DOT;
792796
}
793797

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/* ###
2+
* IP: GHIDRA
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package ghidra.program.model.data;
17+
18+
import static org.junit.Assert.*;
19+
20+
import org.junit.Test;
21+
22+
import generic.test.AbstractGTest;
23+
import ghidra.program.model.address.AddressSpace;
24+
import ghidra.program.model.address.GenericAddressSpace;
25+
import ghidra.program.model.mem.ByteMemBufferImpl;
26+
27+
public class ArrayStringableTest extends AbstractGTest {
28+
private ByteMemBufferImpl mb(boolean isBE, int... values) {
29+
byte[] bytes = new byte[values.length];
30+
for (int i = 0; i < values.length; i++) {
31+
bytes[i] = (byte) values[i];
32+
}
33+
GenericAddressSpace gas = new GenericAddressSpace("test", 32, AddressSpace.TYPE_RAM, 1);
34+
return new ByteMemBufferImpl(gas.getAddress(0), bytes, isBE);
35+
}
36+
37+
private SettingsBuilder newset() {
38+
return new SettingsBuilder();
39+
}
40+
41+
private static class DataOrgDTM extends TestDummyDataTypeManager {
42+
private DataOrganization dataOrg;
43+
44+
public DataOrgDTM(int charSize) {
45+
DataOrganizationImpl dataOrgImpl = DataOrganizationImpl.getDefaultOrganization(null);
46+
dataOrgImpl.setCharSize(charSize);
47+
48+
this.dataOrg = dataOrgImpl;
49+
}
50+
51+
@Override
52+
public DataOrganization getDataOrganization() {
53+
return dataOrg;
54+
}
55+
}
56+
57+
private Array mkArray(DataTypeManager dtm, int count) {
58+
CharDataType charDT = new CharDataType(dtm);
59+
Array arrayDT = new ArrayDataType(charDT, count, charDT.getLength(), dtm);
60+
61+
return arrayDT;
62+
}
63+
64+
65+
private Array array10char1 = mkArray(new DataOrgDTM(1), 10);
66+
private Array array10char2 = mkArray(new DataOrgDTM(2), 10);
67+
private Array array6char4 = mkArray(new DataOrgDTM(4), 6);
68+
private Array array10char5 = mkArray(new DataOrgDTM(5), 3);
69+
70+
//-------------------------------------------------------------------------------------
71+
// get string rep of an array of various sized character elements
72+
//-------------------------------------------------------------------------------------
73+
@Test
74+
public void testGetRep_1bytechar() {
75+
// because the char size is 1, default charset will be us-ascii, which matches character element size
76+
ByteMemBufferImpl buf = mb(false, 'h', 'e', 'l', 'l', 'o', 0, 'x', 'y', 0, 0);
77+
78+
assertEquals("\"hello\"",
79+
array10char1.getRepresentation(buf, newset(), array10char1.getLength()));
80+
}
81+
82+
@Test
83+
public void testGetRep_2bytechar() {
84+
// because char size is 2, default charset will be utf-16, which matches character element size
85+
ByteMemBufferImpl buf =
86+
mb(false, 'h', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
87+
88+
assertEquals("u\"hello\"",
89+
array10char2.getRepresentation(buf, newset(), array10char2.getLength()));
90+
}
91+
92+
@Test
93+
public void testGetRep_4bytechar() {
94+
// because char size is 4, default charset will be utf-32, which matches character element size
95+
ByteMemBufferImpl buf = mb(false, 'h', 0, 0, 0, 'e', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0,
96+
'o', 0, 0, 0, 0, 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
97+
98+
assertEquals("U\"hello\"",
99+
array6char4.getRepresentation(buf, newset(), array6char4.getLength()));
100+
}
101+
102+
@Test
103+
public void testGetRep_5bytechar() {
104+
// because the char size isn't normal, charset will default to us-ascii, which does not match
105+
// the element size of the array, triggering padding-removal in the stringdatainstance code.
106+
ByteMemBufferImpl buf =
107+
mb(false, 'h', 'x', 'x', 'x', 'x', 'e', 'x', 'x', 'x', 'x', 0, 0, 0, 0, 0);
108+
109+
assertEquals("\"he\"",
110+
array10char5.getRepresentation(buf, newset(), array10char5.getLength()));
111+
}
112+
}

Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,19 @@ public class StringDataTypeTest extends AbstractGTest {
4545
private PascalStringDataType pascalString = new PascalStringDataType();
4646
private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType();
4747

48+
private static class DataOrgDTM extends TestDummyDataTypeManager {
49+
private DataOrganization dataOrg;
50+
51+
public DataOrgDTM(DataOrganization dataOrg) {
52+
this.dataOrg = dataOrg;
53+
}
54+
55+
@Override
56+
public DataOrganization getDataOrganization() {
57+
return dataOrg;
58+
}
59+
}
60+
4861
private ByteMemBufferImpl mb(boolean isBE, int... values) {
4962
byte[] bytes = new byte[values.length];
5063
for (int i = 0; i < values.length; i++) {
@@ -216,6 +229,21 @@ public void testGetStringValue_utf8() {
216229
assertEquals("ab\ucc01\u1202", actual);
217230
}
218231

232+
@Test
233+
public void testGetStringValue_utf8_2bytechar_dataorg() {
234+
// test UTF-8 when the dataorg specifies a 2byte character (ie. JVM)
235+
ByteMemBufferImpl buf = mb(false, 'a', 'b', 'c');
236+
237+
DataOrganizationImpl dataOrg = DataOrganizationImpl.getDefaultOrganization(null);
238+
dataOrg.setCharSize(2);
239+
DataOrgDTM dtm = new DataOrgDTM(dataOrg);
240+
StringUTF8DataType wideCharUTF8DT = new StringUTF8DataType(dtm);
241+
242+
String actual = (String) wideCharUTF8DT.getValue(buf, newset(), buf.getLength());
243+
244+
assertEquals("abc", actual);
245+
}
246+
219247
@Test
220248
public void testGetStringValue_utf16_le() {
221249
ByteMemBufferImpl buf = mb(false, //

0 commit comments

Comments
 (0)