Compare commits

..

3 Commits

Author SHA1 Message Date
Benedek László 0c697627aa completed parsing 2024-04-12 23:19:43 +02:00
Benedek László e72ae7bebb neater code 2024-04-12 21:54:28 +02:00
Benedek László d5e3237bb2 StackMapFrames 2024-04-12 21:51:28 +02:00
3 changed files with 340 additions and 163 deletions

9
.clang-format Normal file
View File

@ -0,0 +1,9 @@
# clang-format configuration: start from the predefined Chromium style and apply the overrides below.
BasedOnStyle: Chromium
# Two columns per indentation level, spaces only (see UseTab).
IndentWidth: 2
# Lines may be up to 160 columns wide before clang-format wraps them.
ColumnLimit: 160
# No space between a C-style cast and its operand: (u1*)ptr.
SpaceAfterCStyleCast: false
UseTab: Never
# The body of an `if` always goes on its own line.
AllowShortIfStatementsOnASingleLine: false
# Trailing comments are not column-aligned; they sit one space after the code.
AlignTrailingComments: false
SpacesBeforeTrailingComments: 1
# Align the values of macro definitions that appear on consecutive lines.
AlignConsecutiveMacros: Consecutive

View File

@ -4,23 +4,27 @@
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// When defined, the bytes of every multi-byte value are reversed as it is read.
#define SWAP_ENDIANNESS

// Reads n consecutive 2-byte values from file into the memory at ptr.
//
// ptr  - destination for n * 2 bytes; it is written bytewise, so it does not need any particular alignment.
// n    - number of 2-byte values to read; 0 is allowed and reads nothing.
// file - stream to read from.
//
// With SWAP_ENDIANNESS defined, the two bytes of each value are exchanged before being stored.
// If the stream ends (or fails) early, every value that could not be read completely is stored as zero,
// so the caller never sees indeterminate bytes.
//
// NOTE(review): this assumes u2 is exactly two bytes wide, which the 8-bit shift swap of the previous
// implementation also relied on.
void read_u2(void* ptr, size_t n, FILE* file) {
  unsigned char* out = ptr;
  for (size_t i = 0; i < n; i++) {
    unsigned char raw[2];
    if (fread(raw, 1, sizeof raw, file) != sizeof raw) {
      // Short read: zero-fill this value and all remaining ones, then stop.
      memset(out + i * sizeof raw, 0, (n - i) * sizeof raw);
      return;
    }
#ifdef SWAP_ENDIANNESS
    unsigned char first = raw[0];
    raw[0] = raw[1];
    raw[1] = first;
#endif
    memcpy(out + i * sizeof raw, raw, sizeof raw);
  }
}
void read_u4(void* ptr, size_t n, FILE* file) {
u4 buffer[n];
fread(&buffer, sizeof(u4), n, file);
#ifdef SWAP_ENDIANNESS
for (size_t i = 0; i < n; i++) {
buffer[i] = ((buffer[i] >> 24) & 0xff) | // move byte 3 to byte 0
@ -29,9 +33,140 @@ void read_u4(void *ptr, size_t n, FILE *file) {
((buffer[i] << 24) & 0xff000000);
}
#endif
memcpy(ptr, &buffer, n * sizeof(u4));
}
// Classifies the frame_type value of a stack map frame into its stack_frame_type category.
//
// frame_type - the raw frame type value read from the class file.
//
// Returns:
//   0-63    -> SAME
//   64-127  -> SAME_LOCALS_1_STACK_ITEM_FRAME
//   247     -> SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED
//   248-250 -> CHOP
//   251     -> SAME_FRAME_EXTENDED
//   252-254 -> APPEND
//   255     -> FULL_FRAME
//
// Any other value (128-246, or anything above 255) is reported on stderr and the process is aborted.
// abort() is used instead of assert(0) so that the function never falls off its end without returning
// a value when assertions are compiled out with NDEBUG.
stack_frame_type stack_frame_type_enum(u2 frame_type) {
  // frame_type is unsigned, so no lower-bound check against 0 is needed.
  if (frame_type <= 63) {
    return SAME;
  }
  if (frame_type <= 127) {
    return SAME_LOCALS_1_STACK_ITEM_FRAME;
  }
  if (frame_type == 247) {
    return SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED;
  }
  if (248 <= frame_type && frame_type <= 250) {
    return CHOP;
  }
  if (frame_type == 251) {
    return SAME_FRAME_EXTENDED;
  }
  if (252 <= frame_type && frame_type <= 254) {
    return APPEND;
  }
  if (frame_type == 255) {
    return FULL_FRAME;
  }
  // stderr is used so the message is not lost in an unflushed stdout buffer when aborting.
  fprintf(stderr, "unknown stack frame type: %hu\n", frame_type);
  abort();
}
void parse_attribute(ClassFile* cf, attribute_info* base, FILE* file) {
char* name = (char*)cf->constant_pool[base->attribute_name_index - 1].info.utf8_info.bytes;
if (strcmp("ConstantValue", name) == 0) {
read_u2(&(base->info.constant_value_attribute.constantvalue_index), 1, file);
} else if (strcmp("Code", name) == 0) {
read_u2(&(base->info.code_attribute.max_stack), 2, file);
read_u4(&(base->info.code_attribute.code_length), 1, file);
base->info.code_attribute.code = (u1*)malloc(base->info.code_attribute.code_length);
fread(base->info.code_attribute.code, base->info.code_attribute.code_length, 1, file);
read_u2(&(base->info.code_attribute.exception_table_length), 1, file);
base->info.code_attribute.exception_table = malloc(sizeof(u2) * 4 * base->info.code_attribute.exception_table_length);
for (u2 i = 0; i < base->info.code_attribute.exception_table_length; i++) {
read_u2(&(base->info.code_attribute.exception_table[i].start_pc), 4, file);
}
read_u2(&(base->info.code_attribute.attributes_count), 1, file);
base->info.code_attribute.attributes = malloc(sizeof(attribute_info) * base->info.code_attribute.attributes_count);
for (u2 i = 0; i < base->info.code_attribute.attributes_count; i++) {
attribute_info* attr = &(base->info.code_attribute.attributes[i]);
read_u2(&(attr->attribute_name_index), 1, file);
read_u4(&(attr->attribute_length), 1, file);
parse_attribute(cf, attr, file);
}
} else if (strcmp("StackMapTable", name) == 0) {
read_u2(&(base->info.stack_map_table_attribute.number_of_entries), 1, file);
base->info.stack_map_table_attribute.entries = malloc(sizeof(stack_map_frame) * base->info.stack_map_table_attribute.number_of_entries);
for (u2 i = 0; i < base->info.stack_map_table_attribute.number_of_entries; i++) {
stack_map_frame* entry = &(base->info.stack_map_table_attribute.entries[i]);
fread(&(entry->frame_type), 1, 1, file);
u1 frame_type = entry->frame_type;
switch (stack_frame_type_enum(frame_type)) {
case SAME:
break;
case SAME_LOCALS_1_STACK_ITEM_FRAME:
fread(&(entry->same_locals_1_stack_item_frame.stack[0].tag), 1, 1, file);
if (entry->same_locals_1_stack_item_frame.stack[0].tag == ITEM_Object) {
read_u2(&(entry->same_locals_1_stack_item_frame.stack[0].Object_variable_info.cpool_index), 1, file);
} else if (entry->same_locals_1_stack_item_frame.stack[0].tag == ITEM_Uninitialized) {
read_u2(&(entry->same_locals_1_stack_item_frame.stack[0].Uninitialized_variable_info.offset), 1, file);
}
break;
case SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED:
read_u2(&(entry->same_locals_1_stack_item_frame_extended.offset_delta), 1, file);
fread(&(entry->same_locals_1_stack_item_frame_extended.stack[0].tag), 1, 1, file);
if (entry->same_locals_1_stack_item_frame_extended.stack[0].tag == ITEM_Object) {
read_u2(&(entry->same_locals_1_stack_item_frame_extended.stack[0].Object_variable_info.cpool_index), 1, file);
} else if (entry->same_locals_1_stack_item_frame_extended.stack[0].tag == ITEM_Uninitialized) {
read_u2(&(entry->same_locals_1_stack_item_frame_extended.stack[0].Uninitialized_variable_info.offset), 1, file);
}
break;
case CHOP:
read_u2(&(entry->chop_frame.offset_delta), 1, file);
break;
case SAME_FRAME_EXTENDED:
read_u2(&(entry->same_frame_extended.offset_delta), 1, file);
break;
case APPEND:
read_u2(&(entry->append_frame.offset_delta), 1, file);
u2 count = frame_type - 251;
entry->append_frame.locals = malloc(sizeof(verification_type_info) * count);
for (u2 j = 0; j < count; j++) {
fread(&(entry->append_frame.locals[j].tag), 1, 1, file);
if (entry->append_frame.locals[j].tag == ITEM_Object) {
read_u2(&(entry->append_frame.locals[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->append_frame.locals[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->append_frame.locals[j].Uninitialized_variable_info.offset), 1, file);
}
}
break;
case FULL_FRAME:
read_u2(&(entry->full_frame.offset_delta), 1, file);
read_u2(&(entry->full_frame.number_of_locals), 1, file);
entry->full_frame.locals = malloc(sizeof(verification_type_info) * entry->full_frame.number_of_locals);
for (u2 j = 0; j < entry->full_frame.number_of_locals; j++) {
fread(&(entry->full_frame.locals[j].tag), 1, 1, file);
if (entry->full_frame.locals[j].tag == ITEM_Object) {
read_u2(&(entry->full_frame.locals[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->full_frame.locals[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->full_frame.locals[j].Uninitialized_variable_info.offset), 1, file);
}
}
read_u2(&(entry->full_frame.number_of_stack_items), 1, file);
entry->full_frame.stack = malloc(sizeof(verification_type_info) * entry->full_frame.number_of_stack_items);
for (u2 j = 0; j < entry->full_frame.number_of_stack_items; j++) {
fread(&(entry->full_frame.stack[j].tag), 1, 1, file);
if (entry->full_frame.stack[j].tag == ITEM_Object) {
read_u2(&(entry->full_frame.stack[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->full_frame.stack[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->full_frame.stack[j].Uninitialized_variable_info.offset), 1, file);
}
}
break;
}
}
} else if (strcmp("NestHost", name) == 0) {
read_u2(&(base->info.nest_host_attribute.host_class_index), 1, file);
} else if (strcmp("NestMembers", name) == 0) {
read_u2(&(base->info.nest_members_attribute.number_of_classes), 1, file);
base->info.nest_members_attribute.classes = malloc(sizeof(u2) * base->info.nest_members_attribute.number_of_classes);
read_u2(base->info.nest_members_attribute.classes, base->info.nest_members_attribute.number_of_classes, file);
} else if (strcmp("PermittedSubclasses", name) == 0) {
read_u2(&(base->info.permitted_subclasses_attribute.number_of_classes), 1, file);
base->info.permitted_subclasses_attribute.classes = malloc(sizeof(u2) * base->info.permitted_subclasses_attribute.number_of_classes);
read_u2(base->info.permitted_subclasses_attribute.classes, base->info.permitted_subclasses_attribute.number_of_classes, file);
} else {
printf("unhandled attribute type: %s\n", name);
fseek(file, base->attribute_length, SEEK_CUR);
}
}
ClassFile* ClassFile_load(const char* path) {
FILE* file = fopen(path, "rb");
if (file == NULL)
@ -45,94 +180,77 @@ ClassFile *ClassFile_load(const char *path) {
read_u2(&(cf->minor_version), 3, file);
// constant_pool
cf->constant_pool =
(cp_info *)malloc(sizeof(cp_info) * (cf->constant_pool_count - 1));
cf->constant_pool = (cp_info*)malloc(sizeof(cp_info) * (cf->constant_pool_count - 1));
for (u2 i = 0; i < (cf->constant_pool_count - 1); i++) {
// cp_info.tag 2bytes
fread(&(cf->constant_pool[i].tag), 1, 1, file);
size_t size;
switch (cf->constant_pool[i].tag) {
cp_info* cp = &(cf->constant_pool[i]);
switch (cp->tag) {
case CONSTANT_Class:
read_u2(&(cf->constant_pool[i].info.class_info.name_index), 1, file);
read_u2(&(cp->info.class_info.name_index), 1, file);
break;
case CONSTANT_Fieldref:
read_u2(&(cf->constant_pool[i].info.fieldref_info.class_index), 1, file);
read_u2(&(cf->constant_pool[i].info.fieldref_info.name_and_type_index), 1,
file);
read_u2(&(cp->info.fieldref_info.class_index), 1, file);
read_u2(&(cp->info.fieldref_info.name_and_type_index), 1, file);
break;
case CONSTANT_Methodref:
read_u2(&(cf->constant_pool[i].info.methodref_info.class_index), 1, file);
read_u2(&(cf->constant_pool[i].info.methodref_info.name_and_type_index),
1, file);
read_u2(&(cp->info.methodref_info.class_index), 1, file);
read_u2(&(cp->info.methodref_info.name_and_type_index), 1, file);
break;
case CONSTANT_InterfaceMethodref:
read_u2(&(cf->constant_pool[i].info.interface_methodred_info.class_index),
1, file);
read_u2(&(cf->constant_pool[i]
.info.interface_methodred_info.name_and_type_index),
1, file);
read_u2(&(cp->info.interface_methodref_info.class_index), 1, file);
read_u2(&(cp->info.interface_methodref_info.name_and_type_index), 1, file);
break;
case CONSTANT_String:
read_u2(&(cf->constant_pool[i].info.string_info.string_index), 1, file);
read_u2(&(cp->info.string_info.string_index), 1, file);
break;
case CONSTANT_Integer:
read_u4(&(cf->constant_pool[i].info.integer_info.bytes), 1, file);
read_u4(&(cp->info.integer_info.bytes), 1, file);
break;
case CONSTANT_Float:
read_u4(&(cf->constant_pool[i].info.float_info.bytes), 1, file);
read_u4(&(cp->info.float_info.bytes), 1, file);
break;
case CONSTANT_Long:
read_u4(&(cf->constant_pool[i].info.long_info.high_bytes), 1, file);
read_u4(&(cf->constant_pool[i].info.long_info.low_bytes), 1, file);
read_u4(&(cp->info.long_info.high_bytes), 1, file);
read_u4(&(cp->info.long_info.low_bytes), 1, file);
break;
case CONSTANT_Double:
read_u4(&(cf->constant_pool[i].info.double_info.high_bytes), 1, file);
read_u4(&(cf->constant_pool[i].info.double_info.low_bytes), 1, file);
read_u4(&(cp->info.double_info.high_bytes), 1, file);
read_u4(&(cp->info.double_info.low_bytes), 1, file);
break;
case CONSTANT_NameAndType:
read_u2(&(cf->constant_pool[i].info.name_and_type_info.name_index), 1,
file);
read_u2(&(cf->constant_pool[i].info.name_and_type_info.descriptor_index),
1, file);
read_u2(&(cp->info.name_and_type_info.name_index), 1, file);
read_u2(&(cp->info.name_and_type_info.descriptor_index), 1, file);
break;
case CONSTANT_Utf8:
read_u2(&(cf->constant_pool[i].info.utf8_info.length), 1, file);
fread(&(cf->constant_pool[i].info.utf8_info.bytes),
cf->constant_pool[i].info.utf8_info.length, 1, file);
read_u2(&(cp->info.utf8_info.length), 1, file);
cp->info.utf8_info.bytes = (u1*)malloc(cp->info.utf8_info.length);
fread(cp->info.utf8_info.bytes, cp->info.utf8_info.length, 1, file);
break;
case CONSTANT_MethodHandle:
fread(&(cf->constant_pool[i].info.method_handle_info.reference_kind), 1,
1, file);
read_u2(&(cf->constant_pool[i].info.method_handle_info.reference_index),
1, file);
fread(&(cp->info.method_handle_info.reference_kind), 1, 1, file);
read_u2(&(cp->info.method_handle_info.reference_index), 1, file);
break;
case CONSTANT_MethodType:
read_u2(&(cf->constant_pool[i].info.method_type_info.descriptor_index), 1,
file);
read_u2(&(cp->info.method_type_info.descriptor_index), 1, file);
break;
case CONSTANT_Dynamic:
read_u2(
&(cf->constant_pool[i].info.dynamic_info.bootstrap_method_attr_index),
1, file);
read_u2(&(cf->constant_pool[i].info.dynamic_info.name_and_type_index), 1,
file);
read_u2(&(cp->info.dynamic_info.bootstrap_method_attr_index), 1, file);
read_u2(&(cp->info.dynamic_info.name_and_type_index), 1, file);
break;
case CONSTANT_InvokeDynamic:
read_u2(&(cf->constant_pool[i]
.info.invoke_dynamic_info.bootstrap_method_attr_index),
1, file);
read_u2(
&(cf->constant_pool[i].info.invoke_dynamic_info.name_and_type_index),
1, file);
read_u2(&(cp->info.invoke_dynamic_info.bootstrap_method_attr_index), 1, file);
read_u2(&(cp->info.invoke_dynamic_info.name_and_type_index), 1, file);
break;
case CONSTANT_Module:
read_u2(&(cf->constant_pool[i].info.module_info.name_index), 1, file);
read_u2(&(cp->info.module_info.name_index), 1, file);
break;
case CONSTANT_Package:
read_u2(&(cf->constant_pool[i].info.package_info.name_index), 1, file);
read_u2(&(cp->info.package_info.name_index), 1, file);
break;
default:
printf("unknown constant tag: %hhu\n", cf->constant_pool[i].tag);
printf("unknown constant tag: %hhu\n", cp->tag);
// assert(0);
}
}
@ -154,18 +272,12 @@ ClassFile *ClassFile_load(const char *path) {
// access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
// attributes_count 2bytes
read_u2(&(cf->fields[i].access_flags), 4, file);
cf->fields[i].attributes = (attribute_info *)malloc(
sizeof(attribute_info) * cf->fields[i].attributes_count);
cf->fields[i].attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->fields[i].attributes_count);
for (u2 j = 0; j < cf->fields[i].attributes_count; j++) {
// attribute_name_index 2bytes, attribute_length 4bytes
read_u2(&(cf->fields[i].attributes[j].attribute_name_index), 1, file);
read_u4(&(cf->fields[i].attributes[j].attribute_length), 1, file);
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
cf->fields[i].attributes[j].info =
(u1 *)malloc(cf->fields[i].attributes[j].attribute_length);
fread(cf->fields[i].attributes[j].info,
cf->fields[i].attributes[j].attribute_length, 1, file);
parse_attribute(cf, &(cf->fields[i].attributes[j]), file);
}
}
@ -178,18 +290,12 @@ ClassFile *ClassFile_load(const char *path) {
// access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
// attributes_count 2bytes
read_u2(&(cf->methods[i].access_flags), 4, file);
cf->methods[i].attributes = (attribute_info *)malloc(
sizeof(attribute_info) * cf->methods[i].attributes_count);
cf->methods[i].attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->methods[i].attributes_count);
for (u2 j = 0; j < cf->methods[i].attributes_count; j++) {
// attribute_name_index 2bytes, attribute_length 4bytes
read_u2(&(cf->methods[i].attributes[j].attribute_name_index), 1, file);
read_u4(&(cf->methods[i].attributes[j].attribute_length), 1, file);
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
cf->methods[i].attributes[j].info =
(u1 *)malloc(cf->methods[i].attributes[j].attribute_length);
fread(cf->methods[i].attributes[j].info,
cf->methods[i].attributes[j].attribute_length, 1, file);
parse_attribute(cf, &(cf->methods[i].attributes[j]), file);
}
}
@ -197,16 +303,12 @@ ClassFile *ClassFile_load(const char *path) {
read_u2(&(cf->attributes_count), 1, file);
// attributes
cf->attributes =
(attribute_info *)malloc(sizeof(attribute_info) * cf->attributes_count);
cf->attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->attributes_count);
for (u2 i = 0; i < cf->attributes_count; i++) {
// attribute_name_index 2bytes, attribute_length 4bytes
read_u2(&(cf->attributes[i].attribute_name_index), 1, file);
read_u4(&(cf->attributes[i].attribute_length), 1, file);
cf->attributes[i].info = (u1 *)malloc(cf->attributes[i].attribute_length);
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
fread(cf->attributes[i].info, cf->attributes[i].attribute_length, 1, file);
parse_attribute(cf, &(cf->attributes[i]), file);
}
fclose(file);
@ -215,7 +317,8 @@ ClassFile *ClassFile_load(const char *path) {
}
void ClassFile_info(const ClassFile* cf) {
printf("magic=%X\n"
printf(
"magic=%X\n"
"minor_version=%hu\n"
"major_version=%hu\n"
"constant_pool_count=%hu\n"
@ -226,8 +329,6 @@ void ClassFile_info(const ClassFile *cf) {
"fields_count=%hu\n"
"methods_count=%hu\n"
"attributes_count=%hu\n",
cf->magic, cf->minor_version, cf->major_version,
cf->constant_pool_count, cf->access_flags, cf->this_class,
cf->super_class, cf->interfaces_count, cf->fields_count,
cf->methods_count, cf->attributes_count);
cf->magic, cf->minor_version, cf->major_version, cf->constant_pool_count, cf->access_flags, cf->this_class, cf->super_class, cf->interfaces_count,
cf->fields_count, cf->methods_count, cf->attributes_count);
}

View File

@ -116,7 +116,7 @@ typedef struct {
CONSTANT_Class_info class_info;
CONSTANT_Fieldref_info fieldref_info;
CONSTANT_Methodref_info methodref_info;
CONSTANT_InterfaceMethodref_info interface_methodred_info;
CONSTANT_InterfaceMethodref_info interface_methodref_info;
CONSTANT_String_info string_info;
CONSTANT_Integer_info integer_info;
CONSTANT_Float_info float_info;
@ -138,8 +138,6 @@ typedef struct {
} ConstantValue_attribute;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
@ -155,17 +153,92 @@ typedef struct {
void* attributes;
} Code_attribute;
// TODO: stack_map_frame
// Values for the one-byte tag of a verification_type_info.
// The numeric values are explicit because they are compared against the tag byte read from the class file.
typedef enum {
ITEM_Top = 0,
ITEM_Integer = 1,
ITEM_Float = 2,
ITEM_Double = 3,
ITEM_Long = 4,
ITEM_Null = 5,
ITEM_UninitializedThis = 6,
ITEM_Object = 7,
ITEM_Uninitialized = 8,
} VERIFICATION_TAG;
// One verification type entry: a tag byte plus an anonymous union holding the tag-specific payload.
// Only Object_variable_info and Uninitialized_variable_info carry data; the other variants are empty.
// NOTE(review): empty structs are a GNU C extension, not ISO C — confirm the supported compilers.
typedef struct {
// One of the VERIFICATION_TAG values (ITEM_*).
u1 tag;
union {
struct {
} Top_variable_info;
struct {
} Integer_variable_info;
struct {
} Float_variable_info;
struct {
} Long_variable_info;
struct {
} Double_variable_info;
struct {
} Null_variable_info;
struct {
} UninitializedThis_variable_info;
// Payload when tag == ITEM_Object.
struct {
u2 cpool_index;
} Object_variable_info;
// Payload when tag == ITEM_Uninitialized.
struct {
u2 offset;
} Uninitialized_variable_info;
};
} verification_type_info;
// Categories of stack map frames.
// The enumerators use implicit values (0..6); they are category identifiers, not the raw
// frame_type byte stored in stack_map_frame.
typedef enum {
SAME,
SAME_LOCALS_1_STACK_ITEM_FRAME,
SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED,
CHOP,
SAME_FRAME_EXTENDED,
APPEND,
FULL_FRAME,
} stack_frame_type;
// One stack map frame: the raw frame_type byte plus an anonymous union with one member per
// frame category. The trailing comment on each member gives the frame_type range it applies to.
typedef struct {
// Raw frame type byte; its range selects which union member is meaningful.
u1 frame_type;
union {
// No payload.
struct {
} same_frame; // 0-63
// Exactly one stack item, stored inline.
struct {
verification_type_info stack[1];
} same_locals_1_stack_item_frame; // 64-127
struct {
u2 offset_delta;
verification_type_info stack[1];
} same_locals_1_stack_item_frame_extended; // 247
struct {
u2 offset_delta;
} chop_frame; // 248-250
struct {
u2 offset_delta;
} same_frame_extended; // 251
// locals points to a separately allocated array; its length is derived from frame_type.
struct {
u2 offset_delta;
verification_type_info* locals; // [frame_type - 251]
} append_frame; // 252-254
// locals and stack point to separately allocated arrays sized by the preceding count fields.
struct {
u2 offset_delta;
u2 number_of_locals;
verification_type_info* locals;
u2 number_of_stack_items;
verification_type_info* stack;
} full_frame; // 255
};
} stack_map_frame;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 number_of_entries;
//stack_map_frame *entries;
stack_map_frame* entries;
} StackMapTable_attribute;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 num_bootstrap_methods;
struct {
u2 bootstrap_method_ref;
@ -175,21 +248,15 @@ typedef struct {
} BootstrapMethods_attribute;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 host_class_index;
} NestHost_attribute;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 number_of_classes;
u2* classes;
} NestMembers_attribute;
typedef struct {
u2 attribute_name_index;
u4 attribute_length;
u2 number_of_classes;
u2* classes;
} PermittedSubclasses_attribute;