java-llvm/classfile.c

347 lines
16 KiB
C
Raw Normal View History

2024-04-12 12:50:11 +00:00
#include "classfile.h"
2024-04-12 13:49:14 +00:00
#include <assert.h>
2024-04-12 12:50:11 +00:00
#include <memory.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
2024-04-12 19:51:28 +00:00
#include <string.h>
2024-04-12 12:50:11 +00:00
#define SWAP_ENDIANNESS
2024-04-12 19:51:28 +00:00
void read_u2(void* ptr, size_t n, FILE* file) {
2024-04-12 12:50:11 +00:00
u2 buffer[n];
fread(&buffer, sizeof(u2), n, file);
#ifdef SWAP_ENDIANNESS
for (size_t i = 0; i < n; i++) {
buffer[i] = (buffer[i] >> 8) | (buffer[i] << 8);
}
#endif
2024-04-12 13:49:14 +00:00
memcpy(ptr, &buffer, n * sizeof(u2));
2024-04-12 12:50:11 +00:00
}
2024-04-12 19:51:28 +00:00
void read_u4(void* ptr, size_t n, FILE* file) {
2024-04-12 12:50:11 +00:00
u4 buffer[n];
2024-04-12 13:49:14 +00:00
fread(&buffer, sizeof(u4), n, file);
2024-04-12 12:50:11 +00:00
#ifdef SWAP_ENDIANNESS
for (size_t i = 0; i < n; i++) {
2024-04-12 19:51:28 +00:00
buffer[i] = ((buffer[i] >> 24) & 0xff) | // move byte 3 to byte 0
2024-04-12 12:50:11 +00:00
((buffer[i] << 8) & 0xff0000) | // move byte 1 to byte 2
2024-04-12 19:51:28 +00:00
((buffer[i] >> 8) & 0xff00) | // move byte 2 to byte 1
2024-04-12 12:50:11 +00:00
((buffer[i] << 24) & 0xff000000);
}
#endif
2024-04-12 13:49:14 +00:00
memcpy(ptr, &buffer, n * sizeof(u4));
2024-04-12 12:50:11 +00:00
}
2024-04-12 19:51:28 +00:00
// Classify the raw frame_type tag of a stack map frame.
//
//   0..63    -> SAME
//   64..127  -> SAME_LOCALS_1_STACK_ITEM_FRAME
//   247      -> SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED
//   248..250 -> CHOP
//   251      -> SAME_FRAME_EXTENDED
//   252..254 -> APPEND
//   255      -> FULL_FRAME
//
// Any other value (128..246 or anything above 255) is reported on stdout and
// terminates the program. abort() follows the assert so that the function
// never falls off its end without a return value when NDEBUG disables assert.
stack_frame_type stack_frame_type_enum(u2 frame_type) {
    // frame_type is unsigned, so only upper bounds need checking for the
    // first range.
    if (frame_type <= 63) {
        return SAME;
    }
    if (frame_type <= 127) {
        return SAME_LOCALS_1_STACK_ITEM_FRAME;
    }
    if (frame_type == 247) {
        return SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED;
    }
    if (248 <= frame_type && frame_type <= 250) {
        return CHOP;
    }
    if (frame_type == 251) {
        return SAME_FRAME_EXTENDED;
    }
    if (252 <= frame_type && frame_type <= 254) {
        return APPEND;
    }
    if (frame_type == 255) {
        return FULL_FRAME;
    }

    printf("unknown stack frame type: %hu\n", frame_type);
    assert(0);
    abort();
}
void parse_attribute(ClassFile* cf, attribute_info* base, FILE* file) {
char* name = (char*)cf->constant_pool[base->attribute_name_index - 1].info.utf8_info.bytes;
printf("%s\n", name);
if (strcmp("ConstantValue", name) == 0) {
read_u2(&(base->info.constant_value_attribute.constantvalue_index), 1, file);
} else if (strcmp("Code", name) == 0) {
read_u2(&(base->info.code_attribute.max_stack), 2, file);
read_u4(&(base->info.code_attribute.code_length), 1, file);
base->info.code_attribute.code = (u1*)malloc(base->info.code_attribute.code_length);
fread(base->info.code_attribute.code, base->info.code_attribute.code_length, 1, file);
read_u2(&(base->info.code_attribute.exception_table_length), 1, file);
base->info.code_attribute.exception_table = malloc(sizeof(u2) * 4 * base->info.code_attribute.exception_table_length);
for (u2 i = 0; i < base->info.code_attribute.exception_table_length; i++) {
read_u2(&(base->info.code_attribute.exception_table[i].start_pc), 4, file);
}
read_u2(&(base->info.code_attribute.attributes_count), 1, file);
base->info.code_attribute.attributes = malloc(sizeof(attribute_info) * base->info.code_attribute.attributes_count);
for (u2 i = 0; i < base->info.code_attribute.attributes_count; i++) {
attribute_info* attr = &(base->info.code_attribute.attributes[i]);
read_u2(&(attr->attribute_name_index), 1, file);
read_u4(&(attr->attribute_length), 1, file);
parse_attribute(cf, attr, file);
}
} else if (strcmp("StackMapTable", name) == 0) {
read_u2(&(base->info.stack_map_table_attribute.number_of_entries), 1, file);
base->info.stack_map_table_attribute.entries = malloc(sizeof(stack_map_frame) * base->info.stack_map_table_attribute.number_of_entries);
for (u2 i = 0; i < base->info.stack_map_table_attribute.number_of_entries; i++) {
2024-04-12 19:54:28 +00:00
stack_map_frame* entry = &(base->info.stack_map_table_attribute.entries[i]);
fread(&(entry->frame_type), 1, 1, file);
u1 frame_type = entry->frame_type;
2024-04-12 19:51:28 +00:00
switch (stack_frame_type_enum(frame_type)) {
case SAME:
break;
case SAME_LOCALS_1_STACK_ITEM_FRAME:
2024-04-12 19:54:28 +00:00
fread(&(entry->same_locals_1_stack_item_frame.stack[0].tag), 1, 1, file);
if (entry->same_locals_1_stack_item_frame.stack[0].tag == ITEM_Object) {
read_u2(&(entry->same_locals_1_stack_item_frame.stack[0].Object_variable_info.cpool_index), 1, file);
} else if (entry->same_locals_1_stack_item_frame.stack[0].tag == ITEM_Uninitialized) {
read_u2(&(entry->same_locals_1_stack_item_frame.stack[0].Uninitialized_variable_info.offset), 1, file);
2024-04-12 19:51:28 +00:00
}
break;
case SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED:
2024-04-12 19:54:28 +00:00
read_u2(&(entry->same_locals_1_stack_item_frame_extended.offset_delta), 1, file);
fread(&(entry->same_locals_1_stack_item_frame_extended.stack[0].tag), 1, 1, file);
if (entry->same_locals_1_stack_item_frame_extended.stack[0].tag == ITEM_Object) {
read_u2(&(entry->same_locals_1_stack_item_frame_extended.stack[0].Object_variable_info.cpool_index), 1, file);
} else if (entry->same_locals_1_stack_item_frame_extended.stack[0].tag == ITEM_Uninitialized) {
read_u2(&(entry->same_locals_1_stack_item_frame_extended.stack[0].Uninitialized_variable_info.offset), 1, file);
2024-04-12 19:51:28 +00:00
}
break;
case CHOP:
2024-04-12 19:54:28 +00:00
read_u2(&(entry->chop_frame.offset_delta), 1, file);
2024-04-12 19:51:28 +00:00
break;
case SAME_FRAME_EXTENDED:
2024-04-12 19:54:28 +00:00
read_u2(&(entry->same_frame_extended.offset_delta), 1, file);
2024-04-12 19:51:28 +00:00
break;
case APPEND:
2024-04-12 19:54:28 +00:00
read_u2(&(entry->append_frame.offset_delta), 1, file);
2024-04-12 19:51:28 +00:00
u2 count = frame_type - 251;
2024-04-12 19:54:28 +00:00
entry->append_frame.locals = malloc(sizeof(verification_type_info) * count);
2024-04-12 19:51:28 +00:00
for (u2 j = 0; j < count; j++) {
2024-04-12 19:54:28 +00:00
fread(&(entry->append_frame.locals[j].tag), 1, 1, file);
if (entry->append_frame.locals[j].tag == ITEM_Object) {
read_u2(&(entry->append_frame.locals[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->append_frame.locals[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->append_frame.locals[j].Uninitialized_variable_info.offset), 1, file);
2024-04-12 19:51:28 +00:00
}
}
break;
case FULL_FRAME:
2024-04-12 19:54:28 +00:00
read_u2(&(entry->full_frame.offset_delta), 1, file);
read_u2(&(entry->full_frame.number_of_locals), 1, file);
entry->full_frame.locals = malloc(sizeof(verification_type_info) * entry->full_frame.number_of_locals);
for (u2 j = 0; j < entry->full_frame.number_of_locals; j++) {
fread(&(entry->full_frame.locals[j].tag), 1, 1, file);
if (entry->full_frame.locals[j].tag == ITEM_Object) {
read_u2(&(entry->full_frame.locals[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->full_frame.locals[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->full_frame.locals[j].Uninitialized_variable_info.offset), 1, file);
2024-04-12 19:51:28 +00:00
}
}
2024-04-12 19:54:28 +00:00
read_u2(&(entry->full_frame.number_of_stack_items), 1, file);
entry->full_frame.stack = malloc(sizeof(verification_type_info) * entry->full_frame.number_of_stack_items);
for (u2 j = 0; j < entry->full_frame.number_of_stack_items; j++) {
fread(&(entry->full_frame.stack[j].tag), 1, 1, file);
if (entry->full_frame.stack[j].tag == ITEM_Object) {
read_u2(&(entry->full_frame.stack[j].Object_variable_info.cpool_index), 1, file);
} else if (entry->full_frame.stack[j].tag == ITEM_Uninitialized) {
read_u2(&(entry->full_frame.stack[j].Uninitialized_variable_info.offset), 1, file);
2024-04-12 19:51:28 +00:00
}
}
break;
}
}
} else if (strcmp("NestHost", name) == 0) {
printf("NestHost not implemented.\n");
assert(0);
} else if (strcmp("NestMembers", name)) {
printf("NestMembers not implemented.\n");
assert(0);
} else if (strcmp("PermittedSubclasses", name) == 0) {
printf("PermittedSubclasses not implemented.\n");
assert(0);
} else {
printf("unhandled attribute type: %s", name);
}
}
ClassFile* ClassFile_load(const char* path) {
FILE* file = fopen(path, "rb");
2024-04-12 12:50:11 +00:00
if (file == NULL)
return NULL;
2024-04-12 19:51:28 +00:00
ClassFile* cf = (ClassFile*)malloc(sizeof(ClassFile));
2024-04-12 12:50:11 +00:00
// magic 4bytes, minor_version 2bytes, major_version 2bytes,
// constant_pool_count 2bytes
2024-04-12 13:49:14 +00:00
read_u4(&(cf->magic), 1, file);
read_u2(&(cf->minor_version), 3, file);
2024-04-12 12:50:11 +00:00
// constant_pool
2024-04-12 19:51:28 +00:00
cf->constant_pool = (cp_info*)malloc(sizeof(cp_info) * (cf->constant_pool_count - 1));
2024-04-12 12:50:11 +00:00
for (u2 i = 0; i < (cf->constant_pool_count - 1); i++) {
// cp_info.tag 2bytes
2024-04-12 13:49:14 +00:00
fread(&(cf->constant_pool[i].tag), 1, 1, file);
2024-04-12 12:50:11 +00:00
size_t size;
switch (cf->constant_pool[i].tag) {
2024-04-12 19:51:28 +00:00
case CONSTANT_Class:
read_u2(&(cf->constant_pool[i].info.class_info.name_index), 1, file);
break;
case CONSTANT_Fieldref:
read_u2(&(cf->constant_pool[i].info.fieldref_info.class_index), 1, file);
read_u2(&(cf->constant_pool[i].info.fieldref_info.name_and_type_index), 1, file);
break;
case CONSTANT_Methodref:
read_u2(&(cf->constant_pool[i].info.methodref_info.class_index), 1, file);
read_u2(&(cf->constant_pool[i].info.methodref_info.name_and_type_index), 1, file);
break;
case CONSTANT_InterfaceMethodref:
read_u2(&(cf->constant_pool[i].info.interface_methodred_info.class_index), 1, file);
read_u2(&(cf->constant_pool[i].info.interface_methodred_info.name_and_type_index), 1, file);
break;
case CONSTANT_String:
read_u2(&(cf->constant_pool[i].info.string_info.string_index), 1, file);
break;
case CONSTANT_Integer:
read_u4(&(cf->constant_pool[i].info.integer_info.bytes), 1, file);
break;
case CONSTANT_Float:
read_u4(&(cf->constant_pool[i].info.float_info.bytes), 1, file);
break;
case CONSTANT_Long:
read_u4(&(cf->constant_pool[i].info.long_info.high_bytes), 1, file);
read_u4(&(cf->constant_pool[i].info.long_info.low_bytes), 1, file);
break;
case CONSTANT_Double:
read_u4(&(cf->constant_pool[i].info.double_info.high_bytes), 1, file);
read_u4(&(cf->constant_pool[i].info.double_info.low_bytes), 1, file);
break;
case CONSTANT_NameAndType:
read_u2(&(cf->constant_pool[i].info.name_and_type_info.name_index), 1, file);
read_u2(&(cf->constant_pool[i].info.name_and_type_info.descriptor_index), 1, file);
break;
case CONSTANT_Utf8:
read_u2(&(cf->constant_pool[i].info.utf8_info.length), 1, file);
cf->constant_pool[i].info.utf8_info.bytes = (u1*)malloc(cf->constant_pool[i].info.utf8_info.length);
fread(cf->constant_pool[i].info.utf8_info.bytes, cf->constant_pool[i].info.utf8_info.length, 1, file);
break;
case CONSTANT_MethodHandle:
fread(&(cf->constant_pool[i].info.method_handle_info.reference_kind), 1, 1, file);
read_u2(&(cf->constant_pool[i].info.method_handle_info.reference_index), 1, file);
break;
case CONSTANT_MethodType:
read_u2(&(cf->constant_pool[i].info.method_type_info.descriptor_index), 1, file);
break;
case CONSTANT_Dynamic:
read_u2(&(cf->constant_pool[i].info.dynamic_info.bootstrap_method_attr_index), 1, file);
read_u2(&(cf->constant_pool[i].info.dynamic_info.name_and_type_index), 1, file);
break;
case CONSTANT_InvokeDynamic:
read_u2(&(cf->constant_pool[i].info.invoke_dynamic_info.bootstrap_method_attr_index), 1, file);
read_u2(&(cf->constant_pool[i].info.invoke_dynamic_info.name_and_type_index), 1, file);
break;
case CONSTANT_Module:
read_u2(&(cf->constant_pool[i].info.module_info.name_index), 1, file);
break;
case CONSTANT_Package:
read_u2(&(cf->constant_pool[i].info.package_info.name_index), 1, file);
break;
default:
printf("unknown constant tag: %hhu\n", cf->constant_pool[i].tag);
// assert(0);
2024-04-12 12:50:11 +00:00
}
}
// access_flags 2bytes, this_class 2bytes, super_class 2bytes, interface_count
2024-04-12 13:49:14 +00:00
// 2byte
read_u2(&(cf->access_flags), 4, file);
2024-04-12 12:50:11 +00:00
// interfaces
2024-04-12 19:51:28 +00:00
cf->interfaces = (u2*)malloc(2 * cf->interfaces_count);
2024-04-12 13:49:14 +00:00
read_u2(cf->interfaces, cf->interfaces_count, file);
2024-04-12 12:50:11 +00:00
// fields_count 2bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->fields_count), 1, file);
2024-04-12 12:50:11 +00:00
// fields
2024-04-12 19:51:28 +00:00
cf->fields = (field_info*)malloc(sizeof(field_info) * cf->fields_count);
2024-04-12 12:50:11 +00:00
for (u2 i = 0; i < cf->fields_count; i++) {
// access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
// attributes_count 2bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->fields[i].access_flags), 4, file);
2024-04-12 19:51:28 +00:00
cf->fields[i].attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->fields[i].attributes_count);
2024-04-12 12:50:11 +00:00
for (u2 j = 0; j < cf->fields[i].attributes_count; j++) {
// attribute_name_index 2bytes, attribute_length 4bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->fields[i].attributes[j].attribute_name_index), 1, file);
read_u4(&(cf->fields[i].attributes[j].attribute_length), 1, file);
2024-04-12 15:07:31 +00:00
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
2024-04-12 19:51:28 +00:00
// cf->fields[i].attributes[j].info =
// (u1 *)malloc(cf->fields[i].attributes[j].attribute_length);
// fread(cf->fields[i].attributes[j].info,
// cf->fields[i].attributes[j].attribute_length, 1, file);
parse_attribute(cf, &(cf->fields[i].attributes[j]), file);
2024-04-12 12:50:11 +00:00
}
}
// methods_count 2bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->methods_count), 1, file);
2024-04-12 12:50:11 +00:00
// methods
2024-04-12 19:51:28 +00:00
cf->methods = (method_info*)malloc(sizeof(method_info) * cf->methods_count);
2024-04-12 12:50:11 +00:00
for (u2 i = 0; i < cf->methods_count; i++) {
// access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
// attributes_count 2bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->methods[i].access_flags), 4, file);
2024-04-12 19:51:28 +00:00
cf->methods[i].attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->methods[i].attributes_count);
2024-04-12 12:50:11 +00:00
for (u2 j = 0; j < cf->methods[i].attributes_count; j++) {
// attribute_name_index 2bytes, attribute_length 4bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->methods[i].attributes[j].attribute_name_index), 1, file);
read_u4(&(cf->methods[i].attributes[j].attribute_length), 1, file);
2024-04-12 15:07:31 +00:00
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
2024-04-12 19:51:28 +00:00
// cf->methods[i].attributes[j].info =
// (u1 *)malloc(cf->methods[i].attributes[j].attribute_length);
// fread(cf->methods[i].attributes[j].info,
// cf->methods[i].attributes[j].attribute_length, 1, file);
parse_attribute(cf, &(cf->methods[i].attributes[j]), file);
2024-04-12 12:50:11 +00:00
}
}
// attributes_count 2bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->attributes_count), 1, file);
2024-04-12 12:50:11 +00:00
// attributes
2024-04-12 19:51:28 +00:00
cf->attributes = (attribute_info*)malloc(sizeof(attribute_info) * cf->attributes_count);
2024-04-12 12:50:11 +00:00
for (u2 i = 0; i < cf->attributes_count; i++) {
// attribute_name_index 2bytes, attribute_length 4bytes
2024-04-12 13:49:14 +00:00
read_u2(&(cf->attributes[i].attribute_name_index), 1, file);
read_u4(&(cf->attributes[i].attribute_length), 1, file);
2024-04-12 19:51:28 +00:00
// cf->attributes[i].info = (u1
// *)malloc(cf->attributes[i].attribute_length);
// TODO: parse attributes by tag
// this has an unknown amount of layers, should be moved out to a function
// fread(cf->attributes[i].info, cf->attributes[i].attribute_length, 1,
// file);
parse_attribute(cf, &(cf->attributes[i]), file);
2024-04-12 12:50:11 +00:00
}
fclose(file);
return cf;
}
2024-04-12 19:51:28 +00:00
// Print the header fields and table sizes of `cf` to stdout, one
// "name=value" pair per line. magic and access_flags are shown in
// hexadecimal, everything else in decimal.
void ClassFile_info(const ClassFile* cf) {
    printf("magic=%X\n", cf->magic);
    printf("minor_version=%hu\n", cf->minor_version);
    printf("major_version=%hu\n", cf->major_version);
    printf("constant_pool_count=%hu\n", cf->constant_pool_count);
    printf("access_flags=%hx\n", cf->access_flags);
    printf("this_class=%hu\n", cf->this_class);
    printf("super_class=%hu\n", cf->super_class);
    printf("interfaces_count=%hu\n", cf->interfaces_count);
    printf("fields_count=%hu\n", cf->fields_count);
    printf("methods_count=%hu\n", cf->methods_count);
    printf("attributes_count=%hu\n", cf->attributes_count);
}