#include "classfile.h"

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// When defined, read_u2()/read_u4() byte-swap every value after reading it.
// Class files store multi-byte items big-endian (JVMS 4.1), so this must stay
// defined on little-endian hosts.
#define SWAP_ENDIANNESS

// Text substituted wherever a constant-pool index does not resolve to an entry
// of the expected kind, so printing code never dereferences a bogus pointer.
static const char INVALID_CONSTANT[] = "<invalid>";

// Allocates a zero-initialised array of `count` elements of `size` bytes.
// At least one element is always requested so that an empty table still gets a
// non-NULL pointer. Out-of-memory is fatal: a message is printed and the
// process aborts, so callers never see NULL.
static void* alloc_array(size_t count, size_t size) {
  void* memory = calloc(count > 0 ? count : 1, size);
  if (memory == NULL) {
    fprintf(stderr, "out of memory while loading class file\n");
    abort();
  }
  return memory;
}

// Reads `len` raw bytes from `file` into `dst`. If the stream ends early (or
// errors), the unread tail of `dst` is zero-filled so callers never observe
// indeterminate memory.
static void read_bytes(void* dst, size_t len, FILE* file) {
  size_t got = len > 0 ? fread(dst, 1, len, file) : 0;
  if (got < len) {
    memset((unsigned char*)dst + got, 0, len - got);
  }
}

// Returns the 1-based constant-pool entry `index`, or NULL when the index is 0
// or not below constant_pool_count.
static const cp_info* cp_entry(const ClassFile* cf, u2 index) {
  if (index == 0 || index >= cf->constant_pool_count) {
    return NULL;
  }
  return &cf->constant_pool[index - 1];
}

// Returns the NUL-terminated text of the CONSTANT_Utf8 entry at `index`, or
// INVALID_CONSTANT when `index` does not name such an entry.
static const char* cp_utf8(const ClassFile* cf, u2 index) {
  const cp_info* entry = cp_entry(cf, index);
  if (entry == NULL || entry->tag != CONSTANT_Utf8 || entry->info.utf8_info.bytes == NULL) {
    return INVALID_CONSTANT;
  }
  return (const char*)entry->info.utf8_info.bytes;
}

// Returns the name of the CONSTANT_Class entry at `class_index`, or
// INVALID_CONSTANT when the index does not name a class entry.
static const char* cp_class_name(const ClassFile* cf, u2 class_index) {
  const cp_info* entry = cp_entry(cf, class_index);
  if (entry == NULL || entry->tag != CONSTANT_Class) {
    return INVALID_CONSTANT;
  }
  return cp_utf8(cf, entry->info.class_info.name_index);
}

// Maps access flags to the visibility keyword printed by ClassFile_info();
// returns "" when none of public/private/protected is set.
static const char* access_keyword(u2 flags) {
  if (flags & ACC_PUBLIC) {
    return "public";
  }
  if (flags & ACC_PRIVATE) {
    return "private";
  }
  if (flags & ACC_PROTECTED) {
    return "protected";
  }
  return "";
}

// Defined after parse_attribute(); the two are mutually recursive because a
// Code attribute carries its own nested attribute table.
static attribute_info* read_attributes(ClassFile* cf, u2 count, FILE* file);

// Reads `n` consecutive u2 values from `file` into the memory at `ptr`,
// byte-swapping each one when SWAP_ENDIANNESS is defined. `n == 0` is a no-op.
// Values that cannot be read because the stream ended are read as zero bytes.
void read_u2(void* ptr, size_t n, FILE* file) {
  unsigned char* out = ptr;
  for (size_t i = 0; i < n; i++) {
    u2 value;
    read_bytes(&value, sizeof value, file);
#ifdef SWAP_ENDIANNESS
    value = (u2)((value >> 8) | (value << 8));
#endif
    // memcpy rather than a typed store: `ptr` is an untyped destination.
    memcpy(out + i * sizeof value, &value, sizeof value);
  }
}

// Reads `n` consecutive u4 values from `file` into the memory at `ptr`,
// byte-swapping each one when SWAP_ENDIANNESS is defined. `n == 0` is a no-op.
// Values that cannot be read because the stream ended are read as zero bytes.
void read_u4(void* ptr, size_t n, FILE* file) {
  unsigned char* out = ptr;
  for (size_t i = 0; i < n; i++) {
    u4 value;
    read_bytes(&value, sizeof value, file);
#ifdef SWAP_ENDIANNESS
    value = ((value >> 24) & 0xff) |      // move byte 3 to byte 0
            ((value << 8) & 0xff0000) |   // move byte 1 to byte 2
            ((value >> 8) & 0xff00) |     // move byte 2 to byte 1
            ((value << 24) & 0xff000000); // move byte 0 to byte 3
#endif
    memcpy(out + i * sizeof value, &value, sizeof value);
  }
}

// Combines two 32-bit halves into one 64-bit value, `high` in the upper bits.
uint64_t merge_bytes(uint32_t high, uint32_t low) {
  return ((uint64_t)high << 32) | low;
}

// Classifies a StackMapTable frame_type byte into its frame kind.
// Values with no branch below (128..246, and anything above 255) print a
// diagnostic and terminate the process.
stack_frame_type stack_frame_type_enum(u2 frame_type) {
  if (frame_type <= 63) {
    return SAME;
  } else if (64 <= frame_type && frame_type <= 127) {
    return SAME_LOCALS_1_STACK_ITEM_FRAME;
  } else if (frame_type == 247) {
    return SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED;
  } else if (248 <= frame_type && frame_type <= 250) {
    return CHOP;
  } else if (frame_type == 251) {
    return SAME_FRAME_EXTENDED;
  } else if (252 <= frame_type && frame_type <= 254) {
    return APPEND;
  } else if (frame_type == 255) {
    return FULL_FRAME;
  }
  printf("unknown stack frame type: %hu\n", frame_type);
  assert(0);
  // Still terminate when NDEBUG compiles the assert away, instead of falling
  // off the end of a non-void function.
  abort();
}

// Looks `name` up in ATTRIBUTE_TAGS_STRINGS and returns the matching tag
// (table slot i - 1 corresponds to tag value i), or UNKNOWN when no entry
// matches.
ATTRIBUTE_TAGS attribute_tag(char* name) {
  for (size_t i = 1; i < ATTRIBUTE_TAGS_END; i++) {
    if (strcmp(ATTRIBUTE_TAGS_STRINGS[i - 1], name) == 0) {
      return (ATTRIBUTE_TAGS)i;
    }
  }
  return UNKNOWN;
}

// Not implemented yet: always returns NULL.
char* constant_to_string(ClassFile* cf, cp_info info) {
  (void)cf;
  (void)info;
  return NULL;
}

// Reads one verification_type_info: a one-byte tag, followed by a u2 payload
// only for ITEM_Object (cpool_index) and ITEM_Uninitialized (offset).
static void read_verification_type_info(verification_type_info* info, FILE* file) {
  read_bytes(&(info->tag), 1, file);
  if (info->tag == ITEM_Object) {
    read_u2(&(info->Object_variable_info.cpool_index), 1, file);
  } else if (info->tag == ITEM_Uninitialized) {
    read_u2(&(info->Uninitialized_variable_info.offset), 1, file);
  }
}

// Decodes the payload of one attribute.
//
// On entry `base->attribute_name_index` and `base->attribute_length` are
// already filled in and `file` is positioned at the first payload byte. The
// attribute name selects the decoder; `base->_tag` records which one was used.
// Attributes without a decoder are reported on stdout and skipped using
// `attribute_length`, so the stream stays aligned for the next attribute.
// All tables are heap-allocated and owned by `base`.
void parse_attribute(ClassFile* cf, attribute_info* base, FILE* file) {
  const char* name = cp_utf8(cf, base->attribute_name_index);
  // attribute_tag() only reads the string; the cast merely matches its
  // non-const parameter type.
  base->_tag = attribute_tag((char*)name);

  switch (base->_tag) {
    case CONSTANT_VALUE:
      read_u2(&(base->info.constant_value_attribute.constantvalue_index), 1, file);
      break;

    case CODE:
      // Two values in one call: assumes max_stack is immediately followed by
      // max_locals in the struct declared in classfile.h.
      read_u2(&(base->info.code_attribute.max_stack), 2, file);
      read_u4(&(base->info.code_attribute.code_length), 1, file);
      base->info.code_attribute.code = alloc_array(base->info.code_attribute.code_length, sizeof(u1));
      read_bytes(base->info.code_attribute.code, base->info.code_attribute.code_length, file);

      read_u2(&(base->info.code_attribute.exception_table_length), 1, file);
      base->info.code_attribute.exception_table =
          alloc_array(base->info.code_attribute.exception_table_length,
                      sizeof *base->info.code_attribute.exception_table);
      for (u2 i = 0; i < base->info.code_attribute.exception_table_length; i++) {
        // Four u2 values per entry, starting at start_pc; assumes the entry
        // struct consists of four adjacent u2 members.
        read_u2(&(base->info.code_attribute.exception_table[i].start_pc), 4, file);
      }

      read_u2(&(base->info.code_attribute.attributes_count), 1, file);
      base->info.code_attribute.attributes =
          read_attributes(cf, base->info.code_attribute.attributes_count, file);
      break;

    case STACK_MAP_TABLE:
      read_u2(&(base->info.stack_map_table_attribute.number_of_entries), 1, file);
      base->info.stack_map_table_attribute.entries =
          alloc_array(base->info.stack_map_table_attribute.number_of_entries, sizeof(stack_map_frame));
      for (u2 i = 0; i < base->info.stack_map_table_attribute.number_of_entries; i++) {
        stack_map_frame* entry = &(base->info.stack_map_table_attribute.entries[i]);
        read_bytes(&(entry->frame_type), 1, file);
        u1 frame_type = entry->frame_type;

        switch (stack_frame_type_enum(frame_type)) {
          case SAME:
            break;

          case SAME_LOCALS_1_STACK_ITEM_FRAME:
            read_verification_type_info(&(entry->same_locals_1_stack_item_frame.stack[0]), file);
            break;

          case SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED:
            read_u2(&(entry->same_locals_1_stack_item_frame_extended.offset_delta), 1, file);
            read_verification_type_info(&(entry->same_locals_1_stack_item_frame_extended.stack[0]), file);
            break;

          case CHOP:
            read_u2(&(entry->chop_frame.offset_delta), 1, file);
            break;

          case SAME_FRAME_EXTENDED:
            read_u2(&(entry->same_frame_extended.offset_delta), 1, file);
            break;

          case APPEND: {
            read_u2(&(entry->append_frame.offset_delta), 1, file);
            // frame_type is 252..254 here, so 1..3 locals follow.
            u2 count = frame_type - 251;
            entry->append_frame.locals = alloc_array(count, sizeof(verification_type_info));
            for (u2 j = 0; j < count; j++) {
              read_verification_type_info(&(entry->append_frame.locals[j]), file);
            }
            break;
          }

          case FULL_FRAME:
            read_u2(&(entry->full_frame.offset_delta), 1, file);

            read_u2(&(entry->full_frame.number_of_locals), 1, file);
            entry->full_frame.locals =
                alloc_array(entry->full_frame.number_of_locals, sizeof(verification_type_info));
            for (u2 j = 0; j < entry->full_frame.number_of_locals; j++) {
              read_verification_type_info(&(entry->full_frame.locals[j]), file);
            }

            read_u2(&(entry->full_frame.number_of_stack_items), 1, file);
            entry->full_frame.stack =
                alloc_array(entry->full_frame.number_of_stack_items, sizeof(verification_type_info));
            for (u2 j = 0; j < entry->full_frame.number_of_stack_items; j++) {
              read_verification_type_info(&(entry->full_frame.stack[j]), file);
            }
            break;

          default:
            break;
        }
      }
      break;

    case BOOTSTRAP_METHODS: {
      BootstrapMethods_attribute* attr = &base->info.bootstrap_methods_attribute;
      read_u2(&(attr->num_bootstrap_methods), 1, file);
      attr->bootstrap_methods = alloc_array(attr->num_bootstrap_methods, sizeof(BootstrapMethod));
      for (u2 i = 0; i < attr->num_bootstrap_methods; i++) {
        BootstrapMethod* method = &attr->bootstrap_methods[i];
        // Two values in one call: assumes bootstrap_method_ref is immediately
        // followed by num_bootstrap_arguments in BootstrapMethod.
        read_u2(&(method->bootstrap_method_ref), 2, file);
        method->bootstrap_arguments = alloc_array(method->num_bootstrap_arguments, sizeof(u2));
        read_u2(method->bootstrap_arguments, method->num_bootstrap_arguments, file);
      }
      break;
    }

    case NEST_HOST:
      read_u2(&(base->info.nest_host_attribute.host_class_index), 1, file);
      break;

    case NEST_MEMBERS:
      read_u2(&(base->info.nest_members_attribute.number_of_classes), 1, file);
      base->info.nest_members_attribute.classes =
          alloc_array(base->info.nest_members_attribute.number_of_classes, sizeof(u2));
      read_u2(base->info.nest_members_attribute.classes,
              base->info.nest_members_attribute.number_of_classes, file);
      break;

    case PERMITTED_SUBCLASSES:
      read_u2(&(base->info.permitted_subclasses_attribute.number_of_classes), 1, file);
      base->info.permitted_subclasses_attribute.classes =
          alloc_array(base->info.permitted_subclasses_attribute.number_of_classes, sizeof(u2));
      read_u2(base->info.permitted_subclasses_attribute.classes,
              base->info.permitted_subclasses_attribute.number_of_classes, file);
      break;

    case ATTRIBUTE_TAGS_END:
      // attribute_tag() never returns the end marker.
      assert(0);
      break;

    case UNKNOWN:
    default:
      // Also covers any tag that has a name in ATTRIBUTE_TAGS_STRINGS but no
      // decoder above: skipping its payload keeps the stream aligned.
      printf("unhandled attribute type: %s\n", name);
      fseek(file, (long)base->attribute_length, SEEK_CUR);
      break;
  }
}

// Reads an attribute table of `count` entries: for each one the u2 name index
// and u4 length, then the payload via parse_attribute(). Returns a
// heap-allocated array owned by the caller (never NULL).
static attribute_info* read_attributes(ClassFile* cf, u2 count, FILE* file) {
  attribute_info* attributes = alloc_array(count, sizeof *attributes);
  for (u2 i = 0; i < count; i++) {
    read_u2(&(attributes[i].attribute_name_index), 1, file);
    read_u4(&(attributes[i].attribute_length), 1, file);
    parse_attribute(cf, &attributes[i], file);
  }
  return attributes;
}

// Loads and parses the class file at `path`.
//
// Returns a heap-allocated ClassFile owned by the caller, or NULL when the
// file cannot be opened, the top-level allocation fails, or the file does not
// start with the 0xCAFEBABE magic number. An unknown constant-pool tag or an
// out-of-memory condition deeper in the parse terminates the process.
// CONSTANT_Utf8 byte arrays are stored with a trailing NUL so they can be used
// as C strings.
ClassFile* ClassFile_load(const char* path) {
  FILE* file = fopen(path, "rb");
  if (file == NULL) {
    return NULL;
  }

  ClassFile* cf = calloc(1, sizeof *cf);
  if (cf == NULL) {
    fclose(file);
    return NULL;
  }

  // magic 4bytes
  read_u4(&(cf->magic), 1, file);
  if (cf->magic != 0xCAFEBABEu) {
    // Not a class file: refuse instead of parsing arbitrary bytes.
    free(cf);
    fclose(file);
    return NULL;
  }

  // minor_version 2bytes, major_version 2bytes, constant_pool_count 2bytes
  // (one call; assumes the three members are adjacent in ClassFile)
  read_u2(&(cf->minor_version), 3, file);

  // constant_pool: indices are 1-based, so the table holds count - 1 entries.
  // Guard against count == 0, which would otherwise wrap the size computation.
  size_t pool_entries = cf->constant_pool_count > 0 ? (size_t)cf->constant_pool_count - 1 : 0;
  cf->constant_pool = alloc_array(pool_entries, sizeof(cp_info));
  for (size_t i = 0; i < pool_entries; i++) {
    cp_info* cp = &(cf->constant_pool[i]);
    // cp_info.tag 1byte
    read_bytes(&(cp->tag), 1, file);

    switch (cp->tag) {
      case CONSTANT_Class:
        read_u2(&(cp->info.class_info.name_index), 1, file);
        break;
      case CONSTANT_Fieldref:
        read_u2(&(cp->info.fieldref_info.class_index), 1, file);
        read_u2(&(cp->info.fieldref_info.name_and_type_index), 1, file);
        break;
      case CONSTANT_Methodref:
        read_u2(&(cp->info.methodref_info.class_index), 1, file);
        read_u2(&(cp->info.methodref_info.name_and_type_index), 1, file);
        break;
      case CONSTANT_InterfaceMethodref:
        read_u2(&(cp->info.interface_methodref_info.class_index), 1, file);
        read_u2(&(cp->info.interface_methodref_info.name_and_type_index), 1, file);
        break;
      case CONSTANT_String:
        read_u2(&(cp->info.string_info.string_index), 1, file);
        break;
      case CONSTANT_Integer:
        read_u4(&(cp->info.integer_info.bytes), 1, file);
        break;
      case CONSTANT_Float:
        read_u4(&(cp->info.float_info.bytes), 1, file);
        break;
      case CONSTANT_Long:
        read_u4(&(cp->info.long_info.high_bytes), 1, file);
        read_u4(&(cp->info.long_info.low_bytes), 1, file);
        // The following slot is unusable (it stays zero-initialised).
        i++;  // Oracle docs: 4.4.5: 'In retrospect, making 8-byte constants take two constant pool entries was a poor choice.'
        break;
      case CONSTANT_Double:
        read_u4(&(cp->info.double_info.high_bytes), 1, file);
        read_u4(&(cp->info.double_info.low_bytes), 1, file);
        // The following slot is unusable (it stays zero-initialised).
        i++;  // Oracle docs: 4.4.5: 'In retrospect, making 8-byte constants take two constant pool entries was a poor choice.'
        break;
      case CONSTANT_NameAndType:
        read_u2(&(cp->info.name_and_type_info.name_index), 1, file);
        read_u2(&(cp->info.name_and_type_info.descriptor_index), 1, file);
        break;
      case CONSTANT_Utf8:
        read_u2(&(cp->info.utf8_info.length), 1, file);
        // One extra byte for a terminating NUL: the rest of this file treats
        // `bytes` as a C string (strcmp, "%s"). Modified UTF-8 never contains
        // an embedded zero byte, so the terminator is unambiguous.
        cp->info.utf8_info.bytes = alloc_array((size_t)cp->info.utf8_info.length + 1, sizeof(u1));
        read_bytes(cp->info.utf8_info.bytes, cp->info.utf8_info.length, file);
        cp->info.utf8_info.bytes[cp->info.utf8_info.length] = '\0';
        break;
      case CONSTANT_MethodHandle:
        read_bytes(&(cp->info.method_handle_info.reference_kind), 1, file);
        read_u2(&(cp->info.method_handle_info.reference_index), 1, file);
        break;
      case CONSTANT_MethodType:
        read_u2(&(cp->info.method_type_info.descriptor_index), 1, file);
        break;
      case CONSTANT_Dynamic:
        read_u2(&(cp->info.dynamic_info.bootstrap_method_attr_index), 1, file);
        read_u2(&(cp->info.dynamic_info.name_and_type_index), 1, file);
        break;
      case CONSTANT_InvokeDynamic:
        read_u2(&(cp->info.invoke_dynamic_info.bootstrap_method_attr_index), 1, file);
        read_u2(&(cp->info.invoke_dynamic_info.name_and_type_index), 1, file);
        break;
      case CONSTANT_Module:
        read_u2(&(cp->info.module_info.name_index), 1, file);
        break;
      case CONSTANT_Package:
        read_u2(&(cp->info.package_info.name_index), 1, file);
        break;
      default:
        printf("unknown constant tag: %hhu\n", cp->tag);
        assert(0);
        // The size of an unknown entry is unknowable, so the rest of the file
        // cannot be parsed; terminate even when NDEBUG removes the assert.
        abort();
    }
  }

  // access_flags 2bytes, this_class 2bytes, super_class 2bytes,
  // interfaces_count 2bytes (one call; assumes the members are adjacent)
  read_u2(&(cf->access_flags), 4, file);

  // interfaces
  cf->interfaces = alloc_array(cf->interfaces_count, sizeof(u2));
  read_u2(cf->interfaces, cf->interfaces_count, file);

  // fields_count 2bytes
  read_u2(&(cf->fields_count), 1, file);

  // fields
  cf->fields = alloc_array(cf->fields_count, sizeof(field_info));
  for (u2 i = 0; i < cf->fields_count; i++) {
    // access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
    // attributes_count 2bytes
    read_u2(&(cf->fields[i].access_flags), 4, file);
    cf->fields[i].attributes = read_attributes(cf, cf->fields[i].attributes_count, file);
  }

  // methods_count 2bytes
  read_u2(&(cf->methods_count), 1, file);

  // methods
  cf->methods = alloc_array(cf->methods_count, sizeof(method_info));
  for (u2 i = 0; i < cf->methods_count; i++) {
    // access_flags 2bytes, name_index 2bytes, descriptor_index 2bytes,
    // attributes_count 2bytes
    read_u2(&(cf->methods[i].access_flags), 4, file);
    cf->methods[i].attributes = read_attributes(cf, cf->methods[i].attributes_count, file);
  }

  // attributes_count 2bytes
  read_u2(&(cf->attributes_count), 1, file);

  // attributes
  cf->attributes = read_attributes(cf, cf->attributes_count, file);

  fclose(file);
  return cf;
}

// Prints a human-readable dump of `cf` to stdout: the header counters, the
// constant pool, the class declaration line, and one line per field and
// method. Constant-pool references that do not resolve are shown as
// "<invalid>".
void ClassFile_info(const ClassFile* cf) {
  printf(
      "magic=%X\n"
      "minor_version=%hu\n"
      "major_version=%hu\n"
      "constant_pool_count=%hu\n"
      "access_flags=%hx\n"
      "this_class=%hu\n"
      "super_class=%hu\n"
      "interfaces_count=%hu\n"
      "fields_count=%hu\n"
      "methods_count=%hu\n"
      "attributes_count=%hu\n",
      cf->magic, cf->minor_version, cf->major_version, cf->constant_pool_count, cf->access_flags,
      cf->this_class, cf->super_class, cf->interfaces_count, cf->fields_count, cf->methods_count,
      cf->attributes_count);

  // constants
  printf("Constant pool:\n");
  for (u2 i = 0; i < cf->constant_pool_count - 1; i++) {
    const cp_info* cp = &cf->constant_pool[i];

    // '#' + up to five digits + NUL needs seven bytes.
    char name[8];
    snprintf(name, sizeof name, "#%d", i + 1);
    printf("%5s = %s\t", name, CONSTANT_TAGS_STRINGS[cp->tag]);

    switch (cp->tag) {
      case CONSTANT_Utf8:
        printf("\t%s\n", cp->info.utf8_info.bytes);
        break;
      case CONSTANT_Integer: {
        // Reinterpret the stored bits as a signed 32-bit value.
        int32_t value;
        memcpy(&value, &cp->info.integer_info.bytes, sizeof value);
        printf("%d\n", (int)value);
        break;
      }
      case CONSTANT_Float: {
        // The entry holds an IEEE 754 bit pattern: reinterpret the bits rather
        // than converting the integer numerically.
        float value;
        memcpy(&value, &cp->info.float_info.bytes, sizeof value);
        printf("%f\n", value);
        break;
      }
      case CONSTANT_Long: {
        uint64_t merged = merge_bytes(cp->info.long_info.high_bytes, cp->info.long_info.low_bytes);
        int64_t value;
        memcpy(&value, &merged, sizeof value);
        printf("\t%lld\n", (long long)value);
        i++;  // skip the unusable second slot
        break;
      }
      case CONSTANT_Double: {
        uint64_t merged = merge_bytes(cp->info.double_info.high_bytes, cp->info.double_info.low_bytes);
        double value;
        memcpy(&value, &merged, sizeof value);
        printf("\t%lf\n", value);
        i++;  // skip the unusable second slot
        break;
      }
      case CONSTANT_Class:
        printf("\t#%hu\t\t\t// %s\n", cp->info.class_info.name_index,
               cp_utf8(cf, cp->info.class_info.name_index));
        break;
      case CONSTANT_String:
        printf("\t#%hu\t\t\t// %s\n", cp->info.string_info.string_index,
               cp_utf8(cf, cp->info.string_info.string_index));
        break;
      case CONSTANT_Fieldref:
      case CONSTANT_Methodref:
      case CONSTANT_InterfaceMethodref: {
        // All three are read through fieldref_info, as the loader's union
        // accesses use the same two u2 members for each of them.
        char* resolved = ClassFile_resolve_Fieldref_Methodref_InterfaceMethodref(cf, cp->info.fieldref_info);
        // The resolved text already ends in '\n'.
        printf("#%hu.#%hu\t\t\t// %s", cp->info.fieldref_info.class_index,
               cp->info.fieldref_info.name_and_type_index, resolved);
        free(resolved);
        break;
      }
      case CONSTANT_NameAndType: {
        char* resolved = ClassFile_resolve_NameAnyType(cf, cp->info.name_and_type_info);
        printf("#%hu:#%hu\t\t\t// %s", cp->info.name_and_type_info.name_index,
               cp->info.name_and_type_info.descriptor_index, resolved);
        free(resolved);
        putchar('\n');
        break;
      }
      default:
        putchar('\n');
        break;
    }
  }

  // class
  const char* this_name = cp_class_name(cf, cf->this_class);
  // super_class == 0 means there is no superclass to print.
  const char* super_name = cf->super_class != 0 ? cp_class_name(cf, cf->super_class) : NULL;
  const char* type = "class";
  if (cf->access_flags & ACC_INTERFACE) {
    type = "interface";
  } else if (cf->access_flags & ACC_ENUM) {
    type = "enum";
  } else if (cf->access_flags & ACC_MODULE) {
    type = "module";
  }
  printf("%s%s%s%s %s%s%s ",
         cf->access_flags & ACC_PUBLIC ? "public " : "",
         cf->access_flags & ACC_FINAL ? "final " : "",
         cf->access_flags & ACC_ABSTRACT ? "abstract " : "",
         type, this_name,
         super_name ? " extends " : "",
         super_name ? super_name : "");

  // interfaces: each entry is the index of a CONSTANT_Class, whose name_index
  // in turn points at the Utf8 name.
  if (cf->interfaces_count > 0) {
    printf("implements ");
  }
  for (u2 i = 0; i < cf->interfaces_count; i++) {
    printf("%s ", cp_class_name(cf, cf->interfaces[i]));
  }
  printf("{\n");

  // fields
  for (u2 i = 0; i < cf->fields_count; i++) {
    u2 flags = cf->fields[i].access_flags;
    const char* descriptor = cp_utf8(cf, cf->fields[i].descriptor_index);
    const char* name = cp_utf8(cf, cf->fields[i].name_index);
    printf("\t%s %s%s%s%s %s", access_keyword(flags),
           flags & ACC_STATIC ? "static " : "",
           flags & ACC_FINAL ? "final " : "",
           flags & ACC_VOLATLE ? "volatile " : "",
           descriptor, name);
    // TODO: print field attributes
    printf(";\n");
  }

  // methods
  for (u2 i = 0; i < cf->methods_count; i++) {
    u2 flags = cf->methods[i].access_flags;
    const char* descriptor = cp_utf8(cf, cf->methods[i].descriptor_index);
    const char* name = cp_utf8(cf, cf->methods[i].name_index);
    // NOTE(review): ACC_VOLATLE is a field flag; JVMS Table 4.6-A assigns the
    // volatile bit position to ACC_BRIDGE for methods — confirm against
    // classfile.h whether "volatile" is the intended label here.
    printf("\t%s %s%s%s%s %s", access_keyword(flags),
           flags & ACC_STATIC ? "static " : "",
           flags & ACC_FINAL ? "final " : "",
           flags & ACC_VOLATLE ? "volatile " : "",
           descriptor, name);
    // TODO: print method attributes
    printf(";\n");
  }
}

// Formats a NameAndType constant as "name:descriptor".
// Returns a heap-allocated string the caller must free(); indices that do not
// resolve to Utf8 entries are rendered as "<invalid>".
char* ClassFile_resolve_NameAnyType(const ClassFile* cf, CONSTANT_NameAndType_info info) {
  const char* name = cp_utf8(cf, info.name_index);
  const char* descriptor = cp_utf8(cf, info.descriptor_index);

  size_t size = strlen(name) + strlen(descriptor) + 2;  // ':' and NUL
  char* buffer = alloc_array(size, sizeof(char));
  snprintf(buffer, size, "%s:%s", name, descriptor);
  return buffer;
}

// Formats a Fieldref/Methodref/InterfaceMethodref constant as
// "Class.name:descriptor\n" (the trailing newline is part of the result).
// Returns a heap-allocated string the caller must free(); references that do
// not resolve are rendered as "<invalid>".
char* ClassFile_resolve_Fieldref_Methodref_InterfaceMethodref(const ClassFile* cf, CONSTANT_Fieldref_info info) {
  const char* class_name = cp_class_name(cf, info.class_index);
  const char* name = INVALID_CONSTANT;
  const char* descriptor = INVALID_CONSTANT;

  const cp_info* name_and_type = cp_entry(cf, info.name_and_type_index);
  if (name_and_type != NULL && name_and_type->tag == CONSTANT_NameAndType) {
    name = cp_utf8(cf, name_and_type->info.name_and_type_info.name_index);
    descriptor = cp_utf8(cf, name_and_type->info.name_and_type_info.descriptor_index);
  }

  // '.', ':', '\n' and the terminating NUL: four extra bytes.
  size_t size = strlen(class_name) + strlen(name) + strlen(descriptor) + 4;
  char* buffer = alloc_array(size, sizeof(char));
  snprintf(buffer, size, "%s.%s:%s\n", class_name, name, descriptor);
  return buffer;
}