.CC
From REWiki
Contents |
Games
The .CC file format is used by Might and Magic 3, Might and Magic 4 and Might and Magic 5. It contains all required game data (images, sounds, scripts, ...)
Structure
A .CC file has a scrambled header which contains the directory. Files are not accessed by name but by a hash code calculated from the file name.
The actual file data in MM4 and MM5 is XOR'ed by 0x35, presumably to prevent people from reading important pieces of text by simply opening the file in a hex editor. For MM3 it seems that there is either a different XOR value, or none at all.
Graphical representation of the file structure :
Header
The first uint16 in the file, NUMFILES, gives the number of files. Each file entry has 8 bytes, so the total header size is 2 + NUMFILES*8. Here's a code fragment to de-scramble the header, converted directly from the assembly code of one of the game's executables:
/*
 * De-scramble the .CC directory in place.
 *
 * buf: scrambled directory bytes (everything after the 2-byte file count).
 * l:   number of bytes in buf.
 *
 * Each byte is rotated left by two bits and then has a running key added
 * to it; the key starts at 0xAC and grows by 0x67 (mod 256) per byte.
 */
void decode_header_buf(unsigned char *buf, unsigned int l)
{
    unsigned char key = 0xAC;          /* AH in the original assembly */
    unsigned char *cursor = buf;
    unsigned int remaining = l;

    while (remaining-- > 0) {
        unsigned int raw = *cursor;
        unsigned int rotated = ((raw << 2) | (raw >> 6)) & 0xff;   /* ROL al,2 */

        *cursor++ = (unsigned char)((rotated + key) & 0xff);
        key = (unsigned char)((key + 0x67) & 0xff);
    }
}
After this transformation, each of the file-entries looks like this:
Byte 0-1: file hash, LSB first
Byte 2-4: 24-bit file offset, LSB first
Byte 5-6: 16-bit file length, LSB first
Byte 7  : apparently always zero
The file hash works using this algorithm (yes, I know it's ugly, it's taken directly from assembly and not optimized in any way):
/*
 * File-name hash, transcribed step by step from the game's assembly.
 * h0 is the high byte and h1 the low byte of the 16-bit hash;
 * h2 and h3 are scratch bytes.
 */
const char *buf = "filename.ext";
unsigned char h0, h1, h2, h3;
unsigned int temp;
h0 = h1 = 0;
while (*buf != 0)
{
    // swap h0 and h1
    h2 = h1; h1 = h0; h0 = h2;
    // rotate the 16-bit value h0:h1 (h0=msb, h1=lsb) left by one bit
    h2 = (((unsigned int)h0 << 1) & 0xff) | (((unsigned int)h1 >> 7) & 0xff); // new h0
    h3 = (((unsigned int)h1 << 1) & 0xff) | (((unsigned int)h0 >> 7) & 0xff); // new h1
    h0 = h2;
    h1 = h3;
    // fetch the next character, clear bit 7 and fold it to upper case;
    // like the game, every value >= 0x60 (not just 'a'..'z') has 0x20 subtracted
    h2 = (*buf++) & 0x7f;
    if (h2 >= 0x60)
        h2 = h2 - 0x20;
    // add the character to the 16-bit value h0:h1, wrapping at 16 bits
    temp = ((unsigned int)h0 << 8) | (unsigned int)h1;
    temp = (temp + h2) & 0xffff;
    // these are the new h0, h1
    h0 = (temp >> 8) & 0xff;
    h1 = temp & 0xff;
}
After that, h0 and h1 contain the file hash, which is checked against bytes 0 and 1 of each directory entry.
The hashing code in pseudo-C would look something like this:
// Pseudo-code only: swap_bytes/rotate_left are not real functions.
// hash is a 16-bit accumulator; the rotate treats it as a plain bit pattern.
int16 hash = 0;
while (*buf != 0)
{
swap_bytes(hash); // swap lo- and hi bytes
rotate_left(hash, 1); // rotate left by 1 bit
// "toupper" here stands for the game's own folding: clear bit 7, then
// subtract 0x20 from any value >= 0x60
h2 = toupper(*buf++); // get next char, normalize to upper case
hash += h2; // add to hash value
}
File data
The files are simply stored at the offset given in the table. In MM4 and MM5, each byte of a file is XOR'ed with 0x35 to prevent snooping. For MM3, the XOR value (if any) still needs to be found.
Original assembly code fragments
The assembly code to descramble the header (ES:DI points to the buffer to be converted):
seg021:80E9 mov ah, 0ACh                 ; AH = running key, starts at 0ACh
seg021:80EB nop
seg021:80EC
seg021:80EC @@decode_loop:
seg021:80EC mov al, es:[di]              ; fetch the next scrambled byte
seg021:80EF rol al, 1
seg021:80F1 rol al, 1                    ; two single rotates = rotate left by 2
seg021:80F3 add al, ah                   ; add the running key
seg021:80F5 stosb                        ; write AL back to es:[di] and step DI
seg021:80F6 add ah, 67h                  ; advance the key for the next byte
seg021:80F9 loop @@decode_loop           ; CX (set by the caller, not shown) counts the bytes
The assembly code to generate the hash of a file name
seg021:82B8 mov ds, cs:load_file_name_segm
seg021:82BD mov si, cs:load_file_name_offs   ; DS:SI -> file name
seg021:82C2 sub bp, bp                       ; BP = 0
seg021:82C4 sub bx, bx                       ; BX = 0 (index into the 4-byte string)
seg021:82C6 sub cx, cx
seg021:82C8 mov cl, 4                        ; CX = 4: compare four characters
seg021:82CA mov dx, 7F60h                    ; DH = 7Fh mask, DL = 60h threshold
seg021:82CD
seg021:82CD @@loop1:
seg021:82CD lodsb                            ; AL = next file name character
seg021:82CE and al, dh                       ; clear bit 7
seg021:82D0 cmp al, dl
seg021:82D2 jb @@local1                      ; below 60h: leave unchanged
seg021:82D4 sub al, 20h ; convert to upper case
seg021:82D6
seg021:82D6 @@local1:
seg021:82D6 cmp al, cs:[bx+7F1Ch]            ; compare with byte BX of the string at cs:7F1Ch
seg021:82DB jz @@local2
seg021:82DD mov bp, 2                        ; mismatch: BP = 2
seg021:82E0
seg021:82E0 @@local2:
seg021:82E0 inc bx
seg021:82E1 loop @@loop1
seg021:82E3 sub si, 4                        ; step SI back over the four bytes just read
seg021:82E6 sub ax, ax                       ; AX = 0, so AH stays 0 below
seg021:82E8 sub bx, bx                       ; BX = 0: hash accumulator
seg021:82EA
seg021:82EA @@loop2: ; filename hashing loop
seg021:82EA lodsb                            ; AL = next file name character
seg021:82EB or ax, ax
seg021:82ED jz @@local4                      ; terminating zero byte: done
seg021:82EF and al, dh                       ; clear bit 7
seg021:82F1 cmp al, dl
seg021:82F3 jb @@local3                      ; below 60h: leave unchanged
seg021:82F5 sub al, 20h ; convert to upper case
seg021:82F7
seg021:82F7 @@local3:
seg021:82F7 xchg bl, bh                      ; swap the hash bytes
seg021:82F9 rol bx, 1                        ; rotate the 16-bit hash left by 1
seg021:82FB add bx, ax                       ; add the character
seg021:82FD jmp short @@loop2
seg021:82FF ; ---------------------------------------------------------------------------
seg021:82FF
seg021:82FF @@local4: ; hash code is in BX
(at seg021:7F1Ch there's the string "MAZE", I don't know what that's used for...)
Extraction program
I've written this small program to extract files from a .CC file. The syntax is :
program infile.cc file.ext
where 'program' is the name of the compiled program, 'infile.cc' is a "CC" file from MM3, MM4 or MM5 and 'file.ext' is the name of an embedded file in the CC file. For example in MM3, you can try text01.maz, scroll.icn...
#include <stdio.h>
#include <stdlib.h>
/*
 * De-scramble a .CC directory buffer in place.
 *
 * buf: bytes to decode.
 * l:   number of bytes in buf.
 *
 * Per byte: rotate left by 2, add the running key, then advance the key
 * by 0x67. The key starts at 0xAC; all arithmetic is modulo 256.
 */
void decode_buffer(unsigned char *buf, unsigned int l)
{
    unsigned char key = 0xAC;
    unsigned char *out = buf;
    unsigned int left = l;

    while (left-- > 0) {
        unsigned int byte = *out;
        unsigned int rol2 = ((byte << 2) | (byte >> 6)) & 0xff;

        *out++ = (unsigned char)((rol2 + key) & 0xff);
        key = (unsigned char)((key + 0x67) & 0xff);
    }
}
/*
 * Extract one embedded file from a .CC archive.
 *
 * Usage: program infile.cc file.ext
 *
 * argv[1] is the archive, argv[2] the name of the embedded file; the
 * extracted data is written to a file named argv[2] in the current
 * directory. The data is XOR'ed with 0x35 (the MM4/MM5 key).
 *
 * Returns 0 on success and EXIT_FAILURE on bad usage, on any I/O or
 * allocation error, or when no directory entry matches the name's hash.
 */
int main(int argc, char *argv[])
{
    const unsigned char xor_key = 0x35;
    const unsigned char *name;
    const unsigned char *entry = NULL;
    unsigned char *dir = NULL;          /* decoded directory, 8 bytes per entry */
    unsigned int num_files, i;
    unsigned int hash = 0;
    unsigned long offset;
    unsigned int length;
    int c1, c2;
    int rc = EXIT_FAILURE;
    FILE *fi = NULL;
    FILE *fo = NULL;

    if (argc < 3) {
        printf("Syntax: %s infile.cc file.ext\n", argv[0]);
        return EXIT_FAILURE;
    }

    fi = fopen(argv[1], "rb");
    if (fi == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /*
     * Hash the requested name: per character, swap the two hash bytes,
     * rotate the 16-bit value left by one and add the folded character.
     * Folding clears bit 7 and subtracts 0x20 from anything >= 0x60,
     * which is what the game itself does.
     */
    for (name = (const unsigned char *)argv[2]; *name != 0; name++) {
        unsigned int ch = *name & 0x7f;

        if (ch >= 0x60) {
            ch -= 0x20;
        }
        hash = ((hash << 8) | (hash >> 8)) & 0xffff;
        hash = ((hash << 1) | (hash >> 15)) & 0xffff;
        hash = (hash + ch) & 0xffff;
    }

    /* The file starts with a 16-bit little-endian entry count. */
    c1 = fgetc(fi);
    c2 = fgetc(fi);
    if (c1 == EOF || c2 == EOF) {
        fprintf(stderr, "%s: too short to contain a directory.\n", argv[1]);
        goto done;
    }
    num_files = (unsigned int)c1 | ((unsigned int)c2 << 8);
    printf("%s contains %u files.\n", argv[1], num_files);
    printf("Hash for '%s' is 0x%04x.\n", argv[2], hash);

    if (num_files > 0) {
        dir = malloc(8u * num_files);
        if (dir == NULL) {
            perror("malloc");
            goto done;
        }
        if (fread(dir, 8, num_files, fi) != num_files) {
            fprintf(stderr, "%s: directory is truncated.\n", argv[1]);
            goto done;
        }
        decode_buffer(dir, 8u * num_files);
    }

    /* Bytes 0-1 of each entry hold the hash, low byte first. */
    for (i = 0; i < num_files; i++) {
        const unsigned char *e = dir + 8u * i;

        if (((unsigned int)e[0] | ((unsigned int)e[1] << 8)) == hash) {
            entry = e;
            break;
        }
    }
    if (entry == NULL) {
        printf("File not found.\n");
        goto done;
    }

    /* Bytes 2-4: 24-bit offset; bytes 5-6: 16-bit length; both LSB first. */
    offset = (unsigned long)entry[2]
           | ((unsigned long)entry[3] << 8)
           | ((unsigned long)entry[4] << 16);
    length = (unsigned int)entry[5] | ((unsigned int)entry[6] << 8);
    printf("Entry #%u, offset=%06lx, length=%04x\n", i, offset, length);

    if (fseek(fi, (long)offset, SEEK_SET) != 0) {
        perror(argv[1]);
        goto done;
    }
    fo = fopen(argv[2], "wb");
    if (fo == NULL) {
        perror(argv[2]);
        goto done;
    }

    for (i = 0; i < length; i++) {
        int c = fgetc(fi);

        if (c == EOF) {
            fprintf(stderr, "%s: unexpected end of file in entry data.\n", argv[1]);
            goto done;
        }
        if (fputc((c ^ xor_key) & 0xff, fo) == EOF) {
            perror(argv[2]);
            goto done;
        }
    }

    /* fclose flushes; a failure here means the output is incomplete. */
    if (fclose(fo) != 0) {
        fo = NULL;
        perror(argv[2]);
        goto done;
    }
    fo = NULL;
    rc = 0;

done:
    if (fo != NULL) {
        fclose(fo);
    }
    fclose(fi);
    free(dir);
    return rc;
}
Listing files
Based on the same code and functions, here are some variants for various needs...
Here is code for 'decrypting' all header information. Call it with either only the .cc file as argument, or with an optional file which contains a list of filenames. If you supply that list (for example taken from here), the program will print out the filenames it has identified.
#include <stdio.h>
#include <stdlib.h>
/*
 * Undo the scrambling of the .CC directory, in place.
 *
 * buf: directory bytes as read from the file.
 * l:   how many bytes of buf to decode.
 *
 * Every byte is rotated left by two bits and a running key is added;
 * the key begins at 0xAC and is increased by 0x67 (mod 256) each step.
 */
void decode_buffer(unsigned char *buf, unsigned int l)
{
    unsigned int pos;
    unsigned int key = 0xAC;

    for (pos = 0; pos != l; ++pos) {
        unsigned int b = buf[pos];

        /* Masking once after the add gives the same low 8 bits. */
        b = ((b << 2) | (b >> 6)) + key;
        buf[pos] = (unsigned char)(b & 0xff);
        key = (key + 0x67) & 0xff;
    }
}
/* A candidate file name paired with its directory hash. */
struct mm_file {
    char name[20];          /* NUL-terminated file name */
    unsigned short hash;    /* 16-bit hash of name */
};

/* Table of candidate names; only the first numNames slots are in use. */
struct mm_file filenames[1024];

/* Number of entries of filenames[] that have been filled in. */
unsigned int numNames = 0;
/*
 * Compute the 16-bit .CC directory hash of a file name.
 *
 * buf: NUL-terminated file name.
 *
 * For each character the two bytes of the hash are swapped, the 16-bit
 * value is rotated left by one bit, and the folded character is added
 * (wrapping at 16 bits). Folding clears bit 7 and subtracts 0x20 from
 * any value >= 0x60.
 *
 * Returns the hash; an empty name hashes to 0.
 */
unsigned short dohash(char *buf)
{
    unsigned int hash = 0;
    const char *p;

    for (p = buf; *p != 0; p++) {
        unsigned int ch = (unsigned int)(*p & 0x7f);

        if (ch >= 0x60) {
            ch -= 0x20;
        }
        hash = ((hash << 8) | (hash >> 8)) & 0xffff;    /* swap bytes */
        hash = ((hash << 1) | (hash >> 15)) & 0xffff;   /* rotate left 1 */
        hash = (hash + ch) & 0xffff;
    }
    return (unsigned short)hash;
}
int main(int argc, char *argv[])
{
unsigned char *buf;
unsigned int buflen,i,j,k;
unsigned char c1, c2;
unsigned char h0, h1, h2, h3;
unsigned long offset;
unsigned int length;
unsigned int hash;
unsigned int temp;
unsigned char xor_key=0x035;
char fnbuffer[30];
if (argc < 2) {
printf("Syntax: %s file.cc [filenames.txt]\n", argv[0]);
exit(-1);
}
FILE *fi = fopen(argv[1], "rb");
if (argc > 2)
{
FILE *fnames = fopen(argv[2], "rb");
while (!feof(fnames))
{
fgets(fnbuffer, 20, fnames);
sscanf(fnbuffer, "%s", filenames[numNames].name);
filenames[numNames].hash = dohash(filenames[numNames].name);
numNames++;
}
fclose(fnames);
}
c1 = fgetc(fi);
c2 = fgetc(fi);
buflen = (unsigned int)c1 | ((unsigned int)c2 << 8);
printf("%s contains %d files.\n", argv[1], buflen);
buf = (unsigned char *)malloc(8 * buflen);
fread(buf, 8, buflen, fi);
decode_buffer(buf, 8*buflen);
j = 0;
for (i = 0; i < buflen; i++) {
hash = (unsigned long)buf[(i*8)] & 0xff;
hash |= (((unsigned long)buf[(i*8)+1]) << 8) & 0xff00;
offset = (unsigned long)buf[(i*8)+2] & 0xff;
offset |= (((unsigned long)buf[(i*8)+3]) << 8) & 0xff00;
offset |= (((unsigned long)buf[(i*8)+4]) << 16) & 0xff0000;
length = (unsigned long)buf[(i*8)+5] & 0xff;
length |= (((unsigned long)buf[(i*8)+6]) << 8) & 0xff00;
printf("Entry #%d, hash=%04x offset=%06x, length=%04x", j, hash, offset, length);
for (k = 0; k < numNames; k++)
{
if (filenames[k].hash == hash)
{
printf(", %s", filenames[k].name);
break;
}
}
printf("\n");
j++;
}
fclose(fi);
free(buf);
return 0;
}
And this is code for just hashing a file name (so you can try to find the hash in the header):
#include <stdio.h>
#include <stdlib.h>
/*
 * Print the .CC directory hash of a file name.
 *
 * Usage: program file.ext
 *
 * Returns 0 after printing the hash of argv[1], or EXIT_FAILURE when no
 * name was given.
 */
int main(int argc, char *argv[])
{
    const unsigned char *p;
    unsigned int hash = 0;

    if (argc < 2) {
        /* The usage string must not contain a conversion without an argument. */
        printf("Syntax: %s file.ext\n", argv[0]);
        return EXIT_FAILURE;
    }

    /*
     * Per character: swap the two hash bytes, rotate the 16-bit value
     * left by one, then add the folded character. Folding clears bit 7
     * and subtracts 0x20 from anything >= 0x60, as the game does.
     */
    for (p = (const unsigned char *)argv[1]; *p != 0; p++) {
        unsigned int ch = *p & 0x7f;

        if (ch >= 0x60) {
            ch -= 0x20;
        }
        hash = ((hash << 8) | (hash >> 8)) & 0xffff;
        hash = ((hash << 1) | (hash >> 15)) & 0xffff;
        hash = (hash + ch) & 0xffff;
    }

    printf("Hash for '%s' is 0x%04x.\n", argv[1], hash);
    return 0;
}
File analysis
For the moment MM3.CC is briefly analysed in MM3.CC and DARK.CC in DARK.CC
