We just moved to a different server. Please be patient until all files and pages are restored and the MediaWiki software has been updated. Thank you

.CC

From REWiki
Jump to: navigation, search

Contents

Games

The .CC file format is used by Might and Magic 3, Might and Magic 4 and Might and Magic 5. It contains all required game data (images, sounds, scripts, ...)

Structure

A .CC file has a scrambled header which contains the directory. Files are not accessed by name but by a hash code calculated from the file name. The actual file data in MM4 and MM5 is XOR'ed by 0x35, presumably to prevent people from reading important pieces of text by simply opening the file in a hex editor. For MM3 it seems that there is either a different XOR value, or none at all. Graphical representation of the file structure : CCstructure.PNG

Header

The first uint16 in the file (LSB first), NUMFILES, gives the number of files. Each file entry has 8 bytes, so the total header size is 2 + NUMFILES*8. Only the NUMFILES*8 directory bytes are scrambled; the NUMFILES word itself is read as-is. Here's a code fragment to de-scramble the directory, converted directly from the assembly code of one of the game's executables:

// De-scramble the directory part of a .CC header in place.
//
//   buf : the scrambled directory bytes (the data that follows the
//         16-bit file count)
//   l   : number of bytes in buf
//
// Each byte is rotated left by two bits and then a running key is added.
// The key starts at 0xAC and grows by 0x67 per byte; all arithmetic is
// modulo 256.
void decode_header_buf(unsigned char *buf, unsigned int l)
{
  unsigned char al, ah=0xAC;
  unsigned int i; // same type as l, so the loop comparison is not mixed-sign
  
  for (i = 0; i < l; i++)
  {
    al = buf[i];
    al = (((unsigned int)al << 2) | ((unsigned int)al >> 6)) & 0xff; // assembly: ROL al,2
    al = (al + ah) & 0xff;
    buf[i] = al;
    ah = (ah + 0x67) & 0xff; // advance the key for the next byte
  } 
}

After this transformation, each of the file-entries looks like this:

Byte 0-1: file hash
Byte 2-4: 24-bit file offset, LSB first
Byte 5-6: 16-bit file length, LSB first
Byte 7  : apparently always zero

The file hash works using this algorithm (yes, I know it's ugly, it's taken directly from assembly and not optimized in any way):

  const char *buf = "filename.ext";
  unsigned char h0, h1, h2, h3;   // h3 is needed as a second temporary below
  unsigned int temp;

  // h0:h1 together form the 16-bit hash (h0 = high byte, h1 = low byte)
  h0 = h1 = 0;
  while (*buf != 0)
  {
    // swap h0 and h1
    h2 = h1; h1 = h0; h0 = h2; 
    // rotate left 16bit value h0=msb, h1=lsb
    h2 = (((unsigned int)h0 << 1) & 0xff) | (((unsigned int)h1 >> 7) & 0xff); // new h0
    h3 = (((unsigned int)h1 << 1) & 0xff) | (((unsigned int)h0 >> 7) & 0xff); // new h1
    h0 = h2;
    h1 = h3;
    // h2 = next character, bit 7 stripped, with 0x20 subtracted from
    // everything >= 0x60 (this is how the game upper-cases the name)
    h2 = (*buf++) & 0x7f;
    if (h2 >= 0x60)
      h2 = h2 - 0x20;
    // add the character to the 16-bit value h0:h1, dropping any carry
    temp = ((unsigned int)h0 << 8) | (unsigned int)h1;
    temp = (temp + h2) & 0xffff;
    // these are the new h0, h1
    h0 = (temp >> 8) & 0xff;
    h1 = temp & 0xff; 
  }

After that, h0 and h1 contain the file hash, which is checked against bytes 0 and 1 of each directory entry. The hashing code in pseudo-C would look something like this:

// Pseudo-code only: hash is the 16-bit value whose high byte is h0 and
// whose low byte is h1 in the C fragment.
int16 hash = 0;
while (*buf != 0)
{
  swap_bytes(hash);     // exchange the low and high bytes
  rotate_left(hash, 1); // 16-bit rotate left by 1 bit
  h2 = toupper(*buf++); // next char; the real code masks with 0x7f and subtracts 0x20 when >= 0x60
  hash += h2;           // 16-bit add, any carry out of the top bit is dropped
}

File data

The files are simply stored at the offset given in the table. In MM4 and MM5, each byte of a file is XOR'ed with 0x35 to prevent snooping. For MM3, the XOR value (if any) still needs to be found.

Original assembly code fragments

The assembly code to descramble the header (ES:DI points to the buffer to be converted):

seg021:80E9                   mov   ah, 0ACh          ; AH = running key, initial value 0ACh
seg021:80EB                   nop
seg021:80EC 
seg021:80EC @@decode_loop:
seg021:80EC                   mov   al, es:[di]       ; AL = next scrambled byte
seg021:80EF                   rol   al, 1
seg021:80F1                   rol   al, 1             ; two 1-bit rotates = rotate left by 2
seg021:80F3                   add   al, ah            ; add the running key (8-bit, wraps)
seg021:80F5                   stosb                   ; write AL back to ES:[DI]; DI advances (assumes direction flag clear)
seg021:80F6                   add   ah, 67h           ; step the key by 67h for the next byte
seg021:80F9                   loop  @@decode_loop     ; repeat CX times; CX is set outside this fragment

The assembly code to generate the hash of a file name

seg021:82B8                   mov   ds, cs:load_file_name_segm
seg021:82BD                   mov   si, cs:load_file_name_offs  ; DS:SI -> file name
seg021:82C2                   sub   bp, bp            ; BP = 0
seg021:82C4                   sub   bx, bx            ; BX = 0, index into the 4-byte string at cs:7F1Ch
seg021:82C6                   sub   cx, cx
seg021:82C8                   mov   cl, 4             ; CX = 4 characters to compare
seg021:82CA                   mov   dx, 7F60h         ; DH = 7Fh (mask), DL = 60h (threshold)
seg021:82CD 
seg021:82CD @@loop1:    ; compare the first 4 name characters with the string at cs:7F1Ch
seg021:82CD                   lodsb                   ; AL = next name character
seg021:82CE                   and   al, dh            ; clear bit 7
seg021:82D0                   cmp   al, dl
seg021:82D2                   jb    @@local1          ; below 60h: leave as is
seg021:82D4                   sub   al, 20h  ; convert to upper case
seg021:82D6 
seg021:82D6 @@local1:
seg021:82D6                   cmp   al, cs:[bx+7F1Ch]
seg021:82DB                   jz    @@local2
seg021:82DD                   mov   bp, 2             ; mismatch: BP = 2 (BP's later use is not shown here)
seg021:82E0 
seg021:82E0 @@local2:
seg021:82E0                   inc   bx
seg021:82E1                   loop  @@loop1
seg021:82E3                   sub   si, 4             ; rewind SI to the start of the name
seg021:82E6                   sub   ax, ax            ; AX = 0; AH stays 0 because lodsb only loads AL
seg021:82E8                   sub   bx, bx            ; BX = hash accumulator, starts at 0
seg021:82EA 
seg021:82EA @@loop2:    ; filename hashing loop
seg021:82EA                   lodsb
seg021:82EB                   or    ax, ax
seg021:82ED                   jz    @@local4          ; terminating zero byte ends the loop
seg021:82EF                   and   al, dh            ; clear bit 7
seg021:82F1                   cmp   al, dl
seg021:82F3                   jb    @@local3
seg021:82F5                   sub   al, 20h  ; convert to upper case
seg021:82F7 
seg021:82F7 @@local3:
seg021:82F7                   xchg  bl, bh            ; swap the two hash bytes
seg021:82F9                   rol   bx, 1             ; 16-bit rotate left by 1
seg021:82FB                   add   bx, ax            ; add the character (AH = 0)
seg021:82FD                   jmp   short @@loop2
seg021:82FF ; ---------------------------------------------------------------------------
seg021:82FF 
seg021:82FF @@local4:         ; hash code is in BX

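(At seg021:7F1Ch there's the string "MAZE". The first loop above compares the first four upper-cased characters of the file name against it and sets BP to 2 if any of them differ, so BP stays 0 only for names starting with "MAZE". I don't know what that flag is used for afterwards...)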

Extraction program

I've written this small program to extract files from a .CC file. The syntax is :
program infile.cc file.ext
where 'program' is the name of the compiled program, 'infile.cc' is a "CC" file from MM3, MM4 or MM5 and 'file.ext' is the name of an embedded file in the CC file. For example in MM3, you can try text01.maz, scroll.icn...

#include <stdio.h>
#include <stdlib.h>

// Undo the scrambling of the .CC directory, in place.
// buf points at the l scrambled bytes that follow the file count.
// Per byte: rotate left by 2, then add a key that starts at 0xAC and
// is increased by 0x67 after every byte (everything modulo 256).
void decode_buffer(unsigned char *buf, unsigned int l)
{
  unsigned int key = 0xAC;

  for (; l != 0; l--, buf++)
  {
    unsigned int byte = *buf;
    unsigned int rotated = ((byte << 2) | (byte >> 6)) & 0xff; // ROL 2

    *buf = (unsigned char)((rotated + key) & 0xff);
    key = (key + 0x67) & 0xff;
  }
}

// Extract one embedded file from a .CC archive.
//
//   argv[1] : the .CC file to read
//   argv[2] : name of the embedded file; also used as the output file name
//
// The name is hashed, the directory is de-scrambled and searched for the
// hash, and the file data is copied out with every byte XOR'ed with 0x35
// (the value used by MM4 and MM5).
// Returns 0 on success and -1 on any error or when the file is not found.
int main(int argc, char *argv[])
{
  const unsigned char xor_key = 0x35;
  const unsigned char *name;
  const unsigned char *rec;
  unsigned char *dir = NULL;
  unsigned int hash = 0;
  unsigned int numfiles, entry, length, n;
  unsigned long offset;
  int lo, hi, c;
  int status = -1;
  FILE *fi;
  FILE *fo = NULL;

  if (argc < 3)
  {
    printf("Syntax: %s infile.cc file.ext\n", argv[0]);
    exit(-1);
  }
  fi = fopen(argv[1], "rb");
  if (fi == NULL)
  {
    perror(argv[1]);
    exit(-1);
  }

  // Hash the requested name: swap bytes, rotate left by 1, add the
  // upper-cased character, all on a 16-bit value.
  for (name = (const unsigned char *)argv[2]; *name != 0; name++)
  {
    unsigned int ch = *name & 0x7f;
    if (ch >= 0x60)
      ch -= 0x20;
    hash = ((hash << 8) | (hash >> 8)) & 0xffff;
    hash = ((hash << 1) | (hash >> 15)) & 0xffff;
    hash = (hash + ch) & 0xffff;
  }

  // The unscrambled 16-bit file count comes first, LSB first.
  lo = fgetc(fi);
  hi = fgetc(fi);
  if (lo == EOF || hi == EOF)
  {
    fprintf(stderr, "%s: too short to contain a header.\n", argv[1]);
    goto done;
  }
  numfiles = (unsigned int)lo | ((unsigned int)hi << 8);
  printf("%s contains %u files.\n", argv[1], numfiles); 
  printf("Hash for '%s' is 0x%02x%02x.\n", argv[2], (hash >> 8) & 0xff, hash & 0xff);

  // Allocate at least one entry so that an empty archive is not
  // mistaken for an allocation failure.
  dir = calloc(numfiles ? numfiles : 1, 8);
  if (dir == NULL)
  {
    fprintf(stderr, "Out of memory.\n");
    goto done;
  }
  if (fread(dir, 8, numfiles, fi) != numfiles)
  {
    fprintf(stderr, "%s: directory is truncated.\n", argv[1]);
    goto done;
  }
  decode_buffer(dir, 8 * numfiles);

  // Directory bytes 0-1 hold the hash, LSB first.
  for (entry = 0; entry < numfiles; entry++)
  {
    rec = dir + 8 * entry;
    if (rec[0] == (hash & 0xff) && rec[1] == ((hash >> 8) & 0xff))
      break;
  }
  if (entry == numfiles)
  {
    printf("File not found.\n");
    goto done;
  }

  // Bytes 2-4: 24-bit offset, bytes 5-6: 16-bit length, both LSB first.
  rec = dir + 8 * entry;
  offset = (unsigned long)rec[2] | ((unsigned long)rec[3] << 8) | ((unsigned long)rec[4] << 16);
  length = (unsigned int)rec[5] | ((unsigned int)rec[6] << 8);
  printf("Entry #%u, offset=%06lx, length=%04x\n", entry, offset, length); 

  fo = fopen(argv[2], "wb");
  if (fo == NULL)
  {
    perror(argv[2]);
    goto done;
  }
  if (fseek(fi, (long)offset, SEEK_SET) != 0)
  {
    perror(argv[1]);
    goto done;
  }
  for (n = 0; n < length; n++)
  {
    c = fgetc(fi);
    if (c == EOF)
    {
      fprintf(stderr, "%s: file data is truncated.\n", argv[1]);
      goto done;
    }
    if (fputc((c ^ xor_key) & 0xff, fo) == EOF)
    {
      perror(argv[2]);
      goto done;
    }
  }
  status = 0;

done:
  // fclose flushes the output, so a failure there is a write error too
  if (fo != NULL && fclose(fo) != 0)
  {
    perror(argv[2]);
    status = -1;
  }
  fclose(fi);
  free(dir);
  return status;
}

Listing files

Based on the same code and functions, here are some variants for various needs...
Here is code for 'decrypting' all header information. Call it with either only the .cc file as argument, or with an additional file which contains a list of filenames, one per line. If you supply that list (for example taken from here), the program will print, next to each directory entry, the filename it has identified by its hash.

#include <stdio.h>
#include <stdlib.h>

// De-scramble l directory bytes in buf, in place: every byte is rotated
// left by two bits and then has a running key added to it. The key is
// 0xAC for the first byte and goes up by 0x67 (mod 256) per byte.
void decode_buffer(unsigned char *buf, unsigned int l)
{
  unsigned char key = 0xAC;
  unsigned int pos = 0;

  while (pos < l)
  {
    unsigned char in = buf[pos];
    unsigned char rol2 = (unsigned char)(((in << 2) | (in >> 6)) & 0xff);

    buf[pos] = (unsigned char)((rol2 + key) & 0xff);
    key = (unsigned char)((key + 0x67) & 0xff);
    pos++;
  }
}

// One candidate file name together with its hash. main() fills this
// table from the optional name list file and searches it by hash for
// every directory entry.
struct mm_file {
  char name[20];        // file name as a NUL-terminated string
  unsigned short hash;  // dohash(name)
} filenames[1024];

// Number of entries of filenames[] that are in use.
unsigned int numNames = 0;

// Compute the 16-bit .CC directory hash of the NUL-terminated name buf.
// For every character the running hash has its two bytes swapped, is
// rotated left by one bit, and then the character is added. Characters
// are first masked with 0x7f and, if >= 0x60, reduced by 0x20 (the
// game's way of upper-casing).
unsigned short dohash(char *buf)
{
  unsigned int acc = 0;

  for (; *buf != 0; buf++) {
    unsigned int ch = (unsigned char)*buf & 0x7f;

    if (ch >= 0x60)
      ch -= 0x20;

    acc = ((acc << 8) | (acc >> 8)) & 0xffff;   // swap high and low byte
    acc = ((acc << 1) | (acc >> 15)) & 0xffff;  // rotate left by 1
    acc = (acc + ch) & 0xffff;                  // add, dropping the carry
  }
  return (unsigned short)acc;
}

// List the directory of a .CC archive.
//
//   argv[1] : the .CC file to read
//   argv[2] : optional text file with one candidate file name per line
//
// Every directory entry is printed with its hash, offset and length.
// When a name list is given, entries whose hash matches a listed name
// also show that name.
// Returns 0 on success and -1 on error.
int main(int argc, char *argv[])
{
	unsigned char *buf;
	unsigned int buflen, i, k;
	unsigned long offset;
	unsigned int length;
	unsigned int hash;
	int lo, hi;
	char fnbuffer[30];
	FILE *fi;

	if (argc < 2) {
		printf("Syntax: %s file.cc [filenames.txt]\n", argv[0]);
		exit(-1);
	}
	fi = fopen(argv[1], "rb");
	if (fi == NULL) {
		perror(argv[1]);
		exit(-1);
	}

	if (argc > 2) {
		FILE *fnames = fopen(argv[2], "rb");
		if (fnames == NULL) {
			perror(argv[2]);
			fclose(fi);
			exit(-1);
		}
		// Loop on the fgets result (not feof) so a failed read never
		// re-processes the previous line, and stop when the table is full.
		while (numNames < sizeof filenames / sizeof filenames[0]
		       && fgets(fnbuffer, sizeof fnbuffer, fnames) != NULL) {
			// %19s keeps the name within name[20]; blank lines are skipped
			if (sscanf(fnbuffer, "%19s", filenames[numNames].name) != 1)
				continue;
			filenames[numNames].hash = dohash(filenames[numNames].name);
			numNames++;
		}
		fclose(fnames);
	}

	// The unscrambled 16-bit file count comes first, LSB first.
	lo = fgetc(fi);
	hi = fgetc(fi);
	if (lo == EOF || hi == EOF) {
		fprintf(stderr, "%s: too short to contain a header.\n", argv[1]);
		fclose(fi);
		exit(-1);
	}
	buflen = (unsigned int)lo | ((unsigned int)hi << 8);
	printf("%s contains %u files.\n", argv[1], buflen); 

	// Allocate at least one entry so that an empty archive is not
	// mistaken for an allocation failure.
	buf = calloc(buflen ? buflen : 1, 8);
	if (buf == NULL) {
		fprintf(stderr, "Out of memory.\n");
		fclose(fi);
		exit(-1);
	}
	if (fread(buf, 8, buflen, fi) != buflen) {
		fprintf(stderr, "%s: directory is truncated.\n", argv[1]);
		free(buf);
		fclose(fi);
		exit(-1);
	}
	decode_buffer(buf, 8*buflen);

	for (i = 0; i < buflen; i++) {
		const unsigned char *rec = buf + i*8;

		// bytes 0-1: hash, 2-4: 24-bit offset, 5-6: 16-bit length (all LSB first)
		hash = (unsigned int)rec[0] | ((unsigned int)rec[1] << 8);
		offset = (unsigned long)rec[2] | ((unsigned long)rec[3] << 8) | ((unsigned long)rec[4] << 16);
		length = (unsigned int)rec[5] | ((unsigned int)rec[6] << 8);
		printf("Entry #%u, hash=%04x offset=%06lx, length=%04x", i, hash, offset, length); 
		for (k = 0; k < numNames; k++) {
			if (filenames[k].hash == hash) {
				printf(", %s", filenames[k].name);
				break;
			}
		}
		printf("\n");
	}

	fclose(fi);
	free(buf);
	return 0;
}

And this is code for just hashing a file name (so you can look for the resulting hash in the decoded header):

#include <stdio.h>
#include <stdlib.h>


// Print the .CC directory hash of the file name given in argv[1].
// The hash is built per character: swap the two bytes of the 16-bit
// value, rotate it left by one bit, then add the upper-cased character.
// Returns 0 on success; exits with -1 when no name is given.
int main(int argc, char *argv[])
{
	const unsigned char *name;
	unsigned int hash = 0;

	if (argc < 2) {
		// the usage text takes exactly one argument, the program name
		printf("Syntax: %s file.ext\n", argv[0]);
		exit(-1);
	}

	for (name = (const unsigned char *)argv[1]; *name != 0; name++) {
		// strip bit 7, then subtract 0x20 from everything >= 0x60
		unsigned int ch = *name & 0x7f;
		if (ch >= 0x60)
			ch -= 0x20;
		hash = ((hash << 8) | (hash >> 8)) & 0xffff;   // swap high and low byte
		hash = ((hash << 1) | (hash >> 15)) & 0xffff;  // rotate left by 1
		hash = (hash + ch) & 0xffff;                   // add, dropping the carry
	}
	printf("Hash for '%s' is 0x%02x%02x.\n", argv[1], (hash >> 8) & 0xff, hash & 0xff);

	return 0;
}

File analysis

For the moment MM3.CC is briefly analysed in MM3.CC and DARK.CC in DARK.CC

Personal tools