// ================================================================================================
//
// This simple client application demonstrates how to simply interface with the LumenVox TTS Server
// to synthesize some specified text into a specified wav file.
//
// Both C and C++ interfaces are implemented here (so that you can compare the differences).
// The #define USE_CPP_API line below selects the C++ interface; comment that line out if you
// want the C interface to be used instead.
//
// Modified: Randal Rand
//           randal@lumenvox.com
//           858-707-0707, say "randal"
// Date 06.27.2011
// Modification notes: Made the application more flexible to handle command line input from the
//                     user.  Also, added some error checking after TTS API method calls.
//
// ================================================================================================



#include <string>
#include <stdio.h>
#include <string.h>
#include <LV_SRE.h>
#include <LV_TTS.h>

#ifndef WIN32
#define stricmp strcasecmp
#endif


// Uncomment the next line to use the C++ implementation (Commented = C style)
#define USE_CPP_API
#ifdef USE_CPP_API
#include <LVTTSClient.h>
#endif




// Prints the command line synopsis and a description of every option to stdout.
// 'prog_name' is the name shown in the synopsis line (normally argv[0]).
void PrintUsage(const char *prog_name)
{
	// Fixed help text that follows the synopsis; each entry is one complete output line
	static const char *const option_help[] =
	{
		"    where <OPTION> is one of:\n",
		"      -t <text-to-speak>\n",
		"      -s <SSML-file-path>\n",
		"    where [LANGUAGE] is the language that the voice speaks\n",
		"      -l en-US\n",
		"    where [GENDER] is that of the voice\n",
		"      -g male\n",
		"    where [VOICE] is one of the voices you've installed.\n",
		"      -v Paul\n",
		"    where <output_audio_file> is the name to give the file where the audio is to be saved.\n",
		"      -o some_audio_file_name.wav\n"
	};

	// The synopsis is the only line that needs formatting
	printf("Usage: %s <OPTION> [LANGUAGE] [GENDER] [VOICE] <output_audio_file>\n",  prog_name);

	const size_t line_count = sizeof(option_help) / sizeof(option_help[0]);

	for(size_t idx = 0; idx < line_count; ++idx)
	{
		fputs(option_help[idx], stdout);
	}
}





// For writing audio to wav file

// Writes an initial RIFF/WAVE header describing 16-bit PCM audio at 'sample_rate' Hz to 'fd'.
// The header records a data length of zero. Returns 0 on success.
int InitializeWavFile(FILE *fd, int sample_rate);

// Appends 'num_samples' samples from 'samples' to a file that already starts with the header
// written by InitializeWavFile, then updates the two size fields stored in that header.
// Returns 0 on success and -1 when the expected header cannot be read back from 'fd'.
int AppendSamplesToWaveFile(FILE *fd, short *samples, int num_samples);


int main(int argc, char *argv[])
{
	if(argc < 5)
	{
		PrintUsage(argv[0]);

		return 1;
	}

	// only using one variable to hold the text to be spoken
	std::string text_to_speak = "";

	const char *text_input_option = "";
	const char *SSML_file_path = "";
	const char *language_to_use = NULL;
	const char *gender_to_use = NULL;
	const char *name_of_voice_to_use = NULL;
	const char *output_audio_file = "";


	//=========================================================================
	// Process the command line parameters
	//=========================================================================
	for(int cmd_ln_cnt = 0; cmd_ln_cnt < argc; cmd_ln_cnt++)
	{
		if(((  stricmp(argv[cmd_ln_cnt], "-h") == 0
			|| stricmp(argv[cmd_ln_cnt], "/h") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-help") == 0
			|| stricmp(argv[cmd_ln_cnt], "/help") == 0)
		{
			PrintUsage(argv[0]);
			return 1;
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-t") == 0
			|| stricmp(argv[cmd_ln_cnt], "/t") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-t") == 0
			|| stricmp(argv[cmd_ln_cnt], "/t") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				text_to_speak = argv[cmd_ln_cnt+1];
				text_input_option = "t";
				cmd_ln_cnt++;
			}
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-s") == 0
			|| stricmp(argv[cmd_ln_cnt], "/s") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-s") == 0
			|| stricmp(argv[cmd_ln_cnt], "/s") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				SSML_file_path = argv[cmd_ln_cnt+1];
				text_input_option = "s";
				cmd_ln_cnt++;

			}
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-l") == 0
			|| stricmp(argv[cmd_ln_cnt], "/l") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-l") == 0
			|| stricmp(argv[cmd_ln_cnt], "/l") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				language_to_use = argv[cmd_ln_cnt+1];
				cmd_ln_cnt++;

			}
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-g") == 0
			|| stricmp(argv[cmd_ln_cnt], "/g") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-g") == 0
			|| stricmp(argv[cmd_ln_cnt], "/g") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				gender_to_use = argv[cmd_ln_cnt+1];
				cmd_ln_cnt++;

			}
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-v") == 0
			|| stricmp(argv[cmd_ln_cnt], "/v") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-v") == 0
			|| stricmp(argv[cmd_ln_cnt], "/v") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				name_of_voice_to_use = argv[cmd_ln_cnt+1];
				cmd_ln_cnt++;

			}
		}
		else if(((  stricmp(argv[cmd_ln_cnt], "-o") == 0
			|| stricmp(argv[cmd_ln_cnt], "/o") == 0) && strlen(argv[cmd_ln_cnt]) == 2)
			|| stricmp(argv[cmd_ln_cnt], "-o") == 0
			|| stricmp(argv[cmd_ln_cnt], "/o") == 0)
		{
			if(argv[cmd_ln_cnt+1]!=NULL)
			{
				output_audio_file = argv[cmd_ln_cnt+1];
				cmd_ln_cnt++;

			}
		}
	}

	if(text_input_option[0] == 's')
	{
		// Read the SSML file and store the contents in 'text_to_speak' variable
		FILE *f = fopen(SSML_file_path, "rb");

		if(f == NULL)
		{
			fprintf(stderr, "Cannot open SSML file: %s\n", SSML_file_path);
			return 2;
		}

		fseek(f, 0, SEEK_END);
		int fileSize = ftell(f);
		fseek(f, 0, SEEK_SET);
		char *fileBuf = new char[fileSize+1];
		int actualSize = (int)fread(fileBuf, 1,fileSize, f);
		fclose(f);

		fileBuf[actualSize] = '\0';
		text_to_speak = fileBuf;
		delete [] fileBuf;
	}

	// Specify the format of the audio file saved to disk (which may be different than the audio generated by the TTS synthesis)
	SYNTHESIS_SOUND_FORMAT output_audio_format = SFMT_ULAW;

	// Specify the format of the audio being requested from the TTS server
	int SynthesizedSoundFormat = SFMT_PCM;

	// This buffer will hold the synthesized audio
	unsigned char* synthesized_audio_buffer;
	
	// Length of bytes of synthesized output from the TTS server
	int TotalSynthesizedAudioBytes = 0;

	// Select 8 KHz sample rate for TTS audio generation
	int audioSamplingRate = 8000; // Hz

	// Return value from calling TTS requests below
	LV_TTS_RETURN_CODE retval = 0;

	printf("Synthesizing input text...\n%s\n", text_to_speak.c_str());

	// Initialize the ttsClient interface
#ifdef USE_CPP_API
	printf("CPP API Version\n");

	// LVTTSClient is the main object used to connect to the TTS server
	LVTTSClient ttsClient;

	// Initialize the TTS client object with the default or user-specified parameters
	retval = ttsClient.Initialize(language_to_use, gender_to_use, name_of_voice_to_use, audioSamplingRate);

	if(retval != LV_SUCCESS)
	{
		printf("Creating a TTS Client failed with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
		return -10;
	}

	// Tell the TTS client object what sound format to use
	ttsClient.SetPropertyEx(PROP_EX_SYNTH_SOUND_FORMAT, PROP_EX_VALUE_TYPE_INT_PTR, &SynthesizedSoundFormat);

	// Perform text-to-speech operation
	retval = ttsClient.Synthesize(text_to_speak.c_str(), LV_TTS_BLOCK);

	if(retval != LV_SUCCESS)
	{
		printf("Call to Synthesize failed with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
		return -11;
	}

	ttsClient.GetSynthesizedAudioBufferLength(&TotalSynthesizedAudioBytes);
#else

	// ttsClient is the handle used to associate with this specific TTS client connection (NULL means error)

	// Initialize the TTS client object with the default or user-specified parameters
	HTTSCLIENT ttsClient = LV_TTS_CreateClient(language_to_use, gender_to_use, name_of_voice_to_use, audioSamplingRate, &retval);

	if(ttsClient == NULL)
	{
		printf("Creating a TTS Client failed with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
		return -10;
	}

	retval = LV_TTS_SetPropertyEx(ttsClient, PROP_EX_SYNTH_SOUND_FORMAT, PROP_EX_VALUE_TYPE_INT_PTR, &SynthesizedSoundFormat, PROP_EX_TARGET_PORT);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to set ttsClient property for sound format with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

	// Perform text-to-speech operation
	retval = LV_TTS_Synthesize(ttsClient, text_to_speak.c_str(), LV_TTS_BLOCK);

	if(retval != LV_SUCCESS)
	{
		printf("Call to Synthesize failed with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
		return -11;
	}

	retval = LV_TTS_GetSynthesizedAudioBufferLength(ttsClient, &TotalSynthesizedAudioBytes);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSynthesizedAudioBufferLength with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}
#endif

	// Allocate required space for the audio buffer, to be populated by the TTS synthesis
	synthesized_audio_buffer = new unsigned char[TotalSynthesizedAudioBytes + 1];


	// Display the obtained results
#ifdef USE_CPP_API
	retval = ttsClient.GetSynthesizedAudioFormat((int *)&output_audio_format);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSynthesizedAudioFormat with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

	// Get the number of SSML Speech-Markers in the generated audio (use GetSSMLMarkOffsetInBuffer/GetSSMLMarkName for additional information)
	int NumSSMLSpeechMarkers = 0;
	retval = ttsClient.GetSSMLMarksCount(&NumSSMLSpeechMarkers);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSSMLMarksCount with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

	printf("Number of SSML markers : %d\n", NumSSMLSpeechMarkers);

	// Rather than getting the entire audio in one chunk as shown here, your application might prefer
	// to take several smaller chunks, for example if streaming out somewhere. This can be done by
	// specifying a smaller second parameter to GetSynthesizedAudioBuffer. The implementation was
	// designed to be similar to an fread type of operation...

	int bytes_returned = 0;

	// Copy all of the synthesized audio from the TTS server into synthesized_audio_buffer. This was generated by the earlier
	// call to Synthesize(). You don't need to read all of this data in one chunk as we're doing here. If you prefer, you
	// can get a little at a time, and keep calling this routine to get the next chunk - if you want to do this, specify the
	// number of bytes you want in each chunk using the second parameter. The implementation was
	// designed to be similar to an fread type of operation
	retval = ttsClient.GetSynthesizedAudioBuffer(synthesized_audio_buffer, TotalSynthesizedAudioBytes, &bytes_returned);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSynthesizedAudioBuffer with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

#else
	retval = LV_TTS_GetSynthesizedAudioFormat(ttsClient, (int *)&output_audio_format);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSynthesizedAudioFormat with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

	// Get the number of SSML Speech-Markers in the generated audio (use GetSSMLMarkOffsetInBuffer/GetSSMLMarkName for additional information)
	int NumSSMLSpeechMarkers = 0;
	retval = LV_TTS_GetSSMLMarksCount(ttsClient, &NumSSMLSpeechMarkers);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSSMLMarksCount with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

	printf("Number of SSML markers : %d\n", NumSSMLSpeechMarkers);

	// Rather than getting the entire audio in one chunk as shown here, your application might prefer
	// to take several smaller chunks, for example if streaming out somewhere. This can be done by
	// specifying a smaller second parameter to GetSynthesizedAudioBuffer. The implementation was
	// designed to be similar to an fread type of operation...

	int bytes_returned = 0;
	retval = LV_TTS_GetSynthesizedAudioBuffer(ttsClient, synthesized_audio_buffer, TotalSynthesizedAudioBytes, &bytes_returned);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_GetSynthesizedAudioBuffer with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}

#endif



	if(bytes_returned == TotalSynthesizedAudioBytes)
	{
		if(strlen(output_audio_file) > 0)
		{
			// The user specified an output filename

			FILE *outFile = fopen(output_audio_file, "w+b");

			if(outFile != NULL)
			{
				switch(output_audio_format)
				{
				case SFMT_ULAW:
				case SFMT_ALAW:
					fwrite(synthesized_audio_buffer, sizeof(char), TotalSynthesizedAudioBytes / sizeof(char), outFile);
					break;

				case SFMT_PCM:
					InitializeWavFile(outFile, audioSamplingRate);
					AppendSamplesToWaveFile(outFile, (short*)(synthesized_audio_buffer), TotalSynthesizedAudioBytes / sizeof(short));
					break;

				default:
					fprintf(stderr, "Unknown audio format returned\n");
					break;
				}

				fseek(outFile, 0, SEEK_END);
				printf("Synthesized Audio file saved (%d bytes)\n", ftell(outFile));

				fclose(outFile);

			}
			else
			{
				fprintf(stderr, "Could not write synthesized audio to %s\n", output_audio_file);
			}
		}
		else
		{
			// The user did not specify an output filename - just report the number of bytes generated

			printf("Synthesized Audio file generated without an output file (%d bytes)\n", bytes_returned);
		}
	}
	else
	{
		fprintf(stderr, "An error occurred, unexpected audio buffer size\n");
	}

	delete [] synthesized_audio_buffer;


	// Destroy the client
#ifdef USE_CPP_API
	// Destructor will automatically be called in C++ when ttsClient goes out of scope, so don't do anything here
#else
	retval = LV_TTS_DestroyClient(ttsClient);

	if(retval != LV_SUCCESS)
	{
		printf("Failed to LV_TTS_DestroyClient with error code %d (%s)\n", retval, LV_SRE_ReturnErrorString(retval));
	}
#endif

	// flush out any pending background operations like writing callsre files
	// LV_SRE_Shutdown() is to be called once per load-unload of the speechport dll/so
	LV_SRE_Shutdown();

	return 0;
}



// number of PCM channels
#define NUM_CHANNELS		1

// RIFF PCM sample format
#define RIFF_FORMAT_PCM		0x001


// Writes the 4-character chunk tag 'tag' to 'fd'. Returns 0 on success, -1 on a write error.
static int WriteWavTag(FILE *fd, const char *tag)
{
	return (fwrite(tag, 1, 4, fd) == 4) ? 0 : -1;
}

// Writes the low 16 bits of 'value' to 'fd' in little-endian byte order (the byte order used by
// WAV files), independent of the byte order of the host. Returns 0 on success, -1 on a write error.
static int WriteWavLE16(FILE *fd, unsigned long value)
{
	unsigned char bytes[2];

	bytes[0] = (unsigned char)(value & 0xFF);
	bytes[1] = (unsigned char)((value >> 8) & 0xFF);

	return (fwrite(bytes, 1, 2, fd) == 2) ? 0 : -1;
}

// Writes the low 32 bits of 'value' to 'fd' in little-endian byte order.
// Returns 0 on success, -1 on a write error.
static int WriteWavLE32(FILE *fd, unsigned long value)
{
	unsigned char bytes[4];

	bytes[0] = (unsigned char)(value & 0xFF);
	bytes[1] = (unsigned char)((value >> 8) & 0xFF);
	bytes[2] = (unsigned char)((value >> 16) & 0xFF);
	bytes[3] = (unsigned char)((value >> 24) & 0xFF);

	return (fwrite(bytes, 1, 4, fd) == 4) ? 0 : -1;
}

// Reads a 32-bit little-endian value from 'fd' into '*value'.
// Returns 0 on success, -1 when fewer than four bytes could be read.
static int ReadWavLE32(FILE *fd, unsigned long *value)
{
	unsigned char bytes[4];

	if(fread(bytes, 1, 4, fd) != 4)
		return -1;

	*value =  (unsigned long)bytes[0]
			| ((unsigned long)bytes[1] << 8)
			| ((unsigned long)bytes[2] << 16)
			| ((unsigned long)bytes[3] << 24);

	return 0;
}

// Reads four bytes from 'fd' and compares them with the chunk tag 'tag'.
// Returns 0 when they match, -1 when they differ or cannot be read.
static int ExpectWavTag(FILE *fd, const char *tag)
{
	char found[4];

	if(fread(found, 1, 4, fd) != 4 || strncmp(found, tag, 4) != 0)
		return -1;

	return 0;
}


// This routine creates a simple WAV header in the specified file.
//
// The 44 byte header describes NUM_CHANNELS channel(s) of 16-bit PCM audio sampled at
// 'sample_rate' Hz and an empty data chunk; AppendSamplesToWaveFile() adds the audio and
// updates the sizes. All fields are written in little-endian byte order.
//
// Returns 0 on success, -1 when 'fd' is NULL or a write fails.
int InitializeWavFile(FILE *fd, int sample_rate)
{
	// Samples are stored as 16-bit values
	const unsigned long bytes_per_sample = 2;
	const unsigned long rate = (unsigned long)sample_rate;

	if(fd == NULL)
		return -1;

	// RIFF chunk; its size covers everything after the size field itself:
	// "WAVE" (4) + fmt chunk (8 + 16) + data chunk header (8)
	if(WriteWavTag(fd, "RIFF") != 0 || WriteWavLE32(fd, 8 + 16 + 12) != 0 || WriteWavTag(fd, "WAVE") != 0)
		return -1;

	// fmt chunk with a 16 byte body
	if(    WriteWavTag(fd, "fmt ") != 0
		|| WriteWavLE32(fd, 16) != 0
		|| WriteWavLE16(fd, RIFF_FORMAT_PCM) != 0						// sample format
		|| WriteWavLE16(fd, NUM_CHANNELS) != 0							// channel count
		|| WriteWavLE32(fd, rate) != 0									// samples per second
		|| WriteWavLE32(fd, rate * NUM_CHANNELS * bytes_per_sample) != 0	// bytes per second
		|| WriteWavLE16(fd, NUM_CHANNELS * bytes_per_sample) != 0		// bytes per sample frame
		|| WriteWavLE16(fd, bytes_per_sample * 8) != 0)					// bits per sample
		return -1;

	// data chunk, empty for now
	if(WriteWavTag(fd, "data") != 0 || WriteWavLE32(fd, 0) != 0)
		return -1;

	return 0;
}


// Appends 'num_samples' 16-bit samples (per channel) from 'samples' to a WAV file whose header
// was written by InitializeWavFile(), then adds the number of appended bytes to the RIFF and
// data chunk sizes stored in that header.
//
// 'fd' must be open for both reading and writing. The samples are written at offset
// 8 + <RIFF chunk size>, which is the end of the file for a file produced by these routines.
// Samples are stored in little-endian byte order whatever the byte order of the host.
//
// Returns 0 on success, -1 when an argument is invalid, the header is not the expected
// RIFF/WAVE/fmt/data sequence, or a seek, read or write fails.
int AppendSamplesToWaveFile(FILE *fd, short *samples, int num_samples)
{
	unsigned long file_bytes = 0;
	unsigned long fmt_bytes = 0;
	unsigned long data_bytes = 0;

	if(fd == NULL || num_samples < 0 || (samples == NULL && num_samples > 0))
		return -1;

	/* Check */

	if(fseek(fd, 0L, SEEK_SET) != 0)
		return -1;

	if(    ExpectWavTag(fd, "RIFF") != 0
		|| ReadWavLE32(fd, &file_bytes) != 0
		|| ExpectWavTag(fd, "WAVE") != 0
		|| ExpectWavTag(fd, "fmt ") != 0
		|| ReadWavLE32(fd, &fmt_bytes) != 0)
		return -1;

	// The data chunk is expected directly after the body of the fmt chunk
	const long fmt_body_offset = ftell(fd);

	if(fmt_body_offset < 0)
		return -1;

	const long data_offset = fmt_body_offset + (long)fmt_bytes;

	if(fseek(fd, data_offset, SEEK_SET) != 0)
		return -1;

	if(ExpectWavTag(fd, "data") != 0 || ReadWavLE32(fd, &data_bytes) != 0)
		return -1;

	/* Update */

	if(fseek(fd, (long)(8 + file_bytes), SEEK_SET) != 0)
		return -1;

	// Convert the samples to little-endian bytes in batches so that the output does not depend
	// on the host byte order while still being written with few fwrite calls
	const int total_values = NUM_CHANNELS * num_samples;
	unsigned char batch_bytes[1024];
	const int max_batch = (int)(sizeof(batch_bytes) / 2);
	int values_done = 0;

	while(values_done < total_values)
	{
		int batch = total_values - values_done;

		if(batch > max_batch)
			batch = max_batch;

		for(int i = 0; i < batch; i++)
		{
			const unsigned int value = (unsigned int)samples[values_done + i] & 0xFFFFu;

			batch_bytes[2 * i]     = (unsigned char)(value & 0xFF);
			batch_bytes[2 * i + 1] = (unsigned char)(value >> 8);
		}

		if(fwrite(batch_bytes, 1, (size_t)(2 * batch), fd) != (size_t)(2 * batch))
			return -1;

		values_done += batch;
	}

	const unsigned long added_bytes = 2UL * (unsigned long)total_values;

	file_bytes += added_bytes;
	data_bytes += added_bytes;

	// RIFF chunk size lives right after the "RIFF" tag
	if(fseek(fd, 4L, SEEK_SET) != 0 || WriteWavLE32(fd, file_bytes) != 0)
		return -1;

	// data chunk size lives right after the "data" tag
	if(fseek(fd, 4 + data_offset, SEEK_SET) != 0 || WriteWavLE32(fd, data_bytes) != 0)
		return -1;

	return 0;
}

