diff --git a/rhonda/STTEngine.cpp b/rhonda/STTEngine.cpp new file mode 100644 index 0000000..1058039 --- /dev/null +++ b/rhonda/STTEngine.cpp @@ -0,0 +1,464 @@ +/* + +Code to use Speech to text engine, this code can use Google API or Bing API + +By defaut I use 8820 Hz for sample rate for Bing Engine to have smaller file sier but works too with other sample rate. + + + + + + + + + +*/ + +// Aralox - http://stackoverflow.com/questions/25307487/how-to-use-libcurl-with-google-speech-api-what-is-the-equivalent-for-data-bin/25310710#25310710 + +#include +#include +#include +#include +#include + + +#ifdef WIN32 +#include +#else +#include +#endif + +#include +//#include + +#include "STTEngine.h" +#include "libs/slre.h" +#include "fonction.h" +#include "prog.h" + + +//#pragma comment(lib, "Rpcrt4.lib") + +std::string Uid; +std::string ApiKey; + + +void SetSTTApiKey(char *s) +{ + ApiKey = s; +#ifdef WIN32 + { + UUID id; + RPC_CSTR szUuid = NULL; + UuidCreate(&id); + UuidToStringA(&id, &szUuid); + Uid = (char*)szUuid; + RpcStringFreeA(&szUuid); + } +#else + { + uuid_t uuid; + uuid_generate_random(uuid); + char s[37]; + uuid_unparse(uuid, s); + Uid = (char*)s; + } +#endif +} + + +struct WriteThis { + const char *readptr; + long sizeleft; + long totalsize; +}; + +struct MemoryStruct { + char *memory; + size_t size; +}; + + +static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) +{ + size_t realsize = size * nmemb; + struct MemoryStruct *mem = (struct MemoryStruct *)userp; + + mem->memory = (char *)realloc(mem->memory, mem->size + realsize + 1); + if(mem->memory == NULL) { + /* out of memory! */ + printf("not enough memory (realloc returned NULL)\n"); + return 0; + } + + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + + return realsize; +} + + +int lastpercent = -1; +static size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp) +{ + + struct WriteThis *pooh = (struct WriteThis *)userp; + + int percent = 100*(pooh->totalsize - pooh->sizeleft)/pooh->totalsize; + if ((percent != lastpercent) && (percent % 5 == 0)) + { + printf("Avancement %d\n",percent); + lastpercent = percent; + } + + if (size*nmemb < 1) return 0; + + if (pooh->sizeleft) + { + long written = (long)(size * nmemb); + if (pooh->sizeleft < written) written = pooh->sizeleft; + memcpy(ptr, ((char*)(pooh->readptr)), written); + pooh->readptr += written; + pooh->sizeleft -= written; + return written; + } + +/* + if (pooh->sizeleft) { + *(char *)ptr = pooh->readptr[0]; // copy one single byte + pooh->readptr++; // advance pointer + pooh->sizeleft--; // less data left + return 1; // we return 1 byte at a time! + + } +*/ + return 0; /* no more data left to deliver */ +} + +int TranslateGoggle(char *Buff_flac, size_t s, char *resultat) +{ + CURL *curl; // curl handle + CURLcode res; + + int bestscore = 0; + + struct MemoryStruct data; + data.memory = (char *)malloc(1); /* will be grown as needed by the realloc above */ + data.size = 0; /* no data at this point */ + + resultat[0] = '\0'; + + curl = curl_easy_init(); + if (curl) + { + struct curl_slist *chunk = NULL; + + struct WriteThis pooh; + + char sizeHeader[255]; + + std::string apiurl; + apiurl = "https://www.google.com/speech-api/v2/recognize?output=json&lang="; + + if (GetLanguage() == 0) apiurl = apiurl + "FR-fr"; + else apiurl = apiurl + "EN-en"; + + apiurl = apiurl + "&key=" + ApiKey; + + if (s == 0) return 0; + + wprintf(L"File size %d Kb\n",s/1000); + + //chunk = curl_slist_append(chunk, "Content-Type: audio/l16; rate=44100"); + chunk = curl_slist_append(chunk, "Content-Type: audio/x-flac; rate=8820"); + + pooh.readptr = Buff_flac; + pooh.sizeleft = s; + pooh.totalsize = s; + + sprintf(sizeHeader,"Content-Length: %d",s); + chunk = curl_slist_append(chunk, sizeHeader); + + //disalbe Expect: 100-continue + chunk = curl_slist_append(chunk, "Expect:"); + + curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback); + curl_easy_setopt(curl, CURLOPT_READDATA, &pooh); + //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);// To debug + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk); + + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&data); + + curl_easy_setopt(curl, CURLOPT_CAINFO, 0L); + curl_easy_setopt(curl, CURLOPT_CAPATH, 0L); + //curl_easy_setopt(curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + + curl_easy_setopt(curl, CURLOPT_URL, apiurl.c_str()); + + res = curl_easy_perform(curl); + + Mywprintf(L"Resultat From Google\n %s \n",data.memory); + + + { + //parsing with regex + struct slre_cap caps[2]; + + + //Have a result with confidence ? + if (slre_match("{\"transcript\":\"([^\"]+)\",\"confidence\":([0-9\.]+)}", data.memory, data.size, caps, 2, 0) > 0) + { + char *tmp2; + int l; + + strncpy(resultat, caps[0].ptr, caps[0].len); + resultat[caps[0].len] = '\0'; + + l = caps[1].len; + if (l > 4) l = 4; + tmp2 = (char *)malloc((l + 1) * sizeof(char)); + strncpy(tmp2, caps[1].ptr + 2, l - 2); + tmp2[l - 2] = '\0'; + + bestscore = 1 + (atoi(tmp2)); + + free(tmp2); + + } + else + { + //There is no more thing to check for bing engine + bestscore = 0; + } + } + + curl_easy_cleanup(curl); + + } + + free(data.memory); + + return bestscore; +} + +//https://github.com/Microsoft/Cognitive-Documentation/blob/master/Content/en-us/Speech/API-Reference-REST/BingVoiceRecognition.md +//https://www.microsoft.com/cognitive-services/en-us/subscriptions +int TranslateBing(char *Buff_file, size_t s, char *resultat) +{ + CURL *curl; // curl handle + CURLcode res; + + int bestscore = 0; + + struct MemoryStruct data; + data.memory = (char *)malloc(1); /* will be grown as needed by the realloc above */ + data.size = 0; /* no data at this point */ + + resultat[0] = '\0'; + + //No size Return directly + if (s == 0) return 0; + + curl = curl_easy_init(); + if (curl) + { + + struct WriteThis pooh; + + char sizeHeader[600]; + char *Token = NULL; + + std::string apiurl; + std::string apiPostData; + + //************************** + // First api call + //************************** + + apiurl = "https://oxford-speech.cloudapp.net/token/issueToken"; + + apiPostData = "grant_type=client_credentials"; + apiPostData = apiPostData + "&client_id=" + ApiKey; + apiPostData = apiPostData + "&client_secret=" + ApiKey; + apiPostData = apiPostData + "&scope=https%3A%2F%2Fspeech.platform.bing.com"; + + curl_easy_setopt(curl, CURLOPT_POST, 1L); + + //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);// To debug + + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&data); + + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + + curl_easy_setopt(curl, CURLOPT_URL, apiurl.c_str()); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, apiPostData.c_str()); + + res = curl_easy_perform(curl); + + { + //parsing with regex + struct slre_cap caps[1]; + + //Have a result with confidence ? + if (slre_match("\"access_token\":\"([^\"]+)\"", data.memory, data.size, caps, 1, 0) > 0) + { + Token = (char *)malloc((caps[0].len + 1) * sizeof(char)); + strncpy(Token, caps[0].ptr, caps[0].len); + Token[caps[0].len] = '\0'; + } + } + + curl_easy_cleanup(curl); + + if (!Token) return 0; + + //************************************ + // Second API call + //************************************ + + free(data.memory); + data.memory = (char *)malloc(1); /* will be grown as needed by the realloc above */ + data.size = 0; /* no data at this point */ + + curl = curl_easy_init(); + if (curl) + { + struct curl_slist *chunk = NULL; + + apiurl = "https://speech.platform.bing.com/recognize?scenarios=ulm&appid=f84e364c-ec34-4773-a783-73707bd9a585&device.os=wp7&version=3.0&format=json&locale="; + + if (GetLanguage() == 0) apiurl = apiurl + "fr-FR"; + else apiurl = apiurl + "en-US"; + + //Device ID + apiurl = apiurl + "&instanceid=1d4b6030-9099-11e0-91e4-0800200c9a66"; + //Client ID generated + apiurl = apiurl + "&requestid=" + Uid; + + wprintf(L"File size %d Kb\n", s / 1000); + + pooh.readptr = Buff_file; + pooh.sizeleft = s; + pooh.totalsize = s; + + curl_easy_setopt(curl, CURLOPT_POST, 1L); + + //Header part + snprintf(sizeHeader, sizeof(sizeHeader), "Authorization: Bearer %s", Token); + chunk = curl_slist_append(chunk, sizeHeader); + chunk = curl_slist_append(chunk, "Content-Type: audio/wav; samplerate=8000");//samplerate = 8000/16000 + sprintf(sizeHeader, "Content-Length: %d", s); + chunk = curl_slist_append(chunk, sizeHeader); + //disalbe Expect: 100-continue + chunk = curl_slist_append(chunk, "Expect:"); + //Set headers + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk); + + //curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback); + curl_easy_setopt(curl, CURLOPT_READDATA, &pooh); + //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);// To debug + + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&data); + + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0); + + + curl_easy_setopt(curl, CURLOPT_URL, apiurl.c_str()); + + res = curl_easy_perform(curl); + + Mywprintf(L"Resultat From Bing\n %s \n", data.memory); + + { + //parsing with regex + struct slre_cap caps[2]; + + + //Have a result with confidence ? + if (slre_match("\"lexical\":\"([^\"]+)\",\"confidence\":\"([^\"]+)\"", data.memory, data.size, caps, 2, 0) > 0) + { + char *tmp2; + int l; + + strncpy(resultat, caps[0].ptr, caps[0].len); + resultat[caps[0].len] = '\0'; + + l = caps[1].len; + if (l > 4) l = 4; + tmp2 = (char *)malloc((l + 1) * sizeof(char)); + strncpy(tmp2, caps[1].ptr + 2, l - 2); + tmp2[l - 2] = '\0'; + + bestscore = 1 + (atoi(tmp2)); + + free(tmp2); + + } + else + { + char *Pmemory = data.memory; + + // ok nevermind, add all the other results + while (slre_match("\"lexical\":\"([^\"]+)\"", Pmemory, data.size, caps, 1, 0) > 0) + { + char *tmp1; + char *tmp2; + int l; + + tmp1 = (char *)malloc((caps[0].len + 1) * sizeof(char)); + strncpy(tmp1, caps[0].ptr, caps[0].len); + strncpy(tmp1 + caps[0].len, "\0", 1); + + tmp2 = tmp1; + while (tmp2) + { + while ((tmp2[0] != ' ') && (tmp2[0] != '\0')) tmp2++; + if (tmp2[0] != '\0') + { + tmp2[0] = '\0'; + tmp2++; + } + else tmp2 = NULL; + + if (!mystrstr(resultat, tmp1)) + { + int l = strlen(resultat) + strlen(tmp1) + 1; + if (l < 254) + { + strcat(resultat, " "); + strcat(resultat, tmp1); + } + } + + if (tmp2) tmp1 = tmp2; + } + + l = caps[0].ptr + caps[0].len - data.memory; + Pmemory += l; + data.size -= l; + + bestscore = 50; + } + } + } + + curl_easy_cleanup(curl); + + } + + free(data.memory); + } + + return bestscore; +} \ No newline at end of file diff --git a/rhonda/translategoogle.h b/rhonda/STTEngine.h similarity index 52% rename from rhonda/translategoogle.h rename to rhonda/STTEngine.h index a7ca8c5..52ff580 100644 --- a/rhonda/translategoogle.h +++ b/rhonda/STTEngine.h @@ -3,5 +3,6 @@ int TranslateGoggle(char *buff, size_t s, char *resultat); -void SetGoogleApiKey(char *s); +void SetSTTApiKey(char *s); +int TranslateBing(char *Buff_flac, size_t s, char *resultat); diff --git a/rhonda/audio.cpp b/rhonda/audio.cpp index 49ce927..31324c8 100644 --- a/rhonda/audio.cpp +++ b/rhonda/audio.cpp @@ -25,12 +25,10 @@ #define FRAMES_PER_BUFFER (512) #define MAXSILENCE 900 #define MINSILENCE 500 -#define MINFRAME 45000 +#define MINFRAME 1.1f #define MAGICNUMBER 8000 -#define BUFFER - int fd = 0; bool PortAudioInitialised = false; @@ -389,7 +387,7 @@ int Checkamplitude(long value) { if (Enr_etat == ENR_ATTENTE) { - wprintf(L"Son detecte, Demmarage enregistrement. Valeur : %ld\n",value); + wprintf(L"Sound detected, Start recording. Value : %ld\n",value); _DisplaySpectro(-1); Enr_etat = ENR_ENCOURS; @@ -405,7 +403,7 @@ int Checkamplitude(long value) { if (Enr_etat != ENR_FINI) { - wprintf(L"Too much silence.\n"); + wprintf(L"Too much silence, stop recording.\n"); if (Enr_etat == ENR_ENCOURS) Enr_etat = ENR_FINI; else Enr_etat = ENR_RATE; } @@ -460,7 +458,7 @@ static int recordCallback(const void *inputBuffer, void *outputBuffer, unsigned if (framesLeft < framesPerBuffer) { - wprintf(L"Depassement duree max\n"); + wprintf(L"Recorded sound too long, stop recording\n"); framesToCalc = framesLeft; finished = paComplete; } @@ -546,6 +544,8 @@ cRecord::cRecord() data.recordedSamples = NULL; hdr = NULL; + Defaut_sample_rate = SAMPLE_RATE; + hdr = (WaveHeader *)malloc(sizeof(*hdr)); wprintf(L"\033[0;31mInitialise sound recorder\033[0;37m\n"); @@ -553,7 +553,7 @@ cRecord::cRecord() InitPortAudio(); //making header - hdr = genericWAVHeader(hdr, SAMPLE_RATE, 8 * sizeof(SAMPLE), NUM_CHANNELS); + hdr = genericWAVHeader(hdr, Defaut_sample_rate, 8 * sizeof(SAMPLE), NUM_CHANNELS); if (!hdr) { wprintf(L"Error allocating WAV header.\n"); @@ -615,9 +615,25 @@ void cRecord::Stop() } } +void cRecord::SetSampleRate(int v) +{ + Defaut_sample_rate = v; + + //Remake header + if (hdr) free(hdr); + hdr = (WaveHeader *)malloc(sizeof(*hdr)); + hdr = genericWAVHeader(hdr, Defaut_sample_rate, 8 * sizeof(SAMPLE), NUM_CHANNELS); + if (!hdr) + { + wprintf(L"Error allocating WAV header.\n"); + } + + wprintf(L"Setting Sample rate : %d\n", v); +} -char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) + +char * cRecord::RecordSound(uint32_t duration,size_t *sizeflac) { PaError err = paNoError; @@ -629,8 +645,6 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) Initchecker(); - - //ok start to recording data.maxFrameIndex = MaxFrames = duration * hdr->sample_rate; /* Record for a few seconds. */ data.frameIndex = 0; @@ -662,7 +676,7 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) Stop(); wprintf(L"Nbre de frames = %d\n",data.frameIndex); - if (data.frameIndex < MINFRAME) + if (data.frameIndex < (hdr->sample_rate* MINFRAME)) { wprintf(L"Fichier trop petit, nbre frame = %d < %d\n",data.frameIndex,MINFRAME); return NULL; @@ -693,7 +707,8 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) average /= (double)numSamples; #endif -#ifdef RAW +//Raw output +#if 0 { FILE *fid; fid = fopen("recorded.raw", "wb"); @@ -708,10 +723,13 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) fclose( fid ); printf("Wrote data to 'recorded.raw'\n"); } + return NULL; } -#elif FILE +#endif +//File output +#if 0 { - FILE* fid = fopen(fileName, "wb"); + FILE* fid = fopen("output.wav", "wb"); if(!fid) { printf("Could not open file."); @@ -724,12 +742,15 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) fwrite(data.recordedSamples, NUM_CHANNELS * sizeof(SAMPLE), data.frameIndex, fid); fclose(fid); } + + return NULL; } -#else +#endif +//Buffer output +#if 1 { char *WavBuffer; int size; - char * buff_flac; hdr->data_size = data.frameIndex * (NUM_CHANNELS * sizeof(SAMPLE)); size = hdr->data_size + 44; @@ -739,14 +760,11 @@ char * cRecord::RecordFLAC(uint32_t duration,size_t *sizeflac) writeWAVHeaderBuffer(WavBuffer,hdr); memcpy(WavBuffer+44,data.recordedSamples,NUM_CHANNELS * sizeof(SAMPLE) * data.frameIndex); - //convert wav buffer to flac buffer - printf("Sound recorded, convertion to flac\n"); - buff_flac = ConvertWavBufferToFlacBuffer(WavBuffer, NUM_CHANNELS * sizeof(SAMPLE) * data.frameIndex + 44, sizeflac); - - if (WavBuffer) free(WavBuffer); - WavBuffer = NULL; + //return size + *sizeflac = NUM_CHANNELS * sizeof(SAMPLE) * data.frameIndex + 44; - return buff_flac; + //Return buffer + return WavBuffer; } #endif diff --git a/rhonda/audio.h b/rhonda/audio.h index 8555c14..b261ab5 100644 --- a/rhonda/audio.h +++ b/rhonda/audio.h @@ -130,9 +130,10 @@ class cRecord cRecord(); // Méthodes - char * RecordFLAC(uint32_t duration, size_t *size); + char * RecordSound(uint32_t duration, size_t *size); void Stop(void); bool Start(void); + void SetSampleRate(int); void SetSpectro(long); @@ -146,6 +147,8 @@ class cRecord PAData data; PaStream* stream; + int Defaut_sample_rate; + //config diff --git a/rhonda/config.xml b/rhonda/config.xml index 587ced1..c1e0cbf 100644 --- a/rhonda/config.xml +++ b/rhonda/config.xml @@ -2,7 +2,11 @@ + + 0 + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + paris http://www.commeaucinema.com/rsspage.php?feed=cine http://www.site.com/forum/index.php?type=rss;action=.xml diff --git a/rhonda/flac.c b/rhonda/flac.c index 94b0a05..e9dec54 100644 --- a/rhonda/flac.c +++ b/rhonda/flac.c @@ -131,7 +131,7 @@ char * ConvertWavBufferToFlacBuffer(char *buff_wave, size_t size_wave,size_t *si ); if (ok != FLAC__STREAM_ENCODER_INIT_STATUS_OK) { - fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[init_status]); + fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[ok]); ok = false; free(flac_data.buf); diff --git a/rhonda/makefile b/rhonda/makefile index daf6e1f..46b7fff 100644 --- a/rhonda/makefile +++ b/rhonda/makefile @@ -30,19 +30,19 @@ DEP_DEBUG = OUT_DEBUG = bin/Debug/rhonda INC_RELEASE = $(INC) -Ilibs -Ilibs/portaudio -CFLAGS_RELEASE = $(CFLAGS) -O3 -std=c++0x -Wall +CFLAGS_RELEASE = $(CFLAGS) -O3 -std=c++0x -Wall -Wno-write-strings RESINC_RELEASE = $(RESINC) RCFLAGS_RELEASE = $(RCFLAGS) LIBDIR_RELEASE = $(LIBDIR) -LIB_RELEASE = $(LIB)libs/libsnowboy-detect.a -lcblas libs/portaudio/libportaudio.a -llapack_atlas -lasound -ljack -lpthread -lrt -lm -ldl -lFLAC -lcurl -lwiringPi +LIB_RELEASE = $(LIB)libs/libsnowboy-detect.a -lcblas libs/portaudio/libportaudio.a -llapack_atlas -lasound -ljack -lpthread -lrt -lm -ldl -lFLAC -lcurl -lwiringPi -luuid LDFLAGS_RELEASE = $(LDFLAGS) -s OBJDIR_RELEASE = obj/Release DEP_RELEASE = OUT_RELEASE = bin/Release/rhonda -OBJ_DEBUG = $(OBJDIR_DEBUG)/applications.o $(OBJDIR_DEBUG)/audio.o $(OBJDIR_DEBUG)/flac.o $(OBJDIR_DEBUG)/fonction.o $(OBJDIR_DEBUG)/hardware.o $(OBJDIR_DEBUG)/libs/i2cbusses.o $(OBJDIR_DEBUG)/libs/pugixml.o $(OBJDIR_DEBUG)/libs/rs232.o $(OBJDIR_DEBUG)/libs/slre.o $(OBJDIR_DEBUG)/prog.o $(OBJDIR_DEBUG)/traitement.o $(OBJDIR_DEBUG)/translategoogle.o $(OBJDIR_DEBUG)/uart.o +OBJ_DEBUG = $(OBJDIR_DEBUG)/STTEngine.o $(OBJDIR_DEBUG)/applications.o $(OBJDIR_DEBUG)/audio.o $(OBJDIR_DEBUG)/flac.o $(OBJDIR_DEBUG)/fonction.o $(OBJDIR_DEBUG)/hardware.o $(OBJDIR_DEBUG)/libs/i2cbusses.o $(OBJDIR_DEBUG)/libs/pugixml.o $(OBJDIR_DEBUG)/libs/rs232.o $(OBJDIR_DEBUG)/libs/slre.o $(OBJDIR_DEBUG)/prog.o $(OBJDIR_DEBUG)/traitement.o $(OBJDIR_DEBUG)/uart.o -OBJ_RELEASE = $(OBJDIR_RELEASE)/applications.o $(OBJDIR_RELEASE)/audio.o $(OBJDIR_RELEASE)/flac.o $(OBJDIR_RELEASE)/fonction.o $(OBJDIR_RELEASE)/hardware.o $(OBJDIR_RELEASE)/libs/i2cbusses.o $(OBJDIR_RELEASE)/libs/pugixml.o $(OBJDIR_RELEASE)/libs/rs232.o $(OBJDIR_RELEASE)/libs/slre.o $(OBJDIR_RELEASE)/prog.o $(OBJDIR_RELEASE)/traitement.o $(OBJDIR_RELEASE)/translategoogle.o $(OBJDIR_RELEASE)/uart.o +OBJ_RELEASE = $(OBJDIR_RELEASE)/STTEngine.o $(OBJDIR_RELEASE)/applications.o $(OBJDIR_RELEASE)/audio.o $(OBJDIR_RELEASE)/flac.o $(OBJDIR_RELEASE)/fonction.o $(OBJDIR_RELEASE)/hardware.o $(OBJDIR_RELEASE)/libs/i2cbusses.o $(OBJDIR_RELEASE)/libs/pugixml.o $(OBJDIR_RELEASE)/libs/rs232.o $(OBJDIR_RELEASE)/libs/slre.o $(OBJDIR_RELEASE)/prog.o $(OBJDIR_RELEASE)/traitement.o $(OBJDIR_RELEASE)/uart.o all: debug release @@ -60,6 +60,9 @@ debug: before_debug out_debug after_debug out_debug: before_debug $(OBJ_DEBUG) $(DEP_DEBUG) $(LD) $(LIBDIR_DEBUG) -o $(OUT_DEBUG) $(OBJ_DEBUG) $(LDFLAGS_DEBUG) $(LIB_DEBUG) +$(OBJDIR_DEBUG)/STTEngine.o: STTEngine.cpp + $(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c STTEngine.cpp -o $(OBJDIR_DEBUG)/STTEngine.o + $(OBJDIR_DEBUG)/applications.o: applications.cpp $(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c applications.cpp -o $(OBJDIR_DEBUG)/applications.o @@ -93,9 +96,6 @@ $(OBJDIR_DEBUG)/prog.o: prog.cpp $(OBJDIR_DEBUG)/traitement.o: traitement.cpp $(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c traitement.cpp -o $(OBJDIR_DEBUG)/traitement.o -$(OBJDIR_DEBUG)/translategoogle.o: translategoogle.cpp - $(CXX) $(CFLAGS_DEBUG) $(INC_DEBUG) -c translategoogle.cpp -o $(OBJDIR_DEBUG)/translategoogle.o - $(OBJDIR_DEBUG)/uart.o: uart.c $(CC) $(CFLAGS_DEBUG) $(INC_DEBUG) -c uart.c -o $(OBJDIR_DEBUG)/uart.o @@ -117,6 +117,9 @@ release: before_release out_release after_release out_release: before_release $(OBJ_RELEASE) $(DEP_RELEASE) $(LD) $(LIBDIR_RELEASE) -o $(OUT_RELEASE) $(OBJ_RELEASE) $(LDFLAGS_RELEASE) $(LIB_RELEASE) +$(OBJDIR_RELEASE)/STTEngine.o: STTEngine.cpp + $(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c STTEngine.cpp -o $(OBJDIR_RELEASE)/STTEngine.o + $(OBJDIR_RELEASE)/applications.o: applications.cpp $(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c applications.cpp -o $(OBJDIR_RELEASE)/applications.o @@ -150,9 +153,6 @@ $(OBJDIR_RELEASE)/prog.o: prog.cpp $(OBJDIR_RELEASE)/traitement.o: traitement.cpp $(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c traitement.cpp -o $(OBJDIR_RELEASE)/traitement.o -$(OBJDIR_RELEASE)/translategoogle.o: translategoogle.cpp - $(CXX) $(CFLAGS_RELEASE) $(INC_RELEASE) -c translategoogle.cpp -o $(OBJDIR_RELEASE)/translategoogle.o - $(OBJDIR_RELEASE)/uart.o: uart.c $(CC) $(CFLAGS_RELEASE) $(INC_RELEASE) -c uart.c -o $(OBJDIR_RELEASE)/uart.o diff --git a/rhonda/prog.cpp b/rhonda/prog.cpp index d72b04e..2040b4e 100644 --- a/rhonda/prog.cpp +++ b/rhonda/prog.cpp @@ -35,7 +35,7 @@ No comment yet #include "audio.h" #include "hardware.h" #include "flac.h" -#include "translategoogle.h" +#include "STTEngine.h" #include "traitement.h" #include "fonction.h" #include "applications.h" @@ -60,6 +60,7 @@ int HotWordModel = 1; // Global variables bool bExit = false; int language = 0; // 0 = FR 1 = EN +int STTEngine = 0; // 0 = google - 1 = Bing /***********************************************************************/ @@ -95,8 +96,8 @@ int main(int argc, char* argv[]) { bool HotWord = false; - size_t size_flac; - char *buff_flac = NULL; + size_t size_soundbuffer; + char *buff_soundbuffer = NULL; Resultat[0] = '\0'; @@ -177,12 +178,31 @@ int main(int argc, char* argv[]) { //TestTransmitter(0,12325261,1,"on"); //parle(L"test m\u00e9t\u00e9o"); - cTraitement.traite("recherche le fichier test"); + //cTraitement.traite("recherche le fichier test"); //CheckGitHubNotification(); +#if 0 + { + char *source = NULL; + FILE *fp; + long bufsize; + size_t newLen; + char res[255]; + + fp = fopen("c:\\testfile.wav", "rb"); + fseek(fp, 0L, SEEK_END); + bufsize = ftell(fp); + + source = (char*)malloc(sizeof(char) * (bufsize + 1)); - //TranslateGoggle("c://", Resultat); + fseek(fp, 0L, SEEK_SET); + newLen = fread(source, sizeof(char), bufsize, fp); + fclose(fp); + + TranslateBing(source, newLen, res); + } +#endif bExit = true; #endif @@ -292,23 +312,64 @@ int main(int argc, char* argv[]) { //Wait(800); wprintf(L"Recording\n"); + SP(); - /* Recording sound and convert it to flac file */ - buff_flac = cRecord.RecordFLAC(5,&size_flac); - //err = 0; - if (buff_flac != NULL) + err = 0; + size_soundbuffer = 0; + + //Google STT engine + if (STTEngine == 0) { + size_t size_tmpbuffer = 0; + char *buff_tmp = NULL; - SP(); + /* Recording sound and convert it to flac file */ + buff_tmp = cRecord.RecordSound(5, &size_tmpbuffer); + + if ((!buff_tmp) || (size_tmpbuffer == 0)) + { + err = 0; + break; + } - wprintf(L"Send to google\n"); - cMatrixLed.DisplayIcone(SABLIER); + //convert wav buffer to flac buffer + printf("Sound recorded, convertion to flac\n"); + buff_soundbuffer = ConvertWavBufferToFlacBuffer(buff_tmp, size_tmpbuffer, &size_soundbuffer); - err = TranslateGoggle(buff_flac, size_flac, Resultat); + if (buff_tmp) free(buff_tmp); - free(buff_flac); - buff_flac = NULL; + if ((buff_soundbuffer) && (size_soundbuffer > 0)) + { + + wprintf(L"Send to google\n"); + cMatrixLed.DisplayIcone(SABLIER); + err = TranslateGoggle(buff_soundbuffer, size_soundbuffer, Resultat); + } + } + //Bing STT engine + else if (STTEngine == 1) + { + /* Recording sound */ + buff_soundbuffer = cRecord.RecordSound(5, &size_soundbuffer); + + if ((buff_soundbuffer) && (size_soundbuffer > 0)) + { + wprintf(L"Send to Bing\n"); + cMatrixLed.DisplayIcone(SABLIER); + + err = TranslateBing(buff_soundbuffer, size_soundbuffer, Resultat); + } + } + + if (buff_soundbuffer != NULL) + { + free(buff_soundbuffer); + buff_soundbuffer = NULL; + } + + if (err > 0) + { SP(); wprintf(L"Resultat with a score of (%d) : ", err); Mywprintf(L"%s\n", Resultat); @@ -367,7 +428,8 @@ bool LoadConfig(void) pugi::xml_node panels = doc.child("mesh"); //config - SetGoogleApiKey((char *)panels.child("config").child_value("api")); + SetSTTApiKey((char *)panels.child("config").child_value("api")); + SetSTTMode(atoi((char *)panels.child("config").child_value("STTMode"))); SetCity((char *)panels.child("config").child_value("ville")); SetLanguage((char *)panels.child("config").child_value("language")); SetMailUserPass((char *)(panels.child("config").child_value("mailuser_and_pass"))); @@ -493,53 +555,19 @@ wchar_t * GetCommonString(int index) return (wchar_t*)CommonString[index].c_str(); } -/*******************************************/ - -#if 0 -int main2(int argc, char* argv[]) { - /* For sound engine */ - std::vector data; - int Samplerate = 44100; - int Numchannel = 1; - int Bitpersample = 16; - - if (!LoadConfig()) return 0; - - wprintf(L"Snowboy sensivity %s\n", sensitivity_str.c_str()); - wprintf(L"Snowboy modele %s\n", model_filename.c_str()); - - if ((int)(model_filename.find(",")) > 0) HotWordModel = 2; - - // Initializes Snowboy detector.A faire avant setlocale sinon bug !! - snowboy::SnowboyDetect detector(resource_filename, model_filename); - detector.SetSensitivity(sensitivity_str); - detector.SetAudioGain(audio_gain); - - Samplerate = detector.SampleRate(); - Numchannel = detector.NumChannels(); - Bitpersample = detector.BitsPerSample(); - - wprintf(L"Samplerate %i Numchannel %i, Bitpersampple %i\n", Samplerate, Numchannel, Bitpersample); +void SetSTTMode(int v) +{ + STTEngine = v; + if (STTEngine > 1) STTEngine = 0; - PortAudioWrapper pa_wrapper(Samplerate, Numchannel, Bitpersample); - if (!(pa_wrapper.ready)) + if (STTEngine == 0) { - wprintf(L"Audio problem for snowboy\n"); + wprintf(L"Use Google STT\n"); } - - while (true) + else if (STTEngine == 1) { - pa_wrapper.Read(&data); - - if (data.size() != 0) { - int result = detector.RunDetection(data.data(), data.size()); - wprintf(L"xxx %d\n", result); - if (result > 0) { - wprintf(L"Hotword detected %d\n", result); - } - } + wprintf(L"Use Bing STT\n"); + cRecord.SetSampleRate(8820); + //cRecord.SetSampleRate(44100); } -} -#endif - -/*************************************************************************************************/ +} \ No newline at end of file diff --git a/rhonda/prog.h b/rhonda/prog.h index 0ab98f2..ef23668 100644 --- a/rhonda/prog.h +++ b/rhonda/prog.h @@ -13,4 +13,5 @@ void Exit(void); void SetLanguage(char *s); int GetLanguage(void); void SetCommonString(int index, char *s); -wchar_t * GetCommonString(int index); \ No newline at end of file +wchar_t * GetCommonString(int index); +void SetSTTMode(int v); \ No newline at end of file diff --git a/rhonda/rhonda.cbp b/rhonda/rhonda.cbp index 472f35a..570965b 100644 --- a/rhonda/rhonda.cbp +++ b/rhonda/rhonda.cbp @@ -52,6 +52,7 @@ + @@ -59,6 +60,7 @@ + @@ -78,7 +80,6 @@ - diff --git a/rhonda/rhonda.depend b/rhonda/rhonda.depend index ab0e998..9edbd2c 100644 --- a/rhonda/rhonda.depend +++ b/rhonda/rhonda.depend @@ -788,7 +788,7 @@ -1468844157 source:/root/Desktop/Rhonda/applications.cpp +1474288990 source:/root/Desktop/Rhonda/applications.cpp @@ -801,20 +801,21 @@ "libs/slre.h" "libs/pugixml.hpp" + -1468843098 /root/Desktop/Rhonda/applications.h +1474288989 /root/Desktop/Rhonda/applications.h -1468843212 /root/Desktop/Rhonda/fonction.h +1474288975 /root/Desktop/Rhonda/fonction.h 1466946587 /root/Desktop/Rhonda/libs/slre.h -1468843100 source:/root/Desktop/Rhonda/audio.cpp +1474291827 source:/root/Desktop/Rhonda/audio.cpp @@ -825,15 +826,15 @@ "prog.h" -1468161739 /root/Desktop/Rhonda/flac.h +1474288978 /root/Desktop/Rhonda/flac.h -1468843246 /root/Desktop/Rhonda/hardware.h +1470558760 /root/Desktop/Rhonda/hardware.h -1468843102 /root/Desktop/Rhonda/audio.h +1474288986 /root/Desktop/Rhonda/audio.h @@ -849,12 +850,12 @@ 1466946594 /root/Desktop/Rhonda/libs/portaudio/pa_util.h "portaudio.h" -1468843250 /root/Desktop/Rhonda/prog.h +1474288971 /root/Desktop/Rhonda/prog.h -1468161738 source:/root/Desktop/Rhonda/flac.c +1474288977 source:/root/Desktop/Rhonda/flac.c @@ -876,7 +877,7 @@ "fonction.h" -1468843243 source:/root/Desktop/Rhonda/hardware.cpp +1470558759 source:/root/Desktop/Rhonda/hardware.cpp @@ -973,7 +974,7 @@ "slre.h" -1468843252 source:/root/Desktop/Rhonda/prog.cpp +1474288969 source:/root/Desktop/Rhonda/prog.cpp @@ -989,7 +990,7 @@ "audio.h" "hardware.h" "flac.h" - "translategoogle.h" + "STTEngine.h" "traitement.h" "fonction.h" "applications.h" @@ -1005,14 +1006,14 @@ 1466946579 /root/Desktop/Rhonda/uart.h -1468843265 /root/Desktop/Rhonda/translategoogle.h +1470558776 /root/Desktop/Rhonda/translategoogle.h -1468843260 /root/Desktop/Rhonda/traitement.h +1474288964 /root/Desktop/Rhonda/traitement.h -1468843258 source:/root/Desktop/Rhonda/traitement.cpp +1474288962 source:/root/Desktop/Rhonda/traitement.cpp @@ -1048,7 +1049,7 @@ 1466946587 /root/Desktop/Rhonda/libs/pa_util.h "portaudio.h" -1468843211 source:/root/Desktop/Rhonda/fonction.cpp +1474288974 source:/root/Desktop/Rhonda/fonction.cpp @@ -1061,13 +1062,33 @@ "fonction.h" -1468843280 source:/root/Desktop/Rhonda/translategoogle.cpp +1470558774 source:/root/Desktop/Rhonda/translategoogle.cpp + "translategoogle.h" "libs/slre.h" "fonction.h" + "prog.h" + +1474288824 /root/Desktop/Rhonda/STTEngine.h + + + +1474290884 source:/root/Desktop/Rhonda/STTEngine.cpp + + + + + + + + + "STTEngine.h" + "libs/slre.h" + "fonction.h" + "prog.h" diff --git a/rhonda/rhonda.vcxproj b/rhonda/rhonda.vcxproj index ca8055e..83bd869 100644 --- a/rhonda/rhonda.vcxproj +++ b/rhonda/rhonda.vcxproj @@ -24,8 +24,8 @@ + - @@ -37,8 +37,8 @@ + - @@ -85,7 +85,7 @@ MachineX86 true Console - portaudio_x86.lib;libcurl.lib;libogg_static.lib;libFLAC_static.lib;utf8_static.lib;win_utf8_io_static.lib;%(AdditionalDependencies) + portaudio_x86.lib;libcurl.lib;libogg_static.lib;libFLAC_static.lib;utf8_static.lib;win_utf8_io_static.lib;Rpcrt4.lib;%(AdditionalDependencies) uuid.lib diff --git a/rhonda/rhonda.vcxproj.filters b/rhonda/rhonda.vcxproj.filters index 1e6d7b5..d3e4e91 100644 --- a/rhonda/rhonda.vcxproj.filters +++ b/rhonda/rhonda.vcxproj.filters @@ -63,7 +63,7 @@ Source Files - + Source Files @@ -71,9 +71,6 @@ Header Files - - Header Files - Header Files @@ -101,5 +98,8 @@ Header Files + + Header Files + \ No newline at end of file diff --git a/rhonda/translategoogle.cpp b/rhonda/translategoogle.cpp deleted file mode 100644 index 4515131..0000000 --- a/rhonda/translategoogle.cpp +++ /dev/null @@ -1,248 +0,0 @@ - -// Aralox - http://stackoverflow.com/questions/25307487/how-to-use-libcurl-with-google-speech-api-what-is-the-equivalent-for-data-bin/25310710#25310710 - -#include -#include -#include -#include -#include - -#include -//#include - -#include "translategoogle.h" - -#include "libs/slre.h" -#include "fonction.h" -#include "prog.h" - - -char GoogleApiKey[40]; - -void SetGoogleApiKey(char *s) -{ - int l = strlen(s); - if (l > 39) l = 39; - strncpy(GoogleApiKey, s, l); -} - - -struct WriteThis { - const char *readptr; - long sizeleft; - long totalsize; -}; - -struct MemoryStruct { - char *memory; - size_t size; -}; - - -static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) -{ - size_t realsize = size * nmemb; - struct MemoryStruct *mem = (struct MemoryStruct *)userp; - - mem->memory = (char *)realloc(mem->memory, mem->size + realsize + 1); - if(mem->memory == NULL) { - /* out of memory! */ - printf("not enough memory (realloc returned NULL)\n"); - return 0; - } - - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; - - return realsize; -} - - -int lastpercent = -1; -static size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp) -{ - - struct WriteThis *pooh = (struct WriteThis *)userp; - - int percent = 100*(pooh->totalsize - pooh->sizeleft)/pooh->totalsize; - if ((percent != lastpercent) && (percent % 5 == 0)) - { - printf("Avancement %d\n",percent); - lastpercent = percent; - } - - if (size*nmemb < 1) return 0; - - if (pooh->sizeleft) - { - long written = (long)(size * nmemb); - if (pooh->sizeleft < written) written = pooh->sizeleft; - memcpy(ptr, ((char*)(pooh->readptr)), written); - pooh->readptr += written; - pooh->sizeleft -= written; - return written; - } - -/* - if (pooh->sizeleft) { - *(char *)ptr = pooh->readptr[0]; // copy one single byte - pooh->readptr++; // advance pointer - pooh->sizeleft--; // less data left - return 1; // we return 1 byte at a time! - - } -*/ - return 0; /* no more data left to deliver */ -} - -int TranslateGoggle(char *Buff_flac, size_t s, char *resultat) -{ - CURL *curl; // curl handle - CURLcode res; - - int bestscore = 0; - - struct MemoryStruct data; - data.memory = (char *)malloc(1); /* will be grown as needed by the realloc above */ - data.size = 0; /* no data at this point */ - - resultat[0] = '\0'; - - curl = curl_easy_init(); - if (curl) - { - struct curl_slist *chunk = NULL; - - struct WriteThis pooh; - - char sizeHeader[255]; - - std::string apiurl; - apiurl = "https://www.google.com/speech-api/v2/recognize?output=json&lang="; - - if (GetLanguage() == 0) apiurl = apiurl + "FR-fr"; - else apiurl = apiurl + "EN-en"; - - apiurl = apiurl + "&key=" + GoogleApiKey; - - if (s == 0) return 0; - - wprintf(L"File size %d Kb\n",s/1000); - - //chunk = curl_slist_append(chunk, "Content-Type: audio/l16; rate=44100"); - chunk = curl_slist_append(chunk, "Content-Type: audio/x-flac; rate=44100"); - - pooh.readptr = Buff_flac; - pooh.sizeleft = s; - pooh.totalsize = s; - - sprintf(sizeHeader,"Content-Length: %d",s); - chunk = curl_slist_append(chunk, sizeHeader); - - //disalbe Expect: 100-continue - chunk = curl_slist_append(chunk, "Expect:"); - - curl_easy_setopt(curl, CURLOPT_POST, 1L); - curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback); - curl_easy_setopt(curl, CURLOPT_READDATA, &pooh); - //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);// To debug - - curl_easy_setopt(curl, CURLOPT_HTTPHEADER, chunk); - - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&data); - - curl_easy_setopt(curl, CURLOPT_CAINFO, 0L); - curl_easy_setopt(curl, CURLOPT_CAPATH, 0L); - //curl_easy_setopt(curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); - curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); - - curl_easy_setopt(curl, CURLOPT_URL, apiurl.c_str()); - - res = curl_easy_perform(curl); - - Mywprintf(L"Resultat From Google\n %s \n",data.memory); - - - { - //parsing with regex - struct slre_cap caps[2]; - - - //Have a result with confidence ? - if (slre_match("{\"transcript\":\"([^\"]+)\",\"confidence\":([0-9\.]+)}", data.memory, data.size, caps, 2, 0) > 0) - { - char *tmp2; - int l; - - strncpy(resultat, caps[0].ptr, caps[0].len); - resultat[caps[0].len] = '\0'; - - l = caps[1].len; - if (l > 4) l = 4; - tmp2 = (char *)malloc((l + 1) * sizeof(char)); - strncpy(tmp2, caps[1].ptr + 2, l - 2); - tmp2[l - 2] = '\0'; - - bestscore = 1 + (atoi(tmp2)); - - free(tmp2); - - } - else - { - char *Pmemory = data.memory; - - // ok nevermind, add all the other results - while (slre_match("{\"transcript\":\"([^\"]+)\"}", Pmemory, data.size, caps, 1, 0) > 0) - { - char *tmp1; - char *tmp2; - int l; - - tmp1 = (char *)malloc((caps[0].len + 1) * sizeof(char)); - strncpy(tmp1, caps[0].ptr, caps[0].len); - strncpy(tmp1 + caps[0].len, "\0", 1); - - tmp2 = tmp1; - while (tmp2) - { - while ((tmp2[0] != ' ') && (tmp2[0] != '\0')) tmp2++; - if (tmp2[0] != '\0') - { - tmp2[0] = '\0'; - tmp2++; - } - else tmp2 = NULL; - - if (!mystrstr(resultat, tmp1)) - { - int l = strlen(resultat) + strlen(tmp1) + 1; - if (l < 254) - { - strcat(resultat, " "); - strcat(resultat, tmp1); - } - } - - if (tmp2) tmp1 = tmp2; - } - - l = caps[0].ptr + caps[0].len - data.memory; - Pmemory += l; - data.size -= l; - - bestscore = 50; - } - } - } - - curl_easy_cleanup(curl); - - } - - free(data.memory); - - return bestscore; -}