PhoenixFileParser  1.0.0
Set of tools to ease file parsing
Loading...
Searching...
No Matches
PFileParser.cpp
Go to the documentation of this file.
1
2/***************************************
3 Auteur : Pierre Aubert
4 Mail : pierre.aubert@lapp.in2p3.fr
5 Licence : CeCILL-C
6****************************************/
7
8#include "PFileParser.h"
9
14
19
21
///Open the file to be parsed and load its whole content in memory
/**	@param fileName : path of the file to load
 * 	@return true if the loaded content is not empty, false otherwise
 * 	(an empty file is therefore reported the same way as a failed load)
*/
24bool PFileParser::open(const PPath & fileName){
25 p_fileName = fileName;
26 p_fileContent = fileName.loadFileContent();
//NOTE(review): listing line 27 is absent from this extract; presumably it refreshes p_nbTotalChar from the new content - confirm against the real source
28 return (p_fileContent != "");
29}
30
32
35void PFileParser::setWhiteSpace(const PString & whiteSpace){
36 p_listWhiteSpace = whiteSpace;
37}
38
40
43void PFileParser::setSeparator(const PString & separator){
44 p_listSeparator = separator;
45}
46
48
///Set the content to be parsed directly from a string instead of a file
/**	@param fileContent : text that replaces the current content
*/
50void PFileParser::setFileContent(const PString & fileContent){
51 p_fileContent = fileContent;
//NOTE(review): listing line 52 is absent from this extract; presumably it refreshes p_nbTotalChar - confirm against the real source
53}
54
56
58void PFileParser::setEscapeChar(char escapeChar){
59 p_echapChar = escapeChar;
60}
61
63
65void PFileParser::setLocation(const PLocation & location){
66 setLine(location.getLine());
67 setColumn(location.getColumn());
68 p_fileName = location.getFileName();
69}
70
72
74void PFileParser::setLine(size_t currentLine){
75 p_currentLine = currentLine;
76}
77
79
81void PFileParser::setColumn(size_t currentCol){
82 p_currentLineFirstColumn = currentCol;
83}
84
86
89 return (p_currentChar >= p_nbTotalChar);
90}
91
97
100 if(p_vecPosition.size() == 0lu){
101 return;
102 }
104 p_currentLine = p_vecLine.back();
105 p_vecPosition.pop_back();
106 p_vecLine.pop_back();
107}
108
111 p_vecPosition.clear();
112 p_vecLine.clear();
113}
114
116
119 if(isEndOfFile()) return false;
121}
122
124
127 if(isEndOfFile()) return false;
129}
130
132
135 return p_echapChar;
136}
137
139
142 return p_fileName;
143}
144
146
149 PString dummySkipedStr("");
150 return getNextToken(dummySkipedStr);
151}
152
154
///Get the next token and collect the white characters skipped before it
/**	@param[out] skippedStr : every white character skipped before the token is appended to it
 * 	@return a single separator character, or a run of characters that are neither white nor separator,
 * 	or an empty string when the end of the file is reached
 * 	NOTE(review): listing lines 162, 164, 168, 177 and 179 are absent from this extract;
 * 	presumably they advance the current character and reload ch - confirm against the real source
*/
157PString PFileParser::getNextToken(PString & skippedStr){
158 if(isEndOfFile()) return "";
159 char ch = p_fileContent[p_currentChar];
//Skip the leading white characters, keeping a copy of them for the caller
160 while(!isEndOfFile() && p_listWhiteSpace.find(ch)){
161 skippedStr += ch;
163 if(isEndOfFile()) return "";
165 }
166 //We are sure the current char is not a white character
167 if(p_listSeparator.find(ch) && !isEndOfFile()){ //If it is a separator, we stop
169 PString s("");
170 s += ch;
171 return s;
172 }
173 //Otherwise we take all characters until the next white character or separator character
174 PString buf("");
175 while(!isEndOfFile() && !p_listWhiteSpace.find(ch) && !p_listSeparator.find(ch)){
176 buf += ch;
178 if(isEndOfFile()){return buf;}
180 }
181 return buf;
182}
183
185
190 char ch = p_fileContent[p_currentChar];
191 return ch;
192 }else{
194 return '\0';
195 }
196}
197
199
202PString PFileParser::getUntilKey(const PString & patern){
203 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
204 return getUntilKeyWithoutPatern(patern) + patern;
205}
206
208
///Get the text from the current character up to the given pattern, pattern excluded
/**	@param patern : pattern that ends the extraction
 * 	@return characters accumulated before the pattern (or until the loop runs out of characters),
 * 	or an empty string if patern is empty, the content is empty or the end of file is reached
 * 	NOTE(review): listing lines 226 and 241 are absent from this extract;
 * 	presumably they advance the current character - confirm against the real source
*/
211PString PFileParser::getUntilKeyWithoutPatern(const PString & patern){
212 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
213 size_t sizePatern(patern.size());
214 std::string out(""); //start from an explicit empty string to avoid unpleasant surprises
215 size_t sizeSrc(p_nbTotalChar - p_currentChar);
216 size_t beginTest(0lu), beginLine(0lu), beginI(0lu), nbMatch(0lu);
217 for(size_t i(0lu); i < sizeSrc; ++i){
218 if(p_fileContent[p_currentChar] == patern[nbMatch] && !p_currentCharEchaped){ //the current character equals pattern character nbMatch (and is not escaped)
219 if(nbMatch == 0lu){ //this is the first pattern character we test
220 beginTest = p_currentChar; //so remember where the candidate match started
221 beginLine = p_currentLine;
222 beginI = i;
223 }
224 ++nbMatch; //next time we will test the following pattern character
225 if(nbMatch == sizePatern){ //every pattern character has been tested and matched, so we return what was accumulated
227 return out;
228 }
229 }else{ //the current character differs from pattern character nbMatch
230 if(nbMatch == 0lu){ //no partial match was in progress
231 out += p_fileContent[p_currentChar]; //the character is kept as it is
232 }else{ //a partial match was in progress and has just failed
//Keep the first character of the failed candidate and rewind to where it started
233 out += p_fileContent[beginTest];
234 p_currentChar = beginTest;
235 p_currentLine = beginLine;
236 i = beginI;
237 }
238 beginTest = 0lu; //reset the beginning of the test (to avoid overflows, just in case)
239 nbMatch = 0lu; //reset the number of matched characters since the pattern was not found
240 }
242 }
243 return out;
244}
245
247
///Get the text up to the given pattern, unless the pattern directly follows strNotBeforeEndPatern
/**	@param patern : pattern that ends the extraction (not part of the returned text)
 * 	@param strNotBeforeEndPatern : string which, once matched, is copied to the output and protects what follows it
 * 	@return accumulated text, or an empty string if patern is empty, the content is empty or the end of file is reached
 * 	NOTE(review): listing lines 263, 264, 269 and 270 are absent from this extract;
 * 	presumably they copy the current character to out and advance - confirm against the real source
*/
251PString PFileParser::getUntilKeyWithoutPaternExclude(const PString & patern, const PString & strNotBeforeEndPatern){
252 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
253 std::string out(""); //start from an explicit empty string to avoid unpleasant surprises
//p_dontSkipSpace is forced to true during the scan and restored before every return
254 bool prevSkipSpace(p_dontSkipSpace);
255 p_dontSkipSpace = true;
256 bool skiptNextEnd(false);
257 while(!isEndOfFile()){
258 if(isMatch(strNotBeforeEndPatern)){
259 out += strNotBeforeEndPatern;
260 skiptNextEnd = true;
261 }else if(skiptNextEnd){
//The previous iteration matched strNotBeforeEndPatern, so patern is not tested this time
262 skiptNextEnd = false;
265 }else if(isMatch(patern)){
266 p_dontSkipSpace = prevSkipSpace;
267 return out;
268 }else{
271 }
272 }
273 p_dontSkipSpace = prevSkipSpace;
274 return out;
275}
276
278
///Get the text up to the closing pattern, taking nested begin/end patterns into account
/**	@param patern : closing pattern (the outermost one is not part of the returned text)
 * 	@param beginPatern : opening pattern that increases the nesting level
 * 	@param allowedCharAfterBegin : beginPatern only counts if the character following it belongs to this string
 * 	@return accumulated text, or an empty string if a pattern is empty, the content is empty or the end of file is reached
 * 	NOTE(review): listing lines 306 and 307 are absent from this extract;
 * 	presumably they copy the current character to out and advance - confirm against the real source
*/
283PString PFileParser::getUntilKeyWithoutPaternRecurse(const PString & patern, const PString & beginPatern,
284 const PString & allowedCharAfterBegin)
285{
286 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the scan and restored before every return
287 bool prevSkipSpace(p_dontSkipSpace);
288 p_dontSkipSpace = true;
289 std::string out("");
//The nesting level starts at 1 : the caller is assumed to be already inside one opened block
290 long int nbEmbeded(1lu);
291 while(!isEndOfFile()){
292 if(isMatch(patern)){
293 --nbEmbeded;
294 if(nbEmbeded <= 0l){
295 p_dontSkipSpace = prevSkipSpace;
296 return out;
297 }else{
//Closing pattern of a nested block : it belongs to the returned text
298 out += patern;
299 }
300 }else if(isMatch(beginPatern)){
//NOTE(review): when the following character is not allowed, the matched beginPatern is not appended to out here - confirm this is intended
301 if(allowedCharAfterBegin.find(p_fileContent[p_currentChar])){
302 out += beginPatern;
303 ++nbEmbeded;
304 }
305 }else{
308 }
309 }
310 p_dontSkipSpace = prevSkipSpace;
311 return out;
312}
313
315
///Get the text up to the closing pattern, with nested begin/end patterns and an escape expression
/**	@param patern : closing pattern (the outermost one is not part of the returned text)
 * 	@param beginPatern : opening pattern that increases the nesting level
 * 	@param echapExpr : expression which, once matched, is copied to the output and protects what follows it
 * 	@return accumulated text, or an empty string if a pattern is empty, the content is empty or the end of file is reached
 * 	NOTE(review): listing lines 335, 336, 349 and 350 are absent from this extract;
 * 	presumably they copy the current character to out and advance - confirm against the real source
*/
320PString PFileParser::getUntilKeyWithoutPaternRecurseExclude(const PString & patern, const PString & beginPatern,
321 const PString & echapExpr)
322{
323 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the scan and restored before every return
324 bool prevSkipSpace(p_dontSkipSpace);
325 p_dontSkipSpace = true;
326 std::string out("");
//The nesting level starts at 1
327 long int nbEmbeded(1lu);
328 bool skiptNextEnd(false);
329 while(!isEndOfFile()){
330 if(isMatch(echapExpr)){
331 out += echapExpr;
332 skiptNextEnd = true;
333 }else if(skiptNextEnd){
//The previous iteration matched echapExpr, so neither patern nor beginPatern is tested this time
334 skiptNextEnd = false;
337 }else if(isMatch(patern)){
338 --nbEmbeded;
339 if(nbEmbeded <= 0l){
340 p_dontSkipSpace = prevSkipSpace;
341 return out;
342 }else{
//Closing pattern of a nested block : it belongs to the returned text
343 out += patern;
344 }
345 }else if(isMatch(beginPatern)){
346 out += beginPatern;
347 ++nbEmbeded;
348 }else{
351 }
352 }
353 p_dontSkipSpace = prevSkipSpace;
354 return out;
355}
356
358
///Get the text up to the closing pattern, taking nested begin/end patterns into account
/**	@param patern : closing pattern (the outermost one is not part of the returned text)
 * 	@param beginPatern : opening pattern that increases the nesting level
 * 	@return accumulated text, or an empty string if a pattern is empty, the content is empty or the end of file is reached
 * 	NOTE(review): listing lines 382 and 383 are absent from this extract;
 * 	presumably they copy the current character to out and advance - confirm against the real source
*/
362PString PFileParser::getUntilKeyWithoutPaternRecurse(const PString & patern, const PString & beginPatern)
363{
364 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the scan and restored before every return
365 bool prevSkipSpace(p_dontSkipSpace);
366 p_dontSkipSpace = true;
367 std::string out("");
//The nesting level starts at 1
368 long int nbEmbeded(1l);
369 while(!isEndOfFile()){
370 if(isMatch(patern)){
371 --nbEmbeded;
372 if(nbEmbeded <= 0l){
373 p_dontSkipSpace = prevSkipSpace;
374 return out;
375 }else{
//Closing pattern of a nested block : it belongs to the returned text
376 out += patern;
377 }
378 }else if(isMatch(beginPatern)){
379 out += beginPatern;
380 ++nbEmbeded;
381 }else{
384 }
385 }
386 p_dontSkipSpace = prevSkipSpace;
387 return out;
388}
389
391
///Get the longest run of characters, from the current position, that all belong to charset
/**	@param charset : set of accepted characters
 * 	@return the accepted characters read (empty string if the first character is not in charset)
 * 	NOTE(review): listing line 406 is absent from this extract;
 * 	presumably it advances the current character - confirm against the real source
*/
394PString PFileParser::getStrComposedOf(const PString & charset){
//presumably eraseChar() returns the white-space list without the characters of charset,
//so that white characters requested by the caller are not skipped - TODO confirm
395 PString tmpWhiteSpace(p_listWhiteSpace.eraseChar(charset));
396 if(tmpWhiteSpace != ""){
397 skipChars(tmpWhiteSpace);
398 }
399 std::string out("");
400 bool isInCharSet(true);
//Stop at the end of the file or on the first character which is not in charset
401 while(!isEndOfFile() && isInCharSet){
402 char ch = p_fileContent[p_currentChar];
403 isInCharSet = charset.find(ch);
404 if(isInCharSet){
405 out += ch;
407 }
408 }
409 return out;
410}
411
413
416 if(p_fileContent.empty()) return "";
417 size_t currentCharIndex(p_currentChar);
418 char ch = p_fileContent[currentCharIndex];
419 size_t indexBeginRow(currentCharIndex);
420 size_t indexEndRow(currentCharIndex);
421 if(ch != '\n'){
422 while(p_fileContent[indexEndRow] != '\n' && !isEndOfFile()){
423 ++indexEndRow;
424 }
425 }
426 if(ch == '\n' && indexBeginRow != 0lu){
427 --indexBeginRow;
428 }
429 while(p_fileContent[indexBeginRow] != '\n' && indexBeginRow != 0lu){
430 --indexBeginRow;
431 }
432 if(p_fileContent[indexBeginRow] == '\n'){indexBeginRow++;}
433 return p_fileContent.substr(indexBeginRow, indexEndRow - indexBeginRow);
434}
435
437
///Say if the pattern matches the content at the current position, and consume it if so
/**	@param patern : pattern to be tested
 * 	@return true if the pattern matches (the current position is then moved forward by the pattern size),
 * 	false if it does not, if patern is empty, if the end of file is reached or if the current character is escaped
 * 	NOTE(review): listing line 443 is absent from this extract;
 * 	presumably it skips the white space before the comparison - confirm against the real source
*/
441bool PFileParser::isMatch(const PString & patern){
442 if(patern == "" || isEndOfFile() || p_currentCharEchaped) return false;
444 size_t nbCharPatern(patern.size());
//Not enough characters left to contain the pattern
445 if(p_currentChar + nbCharPatern > p_nbTotalChar){return false;}
446 bool match = true;
447 size_t i(0lu);
//Compare character by character, stop on the first difference
448 while(match && i < nbCharPatern){
449 match = (patern[i] == p_fileContent[p_currentChar + i]);
450 ++i;
451 }
452 if(match){
453 incrementCurrentChar(nbCharPatern);
454 }
455 return match;
456}
457
459
463bool PFileParser::isMatchRewind(const PString & patern){
464 pushPosition();
465 bool b = isMatch(patern);
466 popPosition();
467 return b;
468}
469
471
475bool PFileParser::isMatchSeq(const PVecString & patern, bool alwaysPopBack){
476 pushPosition();
477 PVecString::const_iterator it(patern.begin());
478 bool matchPatern(true);
479 while(it != patern.end() && matchPatern){
480 matchPatern = isMatch(*it);
481 ++it;
482 }
483 if(!matchPatern || alwaysPopBack){
484 popPosition();
485 }
486 return matchPatern;
487}
488
490
495bool PFileParser::isMatch(const PString & patern, const PString & forbiddenCharBefore){
496 if(p_currentChar > 0lu){
497 //If we find a forbidden character before the current char, the patern is canceled
498 if(forbiddenCharBefore.find(p_fileContent[p_currentChar - 1lu])){
499 return false;
500 }
501 }
502 return isMatch(patern);
503}
504
506
///Say if the pattern matches at the current position as a whole token
/**	@param patern : pattern to be tested
 * 	@return true if the pattern matches and is not glued to a letter or an underscore, false otherwise
 * 	On failure the parser goes back to the position saved on entry.
 * 	NOTE(review): listing line 523 is absent from this extract; presumably it is the condition
 * 	(closed on listing line 528) guarding the check of the following character - confirm against the real source
*/
510bool PFileParser::isMatchToken(const PString & patern){
511 pushPosition();
512 if(!isMatch(patern)){
513 popPosition();
514 return false;
515 }
//NOTE(review): despite its name this set holds no digit, so a pattern glued to a digit is still accepted as a token - confirm this is intended
516 PString letterNumberUnderscore("_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
//Check the character located just before the matched pattern, if there is one
517 if(p_currentChar > patern.size()){
518 if(letterNumberUnderscore.find(p_fileContent[p_currentChar - patern.size() - 1lu])){
519 popPosition();
520 return false;
521 }
522 }
//Check the character located just after the matched pattern
524 if(letterNumberUnderscore.find(p_fileContent[p_currentChar])){
525 popPosition();
526 return false;
527 }
528 }
529 return true;
530}
531
533
536PString PFileParser::isMatch(const PVecString & patern){
537 if(patern.size() == 0lu) return "";
538 PVecString::const_iterator it(patern.begin());
539 while(it != patern.end()){
540 if(isMatch(*it)) return *it;
541 ++it;
542 }
543 return "";
544}
545
547
550PString PFileParser::isMatchToken(const PVecString & patern){
551 if(patern.size() == 0lu) return "";
552 PVecString::const_iterator it(patern.begin());
553 while(it != patern.end()){
554 if(isMatchToken(*it)) return *it;
555 ++it;
556 }
557 return "";
558}
559
561
564PString PFileParser::isMatch(const std::vector<PVecString > & patern){
565 if(patern.size() == 0lu) return "";
566 std::vector<PVecString >::const_iterator itList(patern.begin());
567 while(itList != patern.end()){
568 PVecString::const_iterator it(itList->begin());
569 while(it != itList->end()){
570 if(isMatch(*it)) return *it;
571 ++it;
572 }
573 ++itList;
574 }
575 return "";
576}
577
579
582PString PFileParser::isMatch(const PParseSeq & seq){
583 pushPosition();
584 PString body("");
585 const PVecParseStep & vecStep = seq.getVecStep();
586 PVecParseStep::const_iterator itStep(vecStep.begin());
587 bool isParseNextStep(true);
588 while(itStep != vecStep.end() && isParseNextStep){
589 isParseNextStep = itStep->getIsOptional();
590 const PVecParseCmd & vecCmd = itStep->getVecCmd();
591 bool isMatchedCmd(false);
592 PVecParseCmd::const_iterator itCmd(vecCmd.begin());
593 while(itCmd != vecCmd.end() && !isMatchedCmd){
594 PString str(itCmd->getStr());
595 if(itCmd->getIsMatch()){
596 isMatchedCmd = isMatch(str);
597 body += str;
598 }else{
599 PString res(getStrComposedOf(str));
600 if(res != ""){
601 body += res;
602 isMatchedCmd = true;
603 }
604 }
605 ++itCmd;
606 }
607 isParseNextStep |= isMatchedCmd;
608 ++itStep;
609 }
610 if(!isParseNextStep){
611 popPosition();
612 body = "";
613 }
614 return body;
615}
616
618
622 if(isEndOfFile()) return false;
624 do{
627 return true;
628 }else return false;
629}
630
640
642
///Skip every consecutive character which belongs to the given string
/**	@param chToSkip : characters to be skipped
 * 	NOTE(review): listing line 647 is absent from this extract;
 * 	presumably it advances the current character - confirm against the real source
 * 	NOTE(review): the first test reads p_fileContent[p_currentChar] without an end-of-file check - confirm callers guarantee it
*/
644void PFileParser::skipChars(const PString & chToSkip){
645 if(chToSkip.find(p_fileContent[p_currentChar])){
646 do{
//Carry on while the current character has to be skipped and the end of the file is not reached
648 }while(chToSkip.find(p_fileContent[p_currentChar]) && !isEndOfFile());
649 }
650}
651
653
656 return p_listWhiteSpace;
657}
658
660
663 return p_listSeparator;
664}
665
667
670 if(isEndOfFile()) return '\0';
672}
673
675
678 if(isEndOfFile() && p_currentChar > 0lu) return '\0';
679 return p_fileContent[p_currentChar - 1lu];
680}
681
683
685size_t PFileParser::getLine() const{
686 return p_currentLine;
687}
688
690
695 }else{return 0lu;}
696}
697
699
702 return p_nbTotalChar;
703}
704
706
709 return p_currentChar;
710}
711
713
716 //First, let's get the current column
717 size_t indentation(0lu), currentCharIdx(p_currentLineFirstColumn);
718 while(currentCharIdx < p_nbTotalChar && PString(" \t").find(p_fileContent[currentCharIdx])){
719 ++indentation;
720 ++currentCharIdx;
721 }
722 if(currentCharIdx > p_currentChar){
723 p_currentChar = currentCharIdx; //Anyway, it was just white character
724 }
725
726 return indentation;
727}
728
730
735
737
741std::ostream & operator << (std::ostream & out, const PFileParser & other){
742 out << "file '" << other.getFileName() << "' line " << other.getLine() << ":" << other.getColumn();
743 return out;
744}
745
748 p_currentChar = 0lu;
749 p_currentLine = 1lu;
751 p_fileContent = "";
752 p_listWhiteSpace = " \t\n";
753 p_listSeparator = "()[]{}+=.;,:/*%<>#";
754 p_echapChar = '\0';
755 p_dontSkipSpace = false;
756 p_currentCharEchaped = false;
757}
758
764
766
769 for(size_t i(0lu); i < nbChar; ++i){
770 if(p_fileContent[p_currentChar] == '\n'){
772 }
776 }
777 }
778}
779
780
std::ostream & operator<<(std::ostream &out, const PFileParser &other)
Définition de l'opérateur de flux sortant.
std::vector< PParseCmd > PVecParseCmd
std::vector< PParseStep > PVecParseStep
void setSeparator(const PString &separator)
Initialise la liste des caractères séparateurs.
size_t getLine() const
Fonction qui renvoie le numéro de la ligne courante.
PFileParser()
Constructeur de PFileParser.
std::vector< size_t > p_vecPosition
Vector of all the checkpoint positions in the text file (added with pushPosition() and removed with p...
virtual ~PFileParser()
Destructeur de PFileParser.
void skipChars(const PString &chToSkip)
Skip the characters in the given string.
bool isWhiteSpace()
Says if the current char is a white space.
bool isMatchSeq(const PVecString &patern, bool alwaysPopBack=false)
Match a sequence of token in a vector.
bool open(const PPath &fileName)
Fonction qui ouvre le fichier que l'on va parser.
bool isChSpace() const
Dit si le caractère courant est un caractère blanc.
bool p_dontSkipSpace
Say if we don't want to skip the spaces.
PString getUntilKeyWithoutPaternExclude(const PString &patern, const PString &strNotBeforeEndPatern)
Parse a string until the pattern is found, unless strNotBeforeEndPatern comes just before it.
PString getCurrentRow() const
Get the current parsed row.
size_t getColumn() const
Fonction qui renvoie le numéro de la colonne du caractère courant.
PString getNextToken()
Get the next token.
PString getUntilKeyWithoutPatern(const PString &patern)
Renvoie la chaine de caractère du caractère courant jusqu'à patern exclu.
void incrementCurrentChar(size_t nbChar=1lu)
Increment the current character.
void setEscapeChar(char escapeChar)
Sets the escape character of the PFileParser.
void clear()
Clear the saved positions of the parser in the current file.
PString getWhiteSpace() const
renvoie la liste des caractères blancs
PString getSeparator() const
renvoie la liste des caractères séparateurs
PString getUntilKeyWithoutPaternRecurse(const PString &patern, const PString &beginPatern, const PString &allowedCharAfterBegin)
Get the string until the end sequence, taking recursive patterns (embedded strings) into account.
void setLine(size_t currentLine)
Set the current line of the PFileParser.
PString getStrComposedOf(const PString &charset)
Get string composed of the characters in the string charset.
PString p_listSeparator
liste des séparateurs
void setWhiteSpace(const PString &whiteSpace)
Initialise la liste des caractères blancs.
bool isMatchRewind(const PString &patern)
Do a isMatch and then go back at the previous position.
std::vector< size_t > p_vecLine
Vector of all the checkpoint rows in the text file (added with pushPosition() and removed with popPos...
bool p_currentCharEchaped
True if the current char is escaped.
bool isMatchToken(const PString &patern)
Says if the pattern matches the current characters of the PFileParser but treats the string as a tok...
bool isMatch(const PString &patern)
Says if the pattern matches the current characters of the PFileParser.
char getCurrentCh() const
Renvoie le caractère courant.
char p_echapChar
Escape character.
size_t getLineIndentation()
Get the current line indentation.
void popPosition()
Get to the last saved position of the PFileParser in the current file.
size_t p_nbTotalChar
Nombre de caractères total.
PPath p_fileName
Nom du fichier que l'on veut parser.
Definition PFileParser.h:97
PString getUntilKey(const PString &patern)
Renvoie la chaine de caractère du caractère courant jusqu'à patern comprise.
void initialisationPFileParser()
Fonction d'initialisation du PFileParser.
void incrementCurrentLine()
Increment the current line.
PString p_fileContent
Contenu du fichier de configuration.
Definition PFileParser.h:99
PLocation getLocation() const
Fonction qui renvoie la PLocation du PFileParser.
PPath getFileName() const
Fonction qui renvoie le nom du fichier que l'on a ouvert.
bool isChSeparator() const
Dit si le caractère courant est un séparateur.
void setColumn(size_t currentCol)
Set the current column of the PFileParser.
size_t p_currentLineFirstColumn
Number of the first column character of the current line.
void setLocation(const PLocation &location)
Set the current location of the PFileParser.
PString p_listWhiteSpace
liste des espaces blancs
void skipWhiteSpace()
Skip the white space, if any, at the current character position.
void setFileContent(const PString &fileContent)
Set the file content.
void pushPosition()
Remember the current position of the PFileParser in the current file.
size_t p_currentChar
Numéro du caractère courant.
size_t p_currentLine
Numéro de la ligne courante.
PString getUntilKeyWithoutPaternRecurseExclude(const PString &patern, const PString &beginPatern, const PString &echapExpr)
Get the string until the end sequence, taking recursive patterns (embedded strings) into account.
char getPrevCh() const
Renvoie le caractère précédent.
size_t getCurrentCharIdx() const
Return the index of the current character.
bool isEndOfFile() const
Dit si on est à la fin du fichier.
char getEscapeChar() const
Gets the escape character of the PFileParser.
size_t getNbTotalChar() const
Return the number of characters in the current opened file.
char getNextChar()
Fonction qui renvoie le prochain caractère du fichier courant.
Classe qui permet de décrire une localisation, avec un nom de fichier et une ligne.
Definition PLocation.h:15
size_t getLine() const
renvoie la ligne du PLocation
Definition PLocation.cpp:67
PPath getFileName() const
renvoie le fichier du PLocation
Definition PLocation.cpp:60
size_t getColumn() const
renvoie la colonne du PLocation
Definition PLocation.cpp:74
Parsing sequence.
Definition PParseSeq.h:77
const std ::vector< PParseStep > & getVecStep() const
Get the variable p_vecStep.