PhoenixFileParser  1.5.1
Set of tools to ease file parsing
Loading...
Searching...
No Matches
PFileParser.cpp
Go to the documentation of this file.
1
2/***************************************
3 Auteur : Pierre Aubert
4 Mail : pierre.aubert@lapp.in2p3.fr
5 Licence : CeCILL-C
6****************************************/
7
8#include "PFileParser.h"
9
14
19
21
///Open the file to be parsed and load its content into the parser
/**	@param fileName : path of the file to be loaded
 * 	@return true if the loaded content is not empty, false otherwise
 * 	NOTE(review): the numbering of this listing jumps from 26 to 28, one source line is not visible here — check the original file
*/
24bool PFileParser::open(const PPath & fileName){
//Remember which file is parsed, then read it entirely
25 p_fileName = fileName;
26 p_fileContent = fileName.loadFileContent();
//An empty content is reported as a failure
28 return (p_fileContent != "");
29}
30
32
35void PFileParser::setWhiteSpace(const PString & whiteSpace){
36 p_listWhiteSpace = whiteSpace;
37}
38
40
43void PFileParser::setSeparator(const PString & separator){
44 p_listSeparator = separator;
45}
46
48
///Set the content to be parsed directly from a string
/**	@param fileContent : text to be parsed
 * 	NOTE(review): the numbering of this listing jumps from 51 to 53, one source line is not visible here — check the original file
*/
50void PFileParser::setFileContent(const PString & fileContent){
51 p_fileContent = fileContent;
53}
54
56
58void PFileParser::setEscapeChar(char escapeChar){
59 p_echapChar = escapeChar;
60}
61
63
65void PFileParser::setLocation(const PLocation & location){
66 setLine(location.getLine());
67 setColumn(location.getColumn());
68 p_fileName = location.getFileName();
69}
70
72
75 p_currentChar = index;
76}
77
79
81void PFileParser::setLine(size_t currentLine){
82 p_currentLine = currentLine;
83}
84
86
88void PFileParser::setColumn(size_t currentCol){
89 p_currentLineFirstColumn = currentCol;
90}
91
93
96 return (p_currentChar >= p_nbTotalChar);
97}
98
104
107 if(p_vecPosition.size() == 0lu){
108 return;
109 }
111 p_currentLine = p_vecLine.back();
112 p_vecPosition.pop_back();
113 p_vecLine.pop_back();
114}
115
118 p_vecPosition.clear();
119 p_vecLine.clear();
120}
121
123
126 if(isEndOfFile()) return false;
128}
129
131
134 if(isEndOfFile()) return false;
136}
137
139
142 return p_echapChar;
143}
144
146
149 return p_fileName;
150}
151
153
156 PString dummySkipedStr("");
157 return getNextToken(dummySkipedStr);
158}
159
161
///Get the next token and collect the white characters found before it
/**	@param[out] skippedStr : string to which the leading white characters are appended
 * 	@return a single separator character, or the characters read until the next white or separator character, or an empty string at the end of the file
 * 	NOTE(review): the numbering of this listing skips lines 169, 171, 175, 184 and 186 ; presumably they move the current character forward and refresh ch — check the original file
*/
164PString PFileParser::getNextToken(PString & skippedStr){
165 if(isEndOfFile()) return "";
166 char ch = p_fileContent[p_currentChar];
//Collect the leading white characters
167 while(!isEndOfFile() && p_listWhiteSpace.find(ch)){
168 skippedStr += ch;
170 if(isEndOfFile()) return "";
172 }
173 //We are sure the current char is not a white character
174 if(p_listSeparator.find(ch) && !isEndOfFile()){ //If it is a separator, we stop
//A separator is a token by itself
176 PString s("");
177 s += ch;
178 return s;
179 }
180 //If not, we get all the characters until the next white character or separator character
181 PString buf("");
182 while(!isEndOfFile() && !p_listWhiteSpace.find(ch) && !p_listSeparator.find(ch)){
183 buf += ch;
185 if(isEndOfFile()){return buf;}
187 }
188 return buf;
189}
190
192
197 char ch = p_fileContent[p_currentChar];
198 return ch;
199 }else{
201 return '\0';
202 }
203}
204
206
209PString PFileParser::getUntilKey(const PString & patern){
210 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
211 return getUntilKeyWithoutPatern(patern) + patern;
212}
213
215
///Get the string from the current character up to the given patern, patern excluded
/**	@param patern : patern to be searched
 * 	@return characters read before the patern, or everything read if the patern is never found ; empty string if patern is empty, if there is no content or if the end of the file is reached
 * 	NOTE(review): the numbering of this listing skips lines 233 and 248 ; presumably they move the current character forward — check the original file
*/
218PString PFileParser::getUntilKeyWithoutPatern(const PString & patern){
219 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
220 size_t sizePatern(patern.size());
221 std::string out(""); //avoids small annoyances
222 size_t sizeSrc(p_nbTotalChar - p_currentChar);
223 size_t beginTest(0lu), beginLine(0lu), beginI(0lu), nbMatch(0lu);
224 for(size_t i(0lu); i < sizeSrc; ++i){
225 if(p_fileContent[p_currentChar] == patern[nbMatch] && !p_currentCharEchaped){ //if the current character is the same as the character nbMatch of the patern
226 if(nbMatch == 0lu){ //it is the first one we test
227 beginTest = p_currentChar; //so we have to remember where the test started
228 beginLine = p_currentLine;
229 beginI = i;
230 }
231 ++nbMatch; //next time we will test the following character
232 if(nbMatch == sizePatern){ //in this case everything was tested and all the characters match
234 return out;
235 }
236 }else{ //if the current character is not the same as the character nbMatch of the patern
237 if(nbMatch == 0lu){ //if no matching character was found before
238 out += p_fileContent[p_currentChar]; //this character is kept as it is
239 }else{ //if some characters were already matched before
//Keep the first character of the failed attempt and restart right after it
240 out += p_fileContent[beginTest];
241 p_currentChar = beginTest;
242 p_currentLine = beginLine;
243 i = beginI;
244 }
245 beginTest = 0lu; //the beginning of the test is reset to 0 (to avoid overflows, just in case)
246 nbMatch = 0lu; //the number of matched characters is reset to 0, as the patern was not found
247 }
249 }
250 return out;
251}
252
254
///Parse a string until the patern is found, except when strNotBeforeEndPatern was matched just before
/**	@param patern : patern which ends the search (not added to the result)
 * 	@param strNotBeforeEndPatern : expression which, once matched, is added to the result and prevents the next iteration from testing patern
 * 	@return characters collected before the patern, or everything collected if the end of the file is reached first
 * 	NOTE(review): the numbering of this listing skips lines 270-271 and 276-277 ; presumably they append the current character and move forward — check the original file
*/
258PString PFileParser::getUntilKeyWithoutPaternExclude(const PString & patern, const PString & strNotBeforeEndPatern){
259 if(patern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
260 std::string out(""); //avoids small annoyances
//p_dontSkipSpace is forced to true during the search and restored before each return
261 bool prevSkipSpace(p_dontSkipSpace);
262 p_dontSkipSpace = true;
263 bool skiptNextEnd(false);
264 while(!isEndOfFile()){
265 if(isMatch(strNotBeforeEndPatern)){
266 out += strNotBeforeEndPatern;
267 skiptNextEnd = true;
268 }else if(skiptNextEnd){
269 skiptNextEnd = false;
272 }else if(isMatch(patern)){
273 p_dontSkipSpace = prevSkipSpace;
274 return out;
275 }else{
278 }
279 }
280 p_dontSkipSpace = prevSkipSpace;
281 return out;
282}
283
285
///Get the string until the end patern, taking nested begin/end paterns into account
/**	@param patern : end patern
 * 	@param beginPatern : patern which opens a new nested level
 * 	@param allowedCharAfterBegin : characters which have to follow beginPatern for it to open a nested level
 * 	@return characters collected before the end patern which closes the first level, or everything collected if the end of the file is reached first
 * 	NOTE(review): the numbering of this listing skips lines 313-314 ; presumably they append the current character and move forward — check the original file
 * 	NOTE(review): when the character after beginPatern is not allowed, the matched beginPatern is not appended to the result — confirm this is intended
*/
290PString PFileParser::getUntilKeyWithoutPaternRecurse(const PString & patern, const PString & beginPatern,
291 const PString & allowedCharAfterBegin)
292{
293 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the search and restored before each return
294 bool prevSkipSpace(p_dontSkipSpace);
295 p_dontSkipSpace = true;
296 std::string out("");
//Number of levels still open, the first one is considered as already open
297 long int nbEmbeded(1lu);
298 while(!isEndOfFile()){
299 if(isMatch(patern)){
300 --nbEmbeded;
301 if(nbEmbeded <= 0l){
302 p_dontSkipSpace = prevSkipSpace;
303 return out;
304 }else{
//This end patern closes a nested level, so it belongs to the result
305 out += patern;
306 }
307 }else if(isMatch(beginPatern)){
308 if(allowedCharAfterBegin.find(p_fileContent[p_currentChar])){
309 out += beginPatern;
310 ++nbEmbeded;
311 }
312 }else{
315 }
316 }
317 p_dontSkipSpace = prevSkipSpace;
318 return out;
319}
320
322
///Get the string until the end patern, taking nested begin/end paterns and an escape expression into account
/**	@param patern : end patern
 * 	@param beginPatern : patern which opens a new nested level
 * 	@param echapExpr : expression which, once matched, is added to the result and prevents the next iteration from testing patern and beginPatern
 * 	@return characters collected before the end patern which closes the first level, or everything collected if the end of the file is reached first
 * 	NOTE(review): the numbering of this listing skips lines 342-343 and 356-357 ; presumably they append the current character and move forward — check the original file
*/
327PString PFileParser::getUntilKeyWithoutPaternRecurseExclude(const PString & patern, const PString & beginPatern,
328 const PString & echapExpr)
329{
330 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the search and restored before each return
331 bool prevSkipSpace(p_dontSkipSpace);
332 p_dontSkipSpace = true;
333 std::string out("");
//Number of levels still open, the first one is considered as already open
334 long int nbEmbeded(1lu);
335 bool skiptNextEnd(false);
336 while(!isEndOfFile()){
337 if(isMatch(echapExpr)){
338 out += echapExpr;
339 skiptNextEnd = true;
340 }else if(skiptNextEnd){
341 skiptNextEnd = false;
344 }else if(isMatch(patern)){
345 --nbEmbeded;
346 if(nbEmbeded <= 0l){
347 p_dontSkipSpace = prevSkipSpace;
348 return out;
349 }else{
//This end patern closes a nested level, so it belongs to the result
350 out += patern;
351 }
352 }else if(isMatch(beginPatern)){
353 out += beginPatern;
354 ++nbEmbeded;
355 }else{
358 }
359 }
360 p_dontSkipSpace = prevSkipSpace;
361 return out;
362}
363
365
///Get the string until the end patern, taking nested begin/end paterns into account
/**	@param patern : end patern
 * 	@param beginPatern : patern which opens a new nested level
 * 	@return characters collected before the end patern which closes the first level, or everything collected if the end of the file is reached first
 * 	NOTE(review): the numbering of this listing skips lines 389-390 ; presumably they append the current character and move forward — check the original file
*/
369PString PFileParser::getUntilKeyWithoutPaternRecurse(const PString & patern, const PString & beginPatern)
370{
371 if(patern == "" || beginPatern == "" || p_nbTotalChar == 0lu || isEndOfFile()) return "";
//p_dontSkipSpace is forced to true during the search and restored before each return
372 bool prevSkipSpace(p_dontSkipSpace);
373 p_dontSkipSpace = true;
374 std::string out("");
//Number of levels still open, the first one is considered as already open
375 long int nbEmbeded(1l);
376 while(!isEndOfFile()){
377 if(isMatch(patern)){
378 --nbEmbeded;
379 if(nbEmbeded <= 0l){
380 p_dontSkipSpace = prevSkipSpace;
381 return out;
382 }else{
//This end patern closes a nested level, so it belongs to the result
383 out += patern;
384 }
385 }else if(isMatch(beginPatern)){
386 out += beginPatern;
387 ++nbEmbeded;
388 }else{
391 }
392 }
393 p_dontSkipSpace = prevSkipSpace;
394 return out;
395}
396
398
///Get the string composed only of the characters of the given charset
/**	@param charset : set of the accepted characters
 * 	@return consecutive characters of the content which belong to charset (can be empty)
 * 	NOTE(review): the numbering of this listing skips line 413 ; presumably it moves the current character forward — check the original file
*/
401PString PFileParser::getStrComposedOf(const PString & charset){
//White characters which are not in charset are skipped first
402 PString tmpWhiteSpace(p_listWhiteSpace.eraseChar(charset));
403 if(tmpWhiteSpace != ""){
404 skipChars(tmpWhiteSpace);
405 }
406 std::string out("");
407 bool isInCharSet(true);
//Stop on the first character which is not in charset, or at the end of the file
408 while(!isEndOfFile() && isInCharSet){
409 char ch = p_fileContent[p_currentChar];
410 isInCharSet = charset.find(ch);
411 if(isInCharSet){
412 out += ch;
414 }
415 }
416 return out;
417}
418
420
423 if(p_fileContent.empty()) return "";
424 size_t currentCharIndex(p_currentChar);
425 char ch = p_fileContent[currentCharIndex];
426 size_t indexBeginRow(currentCharIndex);
427 size_t indexEndRow(currentCharIndex);
428 if(ch != '\n'){
429 while(p_fileContent[indexEndRow] != '\n' && !isEndOfFile()){
430 ++indexEndRow;
431 }
432 }
433 if(ch == '\n' && indexBeginRow != 0lu){
434 --indexBeginRow;
435 }
436 while(p_fileContent[indexBeginRow] != '\n' && indexBeginRow != 0lu){
437 --indexBeginRow;
438 }
439 if(p_fileContent[indexBeginRow] == '\n'){indexBeginRow++;}
440 return p_fileContent.substr(indexBeginRow, indexEndRow - indexBeginRow);
441}
442
444
///Say if the patern matches the characters at the current position of the PFileParser
/**	@param patern : patern to be tested
 * 	@return true if the patern matches (the current position is then moved by the size of the patern), false otherwise
 * 	An empty patern, the end of the file or an escaped current character always give false
 * 	NOTE(review): the numbering of this listing skips line 450, one source line is not visible here — check the original file
*/
448bool PFileParser::isMatch(const PString & patern){
449 if(patern == "" || isEndOfFile() || p_currentCharEchaped) return false;
451 size_t nbCharPatern(patern.size());
//The patern cannot match if it is longer than the remaining content
452 if(p_currentChar + nbCharPatern > p_nbTotalChar){return false;}
453 bool match = true;
454 size_t i(0lu);
//Character by character comparison, stopped on the first difference
455 while(match && i < nbCharPatern){
456 match = (patern[i] == p_fileContent[p_currentChar + i]);
457 ++i;
458 }
459 if(match){
460 incrementCurrentChar(nbCharPatern);
461 }
462 return match;
463}
464
466
470bool PFileParser::isMatchRewind(const PString & patern){
471 pushPosition();
472 bool b = isMatch(patern);
473 popPosition();
474 return b;
475}
476
478
482bool PFileParser::isMatchSeq(const PVecString & patern, bool alwaysPopBack){
483 pushPosition();
484 PVecString::const_iterator it(patern.begin());
485 bool matchPatern(true);
486 while(it != patern.end() && matchPatern){
487 matchPatern = isMatch(*it);
488 ++it;
489 }
490 if(!matchPatern || alwaysPopBack){
491 popPosition();
492 }
493 return matchPatern;
494}
495
497
502bool PFileParser::isMatch(const PString & patern, const PString & forbiddenCharBefore){
503 if(p_currentChar > 0lu){
504 //If we find a forbidden character before the current char, the patern is canceled
505 if(forbiddenCharBefore.find(p_fileContent[p_currentChar - 1lu])){
506 return false;
507 }
508 }
509 return isMatch(patern);
510}
511
513
///Say if the patern matches at the current position and is not surrounded by letters or underscore
/**	@param patern : patern to be tested as a whole token
 * 	@return true if the patern matches as a token, false otherwise (the saved position is then restored)
 * 	NOTE(review): the numbering of this listing skips line 530 ; the unmatched closing brace of line 535 suggests a condition which guards the test of the following character — check the original file
 * 	NOTE(review): despite its name, letterNumberUnderscore contains no digit — confirm this is intended
*/
517bool PFileParser::isMatchToken(const PString & patern){
518 pushPosition();
519 if(!isMatch(patern)){
520 popPosition();
521 return false;
522 }
523 PString letterNumberUnderscore("_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
//Check the character just before the matched patern, if there is one
524 if(p_currentChar > patern.size()){
525 if(letterNumberUnderscore.find(p_fileContent[p_currentChar - patern.size() - 1lu])){
526 popPosition();
527 return false;
528 }
529 }
//Check the character just after the matched patern
531 if(letterNumberUnderscore.find(p_fileContent[p_currentChar])){
532 popPosition();
533 return false;
534 }
535 }
536 return true;
537}
538
540
543PString PFileParser::isMatch(const PVecString & patern){
544 if(patern.size() == 0lu) return "";
545 PVecString::const_iterator it(patern.begin());
546 while(it != patern.end()){
547 if(isMatch(*it)) return *it;
548 ++it;
549 }
550 return "";
551}
552
554
557PString PFileParser::isMatchToken(const PVecString & patern){
558 if(patern.size() == 0lu) return "";
559 PVecString::const_iterator it(patern.begin());
560 while(it != patern.end()){
561 if(isMatchToken(*it)) return *it;
562 ++it;
563 }
564 return "";
565}
566
568
571PString PFileParser::isMatch(const std::vector<PVecString > & patern){
572 if(patern.size() == 0lu) return "";
573 std::vector<PVecString >::const_iterator itList(patern.begin());
574 while(itList != patern.end()){
575 PVecString::const_iterator it(itList->begin());
576 while(it != itList->end()){
577 if(isMatch(*it)) return *it;
578 ++it;
579 }
580 ++itList;
581 }
582 return "";
583}
584
586
589PString PFileParser::isMatch(const PParseSeq & seq){
590 pushPosition();
591 PString body("");
592 const PVecParseStep & vecStep = seq.getVecStep();
593 PVecParseStep::const_iterator itStep(vecStep.begin());
594 bool isParseNextStep(true);
595 while(itStep != vecStep.end() && isParseNextStep){
596 isParseNextStep = itStep->getIsOptional();
597 const PVecParseCmd & vecCmd = itStep->getVecCmd();
598 bool isMatchedCmd(false);
599 PVecParseCmd::const_iterator itCmd(vecCmd.begin());
600 while(itCmd != vecCmd.end() && !isMatchedCmd){
601 PString str(itCmd->getStr());
602 if(itCmd->getIsMatch()){
603 isMatchedCmd = isMatch(str);
604 body += str;
605 }else{
606 PString res(getStrComposedOf(str));
607 if(res != ""){
608 body += res;
609 isMatchedCmd = true;
610 }
611 }
612 ++itCmd;
613 }
614 isParseNextStep |= isMatchedCmd;
615 ++itStep;
616 }
617 if(!isParseNextStep){
618 popPosition();
619 body = "";
620 }
621 return body;
622}
623
625
629 if(isEndOfFile()) return false;
631 do{
634 return true;
635 }else return false;
636}
637
647
649
///Skip the characters which are in the given string
/**	@param chToSkip : set of the characters to be skipped
 * 	NOTE(review): the numbering of this listing skips line 654 ; presumably it moves the current character forward — check the original file
 * 	NOTE(review): the first test reads p_fileContent[p_currentChar] without checking the end of the file — confirm that the callers guarantee a valid position
*/
651void PFileParser::skipChars(const PString & chToSkip){
652 if(chToSkip.find(p_fileContent[p_currentChar])){
//Loop as long as the current character has to be skipped and the end of the file is not reached
653 do{
655 }while(chToSkip.find(p_fileContent[p_currentChar]) && !isEndOfFile());
656 }
657}
658
660
663 return p_listWhiteSpace;
664}
665
667
670 return p_listSeparator;
671}
672
674
677 if(isEndOfFile()) return '\0';
679}
680
682
685 if(isEndOfFile() && p_currentChar > 0lu) return '\0';
686 return p_fileContent[p_currentChar - 1lu];
687}
688
690
693char PFileParser::getChar(size_t index) const{
694 if(index >= p_nbTotalChar) return '\0';
695 return p_fileContent[index];
696}
697
699
701size_t PFileParser::getLine() const{
702 return p_currentLine;
703}
704
706
711 }else{return 0lu;}
712}
713
715
718 return p_nbTotalChar;
719}
720
722
725 return p_currentChar;
726}
727
729
732 //First, let's get the current column
733 size_t indentation(0lu), currentCharIdx(p_currentLineFirstColumn);
734 while(currentCharIdx < p_nbTotalChar && PString(" \t").find(p_fileContent[currentCharIdx])){
735 ++indentation;
736 ++currentCharIdx;
737 }
738 if(currentCharIdx > p_currentChar){
739 p_currentChar = currentCharIdx; //Anyway, it was just white character
740 }
741
742 return indentation;
743}
744
746
751
753
757std::ostream & operator << (std::ostream & out, const PFileParser & other){
758 out << "file '" << other.getFileName() << "' line " << other.getLine() << ":" << other.getColumn();
759 return out;
760}
761
764 p_currentChar = 0lu;
765 p_currentLine = 1lu;
767 p_fileContent = "";
768 p_listWhiteSpace = " \t\n";
769 p_listSeparator = "()[]{}+=.;,:/*%<>#";
770 p_echapChar = '\0';
771 p_dontSkipSpace = false;
772 p_currentCharEchaped = false;
773}
774
780
782
785 for(size_t i(0lu); i < nbChar; ++i){
786 if(p_fileContent[p_currentChar] == '\n'){
788 }
792 }
793 }
794}
795
796
std::ostream & operator<<(std::ostream &out, const PFileParser &other)
Définition de l'opérateur de flux sortant.
std::vector< PParseCmd > PVecParseCmd
std::vector< PParseStep > PVecParseStep
void setSeparator(const PString &separator)
Initialise la liste des caractères séparateurs.
size_t getLine() const
Fonction qui renvoie le numéro de la ligne courante.
PFileParser()
Constructeur de PFileParser.
std::vector< size_t > p_vecPosition
Vector of all the checkpoint positions in the text file (added with pushPosition() and removed with p...
virtual ~PFileParser()
Destructeur de PFileParser.
void skipChars(const PString &chToSkip)
Skip the characters in the given string.
bool isWhiteSpace()
Says if the current char is a white space.
bool isMatchSeq(const PVecString &patern, bool alwaysPopBack=false)
Match a sequence of tokens in a vector.
bool open(const PPath &fileName)
Fonction qui ouvre le fichier que l'on va parser.
bool isChSpace() const
Dis si le caractère courant est un caractère blanc.
bool p_dontSkipSpace
Say if we don't want to skip the spaces.
PString getUntilKeyWithoutPaternExclude(const PString &patern, const PString &strNotBeforeEndPatern)
Parse a string until the pattern is found, unless strNotBeforeEndPatern appears just before it.
PString getCurrentRow() const
Get the current parsed row.
size_t getColumn() const
Fonction qui renvoie le numéro de la colonne du caractère courant.
PString getNextToken()
Get the next token.
PString getUntilKeyWithoutPatern(const PString &patern)
Renvoie la chaine de caractère du caractère courant jusqu'à patern exclu.
void incrementCurrentChar(size_t nbChar=1lu)
Increment the current character.
void setEscapeChar(char escapeChar)
Sets the escape character of the PFileParser.
PString getWhiteSpace() const
renvoie la liste des caractères blancs
void setCurrentCharIdx(size_t index)
Set the index of the current char.
PString getSeparator() const
renvoie la liste des caractères séparateurs
PString getUntilKeyWithoutPaternRecurse(const PString &patern, const PString &beginPatern, const PString &allowedCharAfterBegin)
Get the string until the end sequence, taking recursive patterns (embedded strings) into account.
char getChar(size_t index) const
Get the char at the given index.
void setLine(size_t currentLine)
Set the current line of the PFileParser.
PString getStrComposedOf(const PString &charset)
Get string composed of the characters in the string charset.
PString p_listSeparator
liste des séparateurs
void setWhiteSpace(const PString &whiteSpace)
Initialise la liste des caractères blancs.
bool isMatchRewind(const PString &patern)
Do an isMatch and then go back to the previous position.
std::vector< size_t > p_vecLine
Vector of all the checkpoint rows in the text file (added with pushPosition() and removed with popPos...
bool p_currentCharEchaped
True if the current char is escaped.
bool isMatchToken(const PString &patern)
Says if the patern match with the current caracters of the PFileParser but treats the string as a tok...
bool isMatch(const PString &patern)
Says whether the pattern matches the current characters of the PFileParser.
char getCurrentCh() const
Renvoie le caractère courant.
char p_echapChar
Escape character.
size_t getLineIndentation()
Get the current line indentation.
void popPosition()
Get to the last saved position of the PFileParser in the current file.
size_t p_nbTotalChar
Nombre de caractères total.
void clearPosition()
Clear the saved positions of the parser in the current file.
PPath p_fileName
Nom du fichier que l'on veut parser.
Definition PFileParser.h:99
PString getUntilKey(const PString &patern)
Renvoie la chaine de caractère du caractère courant jusqu'à patern comprise.
void initialisationPFileParser()
Fonction d'initialisation du PFileParser.
void incrementCurrentLine()
Increment the current line.
PString p_fileContent
Contenu du fichier de configuration.
PLocation getLocation() const
Fonction qui renvoie la PLocation du PFileParser.
PPath getFileName() const
Fonction qui renvoie le nom du fichier que l'on a ouvert.
bool isChSeparator() const
Dis si le caractère courant est un séparateur.
void setColumn(size_t currentCol)
Set the current column of the PFileParser.
size_t p_currentLineFirstColumn
Number of the first column character of the current line.
void setLocation(const PLocation &location)
Set the current location of the PFileParser.
PString p_listWhiteSpace
liste des espaces blancs
void skipWhiteSpace()
Skip the white space, if any, at the current character position.
void setFileContent(const PString &fileContent)
Set the file content.
void pushPosition()
Remember the current position of the PFileParser in the current file.
size_t p_currentChar
Numéro du caractère courant.
size_t p_currentLine
Numéro de la ligne courante.
PString getUntilKeyWithoutPaternRecurseExclude(const PString &patern, const PString &beginPatern, const PString &echapExpr)
Get the string until the end sequence, taking recursive patterns (embedded strings) into account.
char getPrevCh() const
Renvoie le caractère précédent.
size_t getCurrentCharIdx() const
Return the index of the current character.
bool isEndOfFile() const
Dit si on est à la fin du fichier.
char getEscapeChar() const
Gets the escape character of the PFileParser.
size_t getNbTotalChar() const
Return the number of characters in the current opened file.
char getNextChar()
Fonction qui renvoie le prochain caractère du fichier courant.
Classe qui permet de décrire une localisation, avec un nom de fichier et une ligne.
Definition PLocation.h:15
size_t getLine() const
renvoie la ligne du PLocation
Definition PLocation.cpp:67
const PPath & getFileName() const
renvoie le fichier du PLocation
Definition PLocation.cpp:60
size_t getColumn() const
renvoie la colonne du PLocation
Definition PLocation.cpp:74
Parsing sequence.
Definition PParseSeq.h:77
const std ::vector< PParseStep > & getVecStep() const
Get the variable p_vecStep.