// NOTE(review): the header names of the #include directives were missing in
// this copy of the file. The list below is rebuilt from the identifiers the
// file uses -- confirm it against the original sources, and restore the
// project header(s) that declare tlmp_error(), tlmp_warning(), setarg(), ...
#include <errno.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <linux/acct.h>
#include <map>
#include <set>
#include <string>
#include <vector>

using namespace std;

/*
	Return the current time of day as a number of microseconds
	(seconds * 1000000 + microseconds, as reported by gettimeofday()).
*/
static long long getnow()
{
	struct timeval t;
	gettimeofday(&t,NULL);
	return (long long)t.tv_sec *1000000 + t.tv_usec;
}

// The fields of struct acct_v3 that are kept in memory for each record.
struct SUBACCT{
	__u32 ac_uid;
	__u32 ac_gid;
	__u32 ac_ppid;
	__u32 ac_exitcode;
	__u32 ac_btime;
	unsigned char ac_flag;
	char ac_comm[ACCT_COMM+1];	// One byte more than acct_v3::ac_comm so the
					// name can always be NUL terminated
};

// Extra bits in ac_flag telling the type of event
#define AC_FLAG_FORK	0x20
#define AC_FLAG_EXEC	0x40
// The default event is "end", so there is no bit to indicate that:
// a record is an end record when neither FORK nor EXEC is set.
// The argument is parenthesised so that an expression such as a|b can be
// passed safely.
#define AC_FLAG_IS_END(f)	(((f)&(AC_FLAG_FORK|AC_FLAG_EXEC))==0)
// A flag carrying both bits satisfies both AC_FLAG_IS_FORK and AC_FLAG_IS_EXEC.
#define AC_FLAG_IS_FORK(f)	(((f)&AC_FLAG_FORK)!=0)
#define AC_FLAG_IS_EXEC(f)	(((f)&AC_FLAG_EXEC)!=0)

/*
	One accounting record, plus the information computed while building
	the process trees.
	No copy constructor is declared: the compiler generated one copies every
	member, exactly like the hand written version did, and it lets the
	containers move records instead of copying the pstree string.
*/
struct ACCT{
	//struct acct_v3 ac;
	struct SUBACCT ac;
	string pstree;
	unsigned seqnum;	// Sequence number. Entry with bigger sequence number are newer in time.
	ACCT *found;		// ACCT record of the parent pid (ppid)
	bool referenced;
	/*
		Build a record from a raw accounting entry.
		_ac: the raw entry; only the fields present in SUBACCT are copied.
		_seqnum: sequence number assigned to this entry by the caller.
	*/
	ACCT(const struct acct_v3 &_ac, unsigned _seqnum)
		: seqnum(_seqnum), found(NULL), referenced(false)
	{
		ac.ac_uid = _ac.ac_uid;
		ac.ac_gid = _ac.ac_gid;
		ac.ac_ppid = _ac.ac_ppid;
		ac.ac_flag = _ac.ac_flag;
		ac.ac_exitcode = _ac.ac_exitcode;
		ac.ac_btime = _ac.ac_btime;
		// _ac.ac_comm is a fixed size array which may be completely filled
		// (no terminating NUL), so the copy is bounded and the terminator
		// is written explicitly in the extra byte.
		memcpy (ac.ac_comm,_ac.ac_comm,sizeof(_ac.ac_comm));
		ac.ac_comm[sizeof(_ac.ac_comm)] = '\0';
	}
};

// Per pstree statistics: number of occurrences and a start time.
struct TREESTAT{
	unsigned count;
	time_t date;
	TREESTAT()
		: count(0), date((time_t)0)
	{
	}
};

/*
	Convert a time_t into a string (local time).
	dst receives 19 characters plus the terminating NUL.
	A zero time, or a time localtime() can't convert, produces a placeholder.
	NOTE(review): the placeholder separates date and time with '_' while
	the real format uses '-'; left unchanged since the output may be parsed.
*/
void forrest_asctime (time_t t, char dst[20])
{
	const struct tm *tt = NULL;
	if (t != (time_t)0) tt = localtime (&t);
	if (tt == NULL){
		strcpy (dst,"----/--/--_--:--:--");
	}else{
		snprintf (dst,20,"%04d/%02d/%02d-%02d:%02d:%02d"
			,tt->tm_year+1900,tt->tm_mon+1,tt->tm_mday
			,tt->tm_hour,tt->tm_min,tt->tm_sec);
	}
}

// We build a pstree string for every pid.
// Now, there is an issue. pids may be reused. So the vector may
// contain unrelated processes.
// The solution to locate the parent was to find a process with a btime (start time)
// smaller or equal to the child one.
// Now btime is expressed in seconds since 1970. Instead we use the sequence number
// assigned to each record as it is read from the file.
// So we have to find a parent process with the highest sequence number, yet
// smaller than the sequence number of the current process.
// This way, if a parent was created by a fork(), then exec() multiple times (so changes name)
// and then fork() to produce a child, this child will be associated with the last
// exec() of the parent, not the "end" record.
// Also, each process may have up to 3 entries. One with the flag AC_FLAG_FORK (0x20) is
// written when the process is created after a fork(). At this point
// it does not have the proper command name.
// Then another entry is written when the process execs. The flag is AC_FLAG_EXEC(0x40)
//
// NOTE(review): in this copy of the file the template arguments of the
// containers are missing (map >, vector, set) and several statements are
// truncated; the code below is reproduced as found -- restore it from the
// original sources before building.
static void forrest_build (map > &processes)
{
	// This is a multi-pass solution.
	// For the first pass, we process all pid (so we fill pid_todo with all pids).
	// For each pid, we lookup the ppid. If the ppid has a filled pstree, we use it.
	// If not, we enter the pid in the pid_todo vector. In the next pass we will
	// process only the pid_todo pids until it is empty.
	vector pid_todo;
	for (map >::iterator it=processes.begin(); it != processes.end(); it++){
		pid_todo.push_back (it->first);
	}
	while (pid_todo.size() > 0){
		//fprintf (stderr,"pid_todo size %lu\n",pid_todo.size());
		vector todo = pid_todo;
		pid_todo.clear();
		for (vector::iterator u=todo.begin(); u!=todo.end(); u++){
			map >::iterator it=processes.find(*u);
			unsigned pid = it->first;
			// We build a pstree string for all entries (forking, execing, ending)
			bool do_next = false;	// Some ppid had no pstree
						// so we will have to redo this one
			for (unsigned i=0; i < it->second.size(); i++){
				ACCT &ac = it->second[i];
				unsigned ppid = ac.ac.ac_ppid;
				//fprintf (stderr,"lookup ppid %u for pid %u\n",ppid,pid);
				if (ppid == 0){
					// No need to search, this is init
					ac.pstree = string(ac.ac.ac_comm);
				}else if (ac.pstree.size() == 0){
					if (ac.found == NULL){
						// The vector is ordered by time (seqnum in fact) since it is push_back
						// while reading the accounting log file
						map >::iterator pit = processes.find(ppid);
						if (pit == processes.end()){
							tlmp_error ("No ppid %u found for pid %u\n",ppid,pid);
						}else{
							// Keep the last parent record older than this one
							// and stop at the first newer one.
							for (vector::iterator pac=pit->second.begin(); pac != pit->second.end(); pac++){
								if (pac->seqnum < ac.seqnum){
									ac.found = &(*pac);
								}else if (pac->seqnum > ac.seqnum){
									break;
								}
							}
						}
					}
					if (ac.found != NULL){
						ac.found->referenced = true;
						if (ac.found->pstree.size() > 0){
							char tmp[100];
							// We use upper case in the actype to help with sorting, so fork,exec and end are
							// kept in that order.
							const char *actype = "end";
							const char *name = ac.ac.ac_comm;
							const char *newname = "";
							if (AC_FLAG_IS_FORK(ac.ac.ac_flag)){
								actype = "Fork ";
								// Usually, a process forks and then execs later. When it execs
								// it changes name.
								// To make the pstree more readable, instead of entering
								// just the original name at fork time, we dig further
								// in the log to find out what will be the name of the process
								// and will use that to make the pstree more readable.
								// so instead of having something like
								// .../foo/foo(fork,...)
								// .../foo/bash(exec,...)
								// we will have
								// .../foo/bash(fork foo,...)
								// .../foo/bash(exec,...)
								// So we see more clearly that this fork is execing something
								unsigned ii=i+1;
								if (ii < it->second.size()){
									ACCT &iiac = it->second[ii];
									if (AC_FLAG_IS_EXEC(iiac.ac.ac_flag)){
										newname = name;
										name = iiac.ac.ac_comm;
									}
								}
							}else if (AC_FLAG_IS_EXEC(ac.ac.ac_flag)){
								actype = "eXec ";
								// Like fork above, we are trying to find where we come from
								// What was the name of the process before the exec()
								if (i > 0){
									// We pick the name in the previous entry, but we check this
									// was not too old
									// NOTE(review): ac_btime is a __u32 in SUBACCT, so the
									// subtraction wraps around when ac_btime is less than
									// 24 hours -- confirm this can't happen in practice.
									ACCT &iiac = it->second[i-1];
									if (iiac.ac.ac_btime > ac.ac.ac_btime - 24*60*60){
										newname = iiac.ac.ac_comm;
									}else{
										fprintf (stderr,"Process pid=%u,ppid=%u exec %s: previous process is too old\n"
											,pid,ac.ac.ac_ppid,ac.ac.ac_comm);
									}
								}else{
									fprintf (stderr,"Process pid=%u,ppid=%u exec %s: Can't find previous process name\n"
										,pid,ac.ac.ac_ppid,ac.ac.ac_comm);
								}
							}
							// The new path element is appended to the parent pstree
							snprintf (tmp,sizeof(tmp)-1,"/%s:%s%s,%u,%u,%04x"
								,name,actype,newname
								,ac.ac.ac_uid,ac.ac.ac_gid,ac.ac.ac_exitcode);
							ac.pstree = ac.found->pstree + tmp;
						}else{
							// The parent has no pstree yet, retry in the next pass
							do_next = true;
						}
					}else{
						tlmp_error ("No matching ppid found for pid %u %s\n",pid,ac.ac.ac_comm);
					}
				}
			}
			if (do_next) pid_todo.push_back(pid);
		}
		// No progress was made in this pass, so we give up
		// NOTE(review): size() is passed for a %u conversion; presumably
		// tlmp_error is printf-like, in which case the arguments should be
		// converted to unsigned -- verify.
		if (todo.size() == pid_todo.size()){
			tlmp_error ("Build algorythm does not resolve, pid_todo.size()=%u todo.size()=%u\n",pid_todo.size(),todo.size());
			break;
		}
	}
}

/*
	Print the elapsed time between start and end on stderr, in seconds,
	when doprint is true. start and end are expressed in microseconds.
*/
static void forrest_printdelay (bool doprint, const char *title, long long start, long long end)
{
	if (doprint){
		fprintf (stderr,"%s: %.3lf\n",title,(end-start)/1000000.0);
	}
}

/*
	Produce a list of unique pstree-like processes not found in "excludes" and "known"
	NOTE(review): large parts of this function are missing in this copy
	(the text is truncated in three places flagged below), including
	its end.
*/
static int forrest_compare (
	FILE *fout,
	map > &processes,
	const vector &excludes,	// This is a list of regex to exclude
	const set &known,	// This is a set of already known pstree, to exclude
				// This is the reference
	bool showexcluded,
	const set &knownevents,
	FILE *reportevent)
{
	unsigned nb_excludes = excludes.size();
	regex_t regs[nb_excludes];
	bool regvalid[nb_excludes];	// Even if one regex is invalid, we process anyway
					// We want our intrusion monitoring to work
					// anyway, even if it produces false positive
					// (Because of a broken exclude)
	// NOTE(review): the statement below is truncated in this copy
	for (unsigned i=0; i unknowns;	// To avoid repeating unknown pattern.
	set exs;	// Most entries in excludes are not regex
			// so must match completely
	// NOTE(review): the statement below is truncated in this copy
	for (unsigned i=0; i >::iterator it = processes.begin(); it != processes.end(); it++){
		for (unsigned i=0; isecond.size(); i++){
			ACCT &acct = it->second[i];
			const string &pstree = acct.pstree;
			if (knownevents.count(acct.seqnum)==0 && known.count(pstree)==0 && exs.count(pstree)==0){
				// When reportevent != NULL, we want to know all occurrences of
				// unknown patterns. When reportevent is NULL, we check
				// that it has already been reported. This way, we avoid
				// testing the regex uselessly. This optimisation is especially
				// useful when we build the reference: Since the known set is empty
				// we end up applying the regex over and over for nothing.
				if (reportevent != NULL || unknowns.count(pstree)==0){
					// Ok, this is a pattern not part of the reference (so it is new)
					// and this is the first time we see it in this run
					// So we insert it in unknowns to avoid repeating it.
					int nomatch = true;
					// NOTE(review): the rest of forrest_compare is missing in
					// this copy; the text resumes with forrest_readone
					for (unsigned i=0; i static void forrest_readone(const char *compfile, set &knowns)
{
	// NOTE(review): the construct reading compfile and providing "line"
	// is missing in this copy; only its arguments and body remain.
	glocal set *knowns = &knowns;
	(compfile,true);
		glocal.knowns->insert(line);
		return 0;
}

/*
	Read the list of compare files and put each lines in knowns
	If a compare file is a directory, opens it and reads all the files.
*/
static void forrest_readcompare (const vector &compares, set &knowns)
{
	glocal set *knowns = &knowns;
	// NOTE(review): the loop header and the test selecting the first
	// branch below are truncated in this copy
	for (unsigned i=0; i(compfile);
				forrest_readone(path,*glocal.knowns);
		}else{
			forrest_readone(compfile,knowns);
		}
	}
}

/*
	Program entry point: declares the command line options, loads the
	process accounting file, builds the pstree of every record and then
	performs the operations selected on the command line.
	NOTE(review): several constructs are missing in this copy (only an
	argument list such as "(glocal.facct,sizeof(struct acct_v3));" is left
	of some calls); the text is reproduced as found.
*/
int main (int argc, char *argv[])
{
	glocal int ret = -1;
	glocal const char *facct = NULL;
	glocal const char *build = NULL;
	glocal const char *count = NULL;	// Will produce the list of all pstrees with a count
	glocal vector compare;
	glocal bool dump = false;
	glocal vector excludes;
	glocal bool showexcluded=false;
	glocal vector excludefiles;
	glocal const char *eventfile = NULL;
	glocal bool stats = false;
	glocal const char *notref = NULL;
	glocal bool norecompare = false;
	glocal.ret = (argc,argv);
	// Command line options
	setproginfo ("forrest",VERSION,"Manage enhanced process accounting files");
	setarg (' ',"facct","Process accounting file",glocal.facct,true);
	setarg (' ',"build","Write the pstree patterns in that file",glocal.build,false);
	setarg (' ',"count","Write the pstree patterns with count in that file",glocal.count,false);
	setgrouparg ("Compare options");
	setarg (' ',"compare","Compare the pstree patterns with file(s)",glocal.compare,false);
	setarg (' ',"eventfile","File holding already reported events",glocal.eventfile,false);
	setarg (' ',"exclude","Exclude a process pattern (with --compare)",glocal.excludes,false);
	setarg (' ',"excludefile","File holding exclude patterns",glocal.excludefiles,false);
	setarg (' ',"optimunref","File holding the account records already tested\nand not needed anymore",glocal.notref,false);
	setarg (' ',"norecompare","Abort the compare operation if the process has not grown\n(requires --optimunref)",glocal.norecompare,false);
	setgrouparg ("Misc.");
	setarg (' ',"dump","Print all entries",glocal.dump,false);
	setarg (' ',"showexcluded","Prints excluded patterns",glocal.showexcluded,false);
	setarg (' ',"printstats","Print some stats and execution time",glocal.stats,false);
	glocal map > processes;
	glocal vector ignoreseq;	// Ignore some records in the account file
					// because they have been tested once
					// There is one bool per record. If true, the record
					// is not loaded
	// fprintf (stderr,"sizeof %lu\n",sizeof (ACCT));
	int ret = -1;
	{
		ret = 0;
		long long start = getnow();
		if (glocal.notref != NULL){
			(glocal.notref,sizeof(bool));
				glocal.ignoreseq.reserve (nbrec);
				bool *seqs = (bool*)buf;
				// NOTE(review): the statement below is truncated in this copy
				for (unsigned i=0; i
				// normal
		}
		long long after_notref = getnow();
		forrest_printdelay (glocal.stats,"Read ignoreseq",start,after_notref);
		// Reads the process accounting file. This is the running set
		(glocal.facct,sizeof(struct acct_v3));
			tlmp_error ("Can't open accounting file %s (%s), aborting\n",glocal.facct,strerror(errno));
			exit (-1);
			// Two fake records (pid 1 and pid 2) are added below
			nbrec+=2;
			if (glocal.compare.size()>0 && glocal.norecompare && nbrec == glocal.ignoreseq.size()){
				exit (0);
			}
			// Records not covered by ignoreseq so far are not ignored
			glocal.ignoreseq.reserve(nbrec);
			for (unsigned i=glocal.ignoreseq.size(); i < nbrec; i++){
				glocal.ignoreseq.push_back(false);
			}
			struct acct_v3 ac;
			// We are adding a fake init process because process accounting
			// starts after init, so it lacks a record for it.
			// records for init only show when the vserver ends.
			memset (&ac,0,sizeof(ac));
			ac.ac_pid = 1;
			ac.ac_ppid = 0;
			strcpy (ac.ac_comm,"init");
			glocal.processes[ac.ac_pid].push_back(ACCT(ac,0));
			ac.ac_pid = 2;
			ac.ac_ppid = 1;
			ac.ac_flag = AC_FLAG_FORK;	// This is a fork
			strcpy (ac.ac_comm,"init");
			glocal.processes[ac.ac_pid].push_back(ACCT(ac,1));
			struct acct_v3 *acs = (struct acct_v3*)buf;
			// Sequence numbers 0 and 1 are used by the two fake records
			int seq = recno + 2;
			// NOTE(review): the statement below is truncated in this copy
			for (unsigned i=0; iac_pid].push_back(ACCT(*acs,seq));
			}
			return 0;
		{
			// The fake fork record of pid 2 gets a start time one second
			// before the next record of pid 2
			map >::iterator it = glocal.processes.find(2);
			if (it != glocal.processes.end()){
				if (it->second.size() > 1){
					it->second[0].ac.ac_btime = it->second[1].ac.ac_btime - 1;
				}else{
					// Can't patch process 2 btime
					fprintf (stderr,"The file %s has no entry for pid 2, can't patch the init process btime, weird\n"
						,glocal.facct);
				}
			}else{
				fprintf (stderr,"No entry for pid 2, impossible, exiting\n");
				exit (-1);
			}
			if (glocal.stats) fprintf (stderr,"Process records: %lu\n",glocal.ignoreseq.size());
		}
		long long after_read = getnow();
		forrest_printdelay (glocal.stats,"Read BSD process file",after_notref,after_read);
		forrest_build (glocal.processes);
		long long after_build = getnow();
		// Reads the exclude file. They contain regex pattern
		// They are used when comparing the running set with the reference.
		// anything matching the exclude file is not reported (when not part of the reference)
		for (unsigned i=0; i< glocal.excludefiles.size(); i++){
			(glocal.excludefiles[i].c_str(),true);
				// Empty lines and lines starting with # are skipped
				if (line[0] != '\0' && line[0] != '#'){
					glocal.excludes.push_back(line);
				}
				return 0;
		}
		forrest_printdelay (glocal.stats,"Build",after_read,after_build);
		if (glocal.count != NULL){
			// Produce a report on the running set showing how many time
			// an execution pattern has been encountered.
			(glocal.count,false);
				map pstrees;
				for (map >::const_iterator it = glocal.processes.begin(); it != glocal.processes.end(); it++){
					for (unsigned i=0; isecond.size(); i++){
						const string &pstree = it->second[i].pstree;
						TREESTAT &s = pstrees[pstree];
						s.count++;
						s.date = it->second[i].ac.ac_btime;
					}
				}
				// One line per pattern: count, date, pstree
				for (map::iterator it=pstrees.begin(); it != pstrees.end(); it++){
					char tmp[21];
					struct tm *t = localtime(&it->second.date);
					snprintf (tmp,sizeof(tmp)-1,"%04d/%02d/%02d_%02d:%02d:%02d"
						,t->tm_year+1900,t->tm_mon+1,t->tm_mday
						,t->tm_hour,t->tm_min,t->tm_sec);
					fprintf (fout,"%u %s %s\n",it->second.count,tmp,it->first.c_str());
				}
				return 0;
		}
		if (glocal.dump){
			// Print every record of every pid on stdout
			for (map >::iterator it=glocal.processes.begin(); it != glocal.processes.end(); it++){
				printf ("%5u: ",it->first);
				const char *pad = "";
				for (unsigned i=0; isecond.size(); i++){
					struct ACCT &acct = it->second[i];
					struct SUBACCT &ac = acct.ac;
					char start[20];
					forrest_asctime(ac.ac_btime,start);
					printf ("%sCommande %-20s flag=%02x uid=%-5u gid=%-5u ppid=%-5u exit=%04x start=%s ps=%s seq=%u\n"
						,pad
						,ac.ac_comm,ac.ac_flag
						,ac.ac_uid,ac.ac_gid,ac.ac_ppid
						,ac.ac_exitcode,start,it->second[i].pstree.c_str()
						,acct.seqnum);
					pad = " ";
				}
			}
		}
		if (glocal.build != NULL){
			// Save the running set. It becomes the reference for further compare.
			(glocal.build,false);
				set knowns;
				forrest_readcompare (glocal.compare,knowns);
				set events;	// Empty set
				forrest_compare (fout,glocal.processes,glocal.excludes,knowns,false,events,NULL);
				return 0;
		}else if (glocal.compare.size() > 0){
			// We read the reference(s) and compare it with the running set
			long long start_compare = getnow();
			glocal set knownevents;
			glocal set known;
			forrest_readcompare (glocal.compare,glocal.known);
			FILE *reportevent = NULL;
			if (glocal.eventfile != NULL){
				// Check if the event file is older than the oldest process
				// This happens if the vserver has been restarted
				// The process 1 does not have an entry in the account file (a fake one is inserted)
				// so its btime is not reliable. We use process 2.
				struct stat st;
				if (stat(glocal.eventfile,&st)!=-1 && st.st_mtime < glocal.processes[2][0].ac.ac_btime){
					tlmp_warning ("Event file %s is too old, unlink it\n",glocal.eventfile);
					unlink (glocal.eventfile);
				}
				(glocal.eventfile,true);
					// Each line holds the sequence number of a reported event
					unsigned seqnum = atoi(line);
					if (seqnum > 0) glocal.knownevents.insert (seqnum);
					return 0;	// Normal
				reportevent = fopen (glocal.eventfile,"a");
				if (reportevent == NULL){
					tlmp_error ("Can't open eventfile %s (%s)\n",glocal.eventfile,strerror(errno));
				}
			}
			long long end_readref = getnow();
			forrest_printdelay (glocal.stats,"Read reference",start_compare,end_readref);
			ret = forrest_compare (stdout,glocal.processes,glocal.excludes,glocal.known,glocal.showexcluded,glocal.knownevents,reportevent);
			long long end_compare = getnow();
			forrest_printdelay (glocal.stats,"Compare",end_readref,end_compare);
			if (glocal.notref != NULL){
				(glocal.notref,false);
					// We are looking for all completed process.
					// If they are completed, they won't be referenced in a later run
					// by sub-processes, so they are not needed anymore to perform
					// the compare operation.
					for (map >::const_iterator it = glocal.processes.begin(); it != glocal.processes.end(); it++){
						// The records of pid 1 and 2 are always kept
						if (it->first > 2){
							for (unsigned i=0; isecond.size(); i++){
								const ACCT &acct = it->second[i];
								if (AC_FLAG_IS_END(acct.ac.ac_flag)){
									glocal.ignoreseq[acct.seqnum] = true;
									#if 0
										// This does not work... yet
										// We eliminate all the EXECs and the FORK event just before
										int j=i-1;
										while (j >= 0){
											const ACCT &acctprev = it->second[j];
											if (AC_FLAG_IS_EXEC(acctprev.ac.ac_flag)){
												if (!acctprev.referenced) glocal.ignoreseq[acctprev.seqnum] = true;
												// The previous record is a exec, we check
												// if there is more or if there is a fork
												j--;
											}else if (AC_FLAG_IS_FORK(acctprev.ac.ac_flag)){
												if (!acctprev.referenced) glocal.ignoreseq[acctprev.seqnum] = true;
												// The previous record is a fork, so we stop there
												break;
											}
										}
									#endif
								}
							}
						}
					}
					// NOTE(review): the statement below is truncated in this copy
					for (unsigned i=0; i
					}
			}
		}
		return ret;
	return glocal.ret;
}