/*
	This file is part of Bolixo.

	Bolixo is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Bolixo is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with Bolixo.  If not, see <https://www.gnu.org/licenses/>.
*/
/*
	This program deletes stuff in the database while maintaining some integrity.

	This program is used by deleteoldmsgs on a regular basis. deleteoldmsgs
	removes the directory entries in table dirs_content and leaves the data
	in the table "files" in place. The program deleteitems finds un-referenced
	stuff and deletes it.

	This program may be used after deleting an account. You just remove an
	account entry in the table id2name and all tables will be cleaned.

	It could be argued that the table "files" would be better off with a
	reference count. There are cases where this program will fail to remove
	un-referenced files entries. For now these cases do not occur because we
	can't pack a directory yet. Packing a directory is the operation which
	cleans all old versions of a file.
*/ #include #include #include #include #include #include #include #include #include #include "bolixo.m" #define DEFINE_TBFTYPE #include "bolixo.h" using namespace std; struct BOOLSET{ bool maxid_fixed = false; unsigned maxid = 0; vector ids; BOOLSET(){ } bool notin (unsigned id) const{ return id < maxid && (id >= ids.size() || !ids[id]); } void insert (const char *id){ const unsigned itemid = atoi(id); if (!maxid_fixed && itemid > maxid) maxid = itemid; if (itemid <= maxid){ if (ids.size() < itemid){ ids.resize(itemid+1000000); } ids[itemid] = true; } } void clear(){ if (!maxid_fixed) maxid = 0; ids.clear(); } void setmaxid(const char *tablename); }; /* Query the database to get the maximum auto-increment value for that table The notin() function will return false for any larger than maxid. These ids were added after we collected all ids from a table. */ void BOOLSET::setmaxid(const char *table_name) { glocal unsigned maxid = 0; ("SELECT AUTO_INCREMENT FROM information_schema.tables WHERE table_name = '%s' AND table_schema = DATABASE( )",table_name); if (row[0] != nullptr) glocal.maxid = atoi(row[0]); if (glocal.maxid > 0){ maxid = glocal.maxid; maxid_fixed = true; } } /* Collect all ids in table dirs_content and files */ static void deleteitems_getids (BOOLSET &itemids, bool from_dirs_content, bool from_files) { glocal BOOLSET *itemids = &itemids; itemids.clear(); if (from_dirs_content){ // Retrieve all used ids ("select itemid from dirs_content"); glocal.itemids->insert(row[0]); } if (from_files){ ("select id from files"); glocal.itemids->insert(row[0]); } debug_printf ("getids fixed=%d maxid=%u\n",itemids.maxid_fixed,itemids.maxid); } static bool verbose = false; static bool doit = false; // Delete unused entries in table ids static void deleteitems_table_ids(const BOOLSET &itemids) { glocal const BOOLSET *itemids = &itemids; ("select id from ids"); unsigned id = atoi(row[0]); if (glocal.itemids->notin(id)){ if (verbose){ printf ("Itemid %5u must go\n",id); } if 
(doit){ if (sql_action("delete from ids where id=%u",id)==-1){ tlmp_error ("Delete fail in table ids, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } } } } static void delete_fileids (vector &fileids) { if (fileids.size() > 0){ NSQL_REQ req; req.appendf ("delete from files where id in "); req.appendlist(fileids); fileids.clear(); // tlmp_error ("delete_fileids %s\n",req.c_str()); if (sql_action(req)==-1){ tlmp_error ("Delete fail in table files, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } } } struct MARKID{ unsigned userid; unsigned itemid; MARKID(unsigned _userid, unsigned _itemid) :userid(_userid),itemid(_itemid){} }; static void delete_marks (vector &markids) { if (markids.size() > 0){ NSQL_REQ req; req.appendf ("delete from marks where ( "); auto it=markids.begin(); req.appendf ("(userid=%u and itemid=%d)",it->userid,it->itemid); for (it++; it != markids.end(); it++){ req.appendf (" or (userid=%u and itemid=%d)",it->userid,it->itemid); } req.append (')'); markids.clear(); //tlmp_error ("delete_markids %s\n",req.c_str()); if (sql_action(req)==-1){ tlmp_error ("Delete fail in table marks, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } } } int main (int argc, char *argv[]) { glocal int ret = -1; glocal bool integrity = false; glocal const char *data_socket = "/var/lib/mysql/mysql.sock"; glocal const char *data_dbserv = "localhost"; glocal const char *data_dbname = "files"; glocal const char *data_dbuser = nullptr; static const char *tb[]={"bolixo","tlmpsql",nullptr}; glocal.ret = (argc,argv,tb); setproginfo ("deleteitems",VERSION ,MSG_U(I_DELETEITEMS ,"Delete items in the Bolixo database.")); setarg ('I',"integrity",MSG_U(O_INTEGRITY,"Perform garbage collection"),glocal.integrity,false); setarg (' ',"doit",MSG_U(O_DOIT,"Perform the deletion, not just a test"),doit,false); setgrouparg (MSG_R(I_DATABASE)); setarg (' ',"data_dbserv","Database server",glocal.data_dbserv,false); setarg (' ',"data_dbname","Database 
name",glocal.data_dbname,false); setarg (' ',"data_dbuser","Database user",glocal.data_dbuser,true); setarg (' ',"data_socket","Database unix socket",glocal.data_socket,false); setgrouparg(MSG_R(I_MISC)); setarg ('v',"verbose",MSG_U(O_VERBOPER,"Verbose operation"),verbose,false); int ret = -1; if (!doit) verbose = true; // Probably a test const char *passwd = getenv("DELETEITEMS_PWD"); if (passwd == nullptr){ tlmp_error ("Can't get database password from environment, aborting\n"); exit (-1); } query_setdefaultdb (glocal.data_dbserv,glocal.data_dbname,glocal.data_dbuser,passwd); query_getdefaultdb()->setunixpath(glocal.data_socket); if (glocal.integrity){ glocal BOOLSET userids; glocal BOOLSET itemids; glocal.userids.setmaxid("id2name"); glocal.itemids.setmaxid("ids"); // Find all userids ("select userid from id2name"); // Avoid negative userids if (row[0][0] == '-') return; glocal.userids.insert(row[0]); debug_printf ("userids fixed=%d maxid=%u\n",glocal.userids.maxid_fixed,glocal.userids.maxid); // Delete directory entries pointing to deleted/invalid userid ("select dirid,itemid,ids.ownerid,dirs_content.name,dirs_content.modified,files.filetype" " from dirs_content join ids on dirs_content.itemid=ids.id" " left join files on files.id=dirs_content.itemid and files.modified=dirs_content.modified"); unsigned dirid = atoi(row[0]); unsigned itemid = atoi(row[1]); unsigned ownerid = atoi(row[2]); const char *name = row[3]; const char *modified = row[4]; const char *filetype = row[5] == nullptr ? 
"NULL" : tbftype[atoi(row[5])]; if (glocal.userids.notin(ownerid)){ if (verbose){ printf ("directory %5u item %5u ownerid %5u name %-30s modified %s filetype=%s must go\n" ,dirid,itemid,ownerid,name,modified,filetype); } if (doit){ if (sql_action("delete from dirs_content where dirid=%u and itemid=%u and modified='%s'" ,dirid,itemid,modified)==-1){ tlmp_error ("Delete fail in table dirs_content, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } } } // Delete files entries pointing to deleted/invalid userid ("select files.id,ids.ownerid,modified,filetype,content is null" " from files join ids on files.id=ids.id"); unsigned ownerid = atoi(row[1]); if (glocal.userids.notin(ownerid)){ unsigned fileid = atoi(row[0]); const char *modified = row[2]; const char *filetype = tbftype[atoi(row[3])]; bool content_is_null = atoi(row[4]); if (verbose){ printf ("file %5u ownerid %5u modified %s filetype=%s content_is_null %d must go\n" ,fileid,ownerid,modified,filetype,content_is_null); if (content_is_null){ printf ("Content file %u-%s must go\n",fileid,modified); } } if (doit){ if (sql_action("delete from files where id=%u and modified='%s'" ,fileid,modified)==-1){ tlmp_error ("Delete fail in table files, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } if (content_is_null){ // We must delete the file content in /var/lib/bolixo string path = string_f("/var/lib/bolixo/%u-%s",fileid,modified); for (auto &c:path) if (c == ' ') c='-'; if (unlink(path.c_str())==-1){ tlmp_error ("Can't delete content file %s (%s)\n",path.c_str(),strerror(errno)); exit (-1); } } } } deleteitems_getids(glocal.itemids,true,true); deleteitems_table_ids(glocal.itemids); // Delete dirs_content entries where dirid has no entry in ids // This case happen when we delete an itemid of dirs_content pointing to a deleted userid // dirid=parent_dir,itemid=dir1 // We end up with many lines like this // dirid=dir1, ... 
where dir1 has no entry in ids ("select dirid from dirs_content where dirid != 0"); unsigned id = atoi(row[0]); if (glocal.itemids.notin(id)){ if (verbose){ printf ("Dirid %5u must go\n",id); } if (doit){ if (sql_action("delete from dirs_content where dirid=%u",id)==-1){ tlmp_error ("Delete fail in table dirs_content, aborting: %s\n",query_getdefaultdb()->error()); exit (-1); } } } // Delete orphan files entries (no directory entry reference them) /* *** There is a flaw here. If a file is has multiple versions and only one version and not all versions are referenced, the following code won't do anything. It will keep all versions. This happens becauses we only check if the itemid is referenced, not the combination itemid,modified. */ // First, Retrieve (again) all used referenced itemids deleteitems_getids(glocal.itemids,true,false); glocal bool deleted_orphan = false; glocal vector delete_fileids; // Optimise the deletes ("select id,modified,content is null from files"); unsigned fileid = atoi(row[0]); if (glocal.itemids.notin(fileid)){ if (verbose){ printf ("Orphan file %5u must go\n",fileid); } if (doit){ glocal.deleted_orphan = true; glocal.delete_fileids.push_back(fileid); if (glocal.delete_fileids.size() > 1000) delete_fileids(glocal.delete_fileids); bool content_is_null = atoi(row[2]); if (content_is_null){ const char *modified = row[1]; // We must delete the file content in /var/lib/bolixo string path = string_f("/var/lib/bolixo/%u-%s",fileid,modified); for (auto &c:path) if (c == ' ') c='-'; if (unlink(path.c_str())==-1){ tlmp_error ("Can't delete content file %s (%s)\n",path.c_str(),strerror(errno)); exit (-1); } } } } delete_fileids(glocal.delete_fileids); if (glocal.deleted_orphan){ // Another cleanup pass deleteitems_getids(glocal.itemids,true,true); deleteitems_table_ids(glocal.itemids); } // Delete all files in /var/lib/bolixo connected to unused itemid ("/var/lib/bolixo"); unsigned id = atoi(basename); if (glocal.itemids.notin(id)){ if (verbose){ 
printf ("Content file %s must go\n",basename); } if (doit){ if (unlink(path)==-1){ tlmp_error ("Can't delete content file %s (%s)\n",path,strerror(errno)); exit (-1); } } } // Other table just related to userid // While cleaning those tables, we collect other ids which will make orphans in other tables. set group_ids; set group_list_ids; struct TABLESPEC { const char *table; const char *field; set *collect;} tables[]={ {"contact_requests","userid",nullptr}, {"contact_requests","reqid",nullptr}, {"interests","userid",nullptr}, {"interests_remote","userid",nullptr}, {"notifications","userid",nullptr}, {"userinfo","userid",nullptr}, {"group_members","userid",nullptr}, {"groups","ownerid",&group_ids}, {"group_lists","ownerid",&group_list_ids} }; for (auto const &table:tables){ glocal const char *table = table.table; glocal const char *field = table.field; glocal set *collect = table.collect; glocal set done; // Avoid repetition // Another solution would be to put unique in the SQL statement // but it would put too much work on the server. ("select %s%s from %s",table.field ,table.collect == nullptr ? 
"" : ",id" ,table.table); unsigned id = atoi(row[0]); if (glocal.userids.notin(id)){ if (glocal.collect != nullptr) glocal.collect->insert(atoi(row[1])); if (glocal.done.insert(id).second){ if (verbose){ printf ("Table %s: %s %5u must go\n",glocal.table,glocal.field,id); } if (doit){ if (sql_action("delete from %s where %s=%u",glocal.table,glocal.field,id)==-1){ tlmp_error ("Delete fail in table %s, aborting: %s\n" ,glocal.table,query_getdefaultdb()->error()); exit (-1); } } } } } // Some groups and group_lists have vanished // We must delete all records related to them in group_members and group_list_members for (auto const &table:tables){ if (table.collect != nullptr){ for (auto &u:*table.collect){ if (verbose){ printf ("delete %s %u\n",table.table,u); } if (doit){ if (sql_action("delete from %s where id=%u",table.table,u)==-1){ tlmp_error ("Delete fail in table %s, aborting: %s\n" ,table.table,query_getdefaultdb()->error()); exit (-1); } } } } } // Table marks may bve associated with missing userids and missing itemids glocal vector markids; ("select userid,itemid from marks"); unsigned userid = atoi(row[0]); unsigned itemid = atoi(row[1]); if (glocal.userids.notin(userid) || glocal.itemids.notin(itemid)){ if (verbose){ printf ("Table makrs: userid %5u itemid %5u must go\n",userid,itemid); } if (doit){ glocal.markids.emplace_back(userid,itemid); if (glocal.markids.size() > 100) delete_marks(glocal.markids); } } delete_marks (glocal.markids); } return ret; int ret = -1; return ret; return glocal.ret; }