/* A snapshot file (nbd volume) has a name which defines its role. map-id-seq.data id is a name associated with a project or a branch in the project. map is a sequence of uppercase letter allowing one to understand the relationship (inheritance) between the files. seq is a number allowing one to connect the remote volume with the proper file. Examples: A-client1_prod-0.data: This is the first volume file created for a project AA--client1_prod-1.data: This is the first evolution (snapshot) AAA-client1_prod-3.data: This is the second evolution AB-client1_bug1-2.data: This is an independant evolution of the first volume */ #include #include #include #include #include #include #include #include #include #include #include #include #include "drsnap.h" #include "drraid.h" static SNAPFILES files; static DEBUG_KEY DBGSFILE ("snapfile","snapshot and volume management"); #define CHUNK_SIZE (1024*1024) inline bool testbit(unsigned *bitmap, unsigned pos) { unsigned word = pos>>5; unsigned bit = 1<<(pos&31); return bitmap[word] & bit; } inline void setbit(unsigned *bitmap, unsigned pos) { unsigned word = pos>>5; unsigned bit = 1<<(pos&31); bitmap[word] |= bit; } /* Replace the extension of a file with .bitmap. Return -1 if there was no extension. 
*/ static int snapfile_bitmap(const char *path, SSTRING &bpath) { int ret = -1; const char *pt = strchr(path,'.'); if (pt == NULL){ tlmp_error ("Improper snapshot file, no extension: %s",path); }else{ bpath.setfrom (path,pt-path); bpath.append (".bitmap"); ret = 0; } return ret; } PUBLIC SNAPFILE::SNAPFILE( const char *_dir, const char *_map, const char *_branch, int _rev) { bitmapsize = 64*1024; dir = _dir; map = _map; branch = _branch; rev = _rev; in_use = false; parent = NULL; path.setfromf ("%s/%s-%s-%d.data",dir.c_str(),map.c_str(),branch.c_str() ,rev); const char *_path = path.c_str(); root = false; bitmap = NULL; fd = open (_path,O_RDWR,0); if (fd == -1){ tlmp_error ("Can't open file %s (%s)",_path,strerror(errno)); }else{ if (map == "A"){ root = true; }else{ SSTRING tmp; if(snapfile_bitmap(_path,tmp)!=-1){ int fdb = open (tmp.c_str(),O_RDWR,0); if (fdb != -1){ struct stat st; if (fstat(fdb,&st)==-1){ tlmp_error ("Can't fstat the bimap file %s (%s)" ,tmp.c_str(),strerror(errno)); }else{ bitmapsize=st.st_size; bitmap = (unsigned*)mmap(0,bitmapsize,PROT_READ|PROT_WRITE ,MAP_SHARED,fdb,0); } close (fdb); }else{ tlmp_error ("Can't open bitmap file %s for %s (%s)" ,tmp.c_str(),_path,strerror(errno)); } } } } } PUBLIC SNAPFILE::~SNAPFILE() { close (fd); if (bitmap != NULL){ munmap(bitmap,bitmapsize); } } PUBLIC bool SNAPFILE::is_inuse() const { return in_use; } PUBLIC void SNAPFILE::set_inuse() { in_use = true; } PUBLIC void SNAPFILE::unset_inuse() { in_use = false; } PUBLIC bool SNAPFILE::is_ok() { return fd != -1 && (root || bitmap != NULL); } PUBLIC void SNAPFILE::setsize (unsigned long long _size) { size = _size; } /* Set size from file. Only the root volume has a real size. 
The snapshot are sparse file growing as needed up the root volume size */ PUBLIC void SNAPFILE::setsize_fromfile() { size = 0; struct stat64 st; if (fstat64(fd,&st)==-1){ tlmp_error ("Can't fstat file %s (%s)",path.c_str() ,strerror(errno)); }else{ size = st.st_size; bitmapsize = (size/CHUNK_SIZE)/8+1; } } PUBLIC unsigned long long SNAPFILE::getsize() const { return size; } PUBLIC unsigned SNAPFILE::getbitmapsize() const { return bitmapsize; } PUBLIC bool SNAPFILE::is_root() const { return root; } PUBLIC void SNAPFILE::getmap(SSTRING &_map) const { _map = map; } PUBLIC void SNAPFILE::getbranchname(SSTRING &_branch) const { _branch = branch; } PUBLIC int SNAPFILE::getrevision() const { return rev; } PUBLIC const char *SNAPFILE::getpath() const { return path.c_str(); } PUBLIC void SNAPFILE::setparent (SNAPFILE *_parent) { parent = _parent; } int snapfile_getnewrev(const char *dir) { glocal int ret = 1; // If file revision does not exist, start with // one. The first data file has normally // revision 0. SSTRING tmp; tmp.setfromf ("%s/revision",dir); (tmp.c_str(),true); glocal.ret = atoi(line); return -1; // No problem, we assume it contains the number 1 (tmp.c_str(),false); fprintf (fout,"%d\n",glocal.ret+1); return 0; return glocal.ret; } /* Create a snapshot of the current data file. 
*/ PUBLIC SNAPFILE *SNAPFILE::snap(const char suffix, const char *newbranch) { SNAPFILE *ret = NULL; SSTRING tmp; SSTRING newmap; newmap.setfromf ("%s%c",map.c_str(),suffix); int newrev = snapfile_getnewrev(dir.c_str()); tmp.setfromf("%s/%s-%s-%d.data",dir.c_str(),newmap.c_str(),newbranch ,newrev); int nfd = open (tmp.c_str(),O_RDWR|O_CREAT,0600); if (nfd == -1){ tlmp_error ("Can't create new snapshot %s (%s)" ,tmp.c_str(),strerror(errno)); }else{ close (nfd); SSTRING btmp; if (snapfile_bitmap(tmp.c_str(),btmp)!=-1){ int fdb = open (btmp.c_str(),O_RDWR|O_CREAT,0600); if (fdb == -1){ tlmp_error ("Can't create bitmap file %s (%s)" ,btmp.c_str(),strerror(errno)); }else{ static char zero[] = {'\0'}; for (unsigned i=0; isetparent(this); ret->setsize (size); files.add (ret); } } } return ret; } PUBLIC SNAPFILE *SNAPFILE::snap() { return snap ('A',branch.c_str()); } PUBLIC int SNAPFILE::checkrange (unsigned long long from, int len) { int ret = 0; unsigned long long end = from + len; debug_printf (DBGSFILE,"Checkrange %Lu + %lu = %Lu > %Lu\n" ,from,len,end,size); if (end > size){ tlmp_error ("Trying to seek pass end of file %s: %Lu + %lu > %Lu" ,path.c_str(),from,len,size); ret = -1; } return ret; } PRIVATE int SNAPFILE::seek (unsigned long long from) { int ret = 0; if (lseek64 (fd,from,SEEK_SET)==-1){ tlmp_error ("Can't seek file %s to position %Lu (%s)" ,path.c_str(),from,strerror(errno)); ret = -1; } return ret; } /* Read from the volume. We are sure the data is there. */ PRIVATE int SNAPFILE::raw_read (unsigned long long from, int len, int tofd) { int ret = 0; if (seek (from)!=-1){ char buf[len]; if (::read(fd,buf,len)==len){ ::write (tofd,buf,len); ret = 0; }else{ tlmp_error("Can't read %s (%s)",path.c_str() ,strerror(errno)); } } return ret; } /* Read the RAID1 superblock. Return -1 if any error. 
*/ PUBLIC int SNAPFILE::readsb (SUPERBLOCK &sb) { int ret = -1; unsigned long long offsb = size - 8*1024; if (seek (offsb)!=-1){ if (::read(fd,&sb,sizeof(sb))==sizeof(sb)){ ret = 0; } } return ret; } /* Here is all the logic of the snapshot bitmap. This function tries to find which part of the request may be satisfied from the current snapshot. If the request can't be serviced from this file, it is then process from the parent. But one must be cautious: A process may be services partly by the parent and the child. Technically, if a request is very large (much larger than a chunk), it can be broken in several parts, some services by the child and some by the parents. The current function DO NOT address this case. A request is assumed to be smaller than the chunk size (1megs while writing this comment). */ PRIVATE int SNAPFILE::read_part( unsigned long long from, int len, int tofd) { int ret = -1; unsigned long long end = from + len - 1; unsigned chunk0 = (unsigned)(from/CHUNK_SIZE); unsigned chunk1 = (unsigned)(end/CHUNK_SIZE); debug_printf ("read_part from %Lu len %d: chunk0 %d chunk1 %d\n" ,from,len,chunk0,chunk1); if (chunk0 == chunk1){ if (testbit(bitmap,chunk0)){ ret = raw_read (from,len,tofd); }else{ ret = parent->read (from,len,tofd); } }else{ // We solve this by splitting the request in two reads unsigned long long endchunk0 = (unsigned long long)chunk1 * CHUNK_SIZE; int len0 = (int)(endchunk0 - from); if (read(from,len0,tofd)!=-1){ len -= len0; ret = read (endchunk0,len,tofd); } } return ret; } PUBLIC int SNAPFILE::read (unsigned long long from, int len, int tofd) { int ret = -1; if (root){ ret = raw_read (from,len,tofd); }else{ ret = read_part (from,len,tofd); } return ret; } PRIVATE int SNAPFILE::copy_from_parent( unsigned long long from, int len) { int ret = -1; if (seek(from)!=-1){ ret = parent->read (from,len,fd); } return ret; } /* Make sure the current snapshot has all the necessary chunks to perform a write */ PRIVATE int SNAPFILE::copy_on_write( 
unsigned long long from, int len) { int ret = 0; unsigned long long end = from + len - 1; unsigned chunk0 = (unsigned)(from/CHUNK_SIZE); unsigned chunk1 = (unsigned)(end/CHUNK_SIZE); debug_printf (DBGSFILE,"copy_on_write from %Lu len %d: chunk0 %d chunk1 %d\n" ,from,len,chunk0,chunk1); if (!testbit(bitmap,chunk0)){ ret = copy_from_parent ((unsigned long long)chunk0*CHUNK_SIZE,CHUNK_SIZE); if (ret != -1){ setbit (bitmap,chunk0); } } if (chunk0 != chunk1 && !testbit(bitmap,chunk1)){ ret = copy_from_parent ((unsigned long long)chunk1*CHUNK_SIZE,CHUNK_SIZE); if (ret != -1){ setbit (bitmap,chunk1); } } return ret; } PUBLIC int SNAPFILE::write (unsigned long long from, const char *buf, int len) { int ret = 0; if (!root){ ret = copy_on_write (from,len); if (ret == -1) return -1; } if (seek(from)==-1){ ret = -1; }else{ if (::write(fd,buf,len)==len){ ret = 0; }else{ tlmp_error("Can't write %s (%s)",path.c_str() ,strerror(errno)); } } return ret; } /* Load an initialise all snapshots. */ int snapfile_load (const char *dir) { glocal int ret = 0; (dir); const char *pt = strstr(basename,".data"); if (pt!=NULL && pt[5] == '\0'){ pt = strchr(basename,'-'); bool err = false; if (pt == NULL){ err = true; }else{ SSTRING map; map.setfrom(basename,pt-basename); const char *bpt = pt+1; pt = strchr(bpt,'-'); if (pt == NULL){ err = true; }else{ SSTRING branch; branch.setfrom (bpt,pt-bpt); pt++; if (!isdigit(pt[0])){ err = true; }else{ int rev = atoi(pt); pt = str_skipdig(pt); if (strcmp(pt,".data")!=0){ err = true; }else{ SSTRING dir; pt = strrchr(path,'/'); if (pt != NULL){ dir.setfrom(path,pt-path); }else{ dir = "."; } SNAPFILE *sn = new SNAPFILE (dir.c_str() ,map.c_str(),branch.c_str(),rev); files.add (sn); } } } } if (err){ static const char *formaterr="Invalid format for data file name: %s"; tlmp_error (formaterr,path); glocal.ret = -1; } } return false; if (glocal.ret != -1){ unsigned long long size = 0; for (int i=0; iis_root()){ SSTRING map,parent; f->getmap(map); 
// The map of the parent is the map of the file minus its last letter
				parent.setfrom(map.c_str(),map.getlen()-1);
				bool found = false;
				// NOTE(review): text is missing in the next line, between
				// "j" and "getmap": the end of the loop header and the
				// declarations of ff and ffmap are gone from this copy.
				for (int j=0; jgetmap(ffmap);
					if (ffmap == parent){
						f->setparent(ff);
						found=true;
						break;
					}
				}
				if (!found){
					tlmp_error("Snapshot %s has no parent",f->getpath());
					glocal.ret = -1;
				}
			}else{
				// Root volume: its file gives the size of the volume
				f->setsize_fromfile();
				size = f->getsize();
			}
		}
		// NOTE(review): text is missing in the next line, between "i"
		// and "setsize". What is left applies the size found above and
		// checks is_ok().
		for (int i=0; isetsize(size);
			if (!f->is_ok()) glocal.ret = -1;
		}
	}
	return glocal.ret;
}

/*
	Find the end of a branch.
	For now, we assume there is a single branch

	Among the snapfiles whose branch name is branch, the one with the
	longest map is returned. Return NULL if there is none.

	NOTE(review): text is missing in the loop header below, between "i"
	and "getbranchname" (declarations of f and name included).
*/
SNAPFILE *snapfile_find (const char *branch)
{
	int maxlen = 0;
	SNAPFILE *ret = NULL;
	for (int i=0; igetbranchname(name);
		if (name == branch){
			SSTRING map;
			f->getmap(map);
			int len = map.getlen();
			if (len > maxlen){
				debug_printf (DBGSFILE,"Longest map found %s\n",map.c_str());
				ret = f;
				maxlen = len;
			}
		}
	}
	if (ret != NULL){
		debug_printf (DBGSFILE,"Find volume %s\n",ret->getpath());
	}
	return ret;
}

/*
	Find a snapfile from its revision number.
	Return NULL if not found.

	NOTE(review): text is missing in the loop header below, between "i"
	and "getrevision". Presumably the revision of each file is compared
	with rev; confirm against the original source.
*/
SNAPFILE *snapfile_find_from_rev(int rev)
{
	SNAPFILE *ret = NULL;
	for (int i=0; igetrevision()){
			ret = f;
			break;
		}
	}
	return ret;
}

/*
	Find the first snapfile whose map is map.
	Return NULL if not found.

	NOTE(review): text is missing in the loop header below, between "i"
	and "getmap" (declarations of f and m included).
*/
static SNAPFILE *snapfile_find_from_map(const char *map)
{
	SNAPFILE *ret = NULL;
	for (int i=0; igetmap(m);
		if (m == map){
			ret = f;
			break;
		}
	}
	return ret;
}

/*
	Walk the snapfile from sn to common, oring the bits in the bitmap
	of each snapfile in res.

	Nothing is done once common is reached: its own bitmap is not used.
	sz is the bitmap size in words of 4 bytes, rounded up.

	NOTE(review): text is missing in the loop below, between "i" and
	"bitmap_or". Presumably the words of the bitmap are or-ed in res and
	the walk goes on with the parent; confirm against the original source.
*/
PUBLIC void SNAPFILE::bitmap_or (
	SNAPFILE *common,
	unsigned *res)
{
	if (this != common){
		unsigned sz = bitmapsize/4;
		if (bitmapsize &0x3) sz++;
		for (unsigned i=0; ibitmap_or(common,res);
	}
}

/*
	Produce the list of chunks needed to make revto equal to revfrom.
	The local volume is in sync with revfrom. We wish to move it
	to revto.

	Here is an example. At first we had A-prod-0.data. We create
	a snapshot AA-prod-1.data. We run a script with a flaw. We fix the
	script and want to rerun it. But we want to keep the result of
	the first run for analysis. We create another snapshot
	AB-prod-2.data

	We want to connect to this snapshot, but our local volume is in sync
	with AA-prod-1.data. We have to find the common ancestor of both revisions.
The difference is simply the sum of all bits set in the bitmaps of the
	various snapshot.

	We also support the case where one revision is the ancestor of the other.

	revfrom, revto: revision numbers of the two snapfiles.
	tofd: handle receiving the report. It is duplicated, so the
	caller keeps its own handle.

	The report starts with the lines #CHUNK, #FROM, #TO, #PARENT and
	#MAPSIZE. A missing revision or a missing common parent is reported
	as a message in the output.

	NOTE(review): the end of this function is missing from this copy of
	the file (the loop printing the chunks, the closing of fout and
	whatever sets the return value, which starts at -1). Restore it
	from the original source.
*/
int snapfile_diff (int revfrom, int revto, int tofd)
{
	int ret = -1;
	FILE *fout = fdopen (dup(tofd),"w");
	if (fout != NULL){
		SNAPFILE *sn_from = snapfile_find_from_rev(revfrom);
		static const char *no_snap = "No snap file with revision %d\n";
		if (sn_from == NULL){
			fprintf (fout,no_snap,revfrom);
		}else{
			SNAPFILE *sn_to = snapfile_find_from_rev(revto);
			if (sn_to == NULL){
				fprintf (fout,no_snap,revto);
			}else{
				SSTRING map_from,map_to;
				sn_from->getmap(map_from);
				sn_to->getmap(map_to);
				SNAPFILE *parent = NULL;
				int len_from = map_from.getlen();
				int len_to = map_to.getlen();
				if (len_to > len_from
					&& strncmp(map_from.c_str(),map_to.c_str(),len_from)==0){
					// map_from starts map_to: revfrom is an ancestor of revto
					parent = sn_from;
				}else if (strncmp(map_from.c_str(),map_to.c_str(),len_to)==0){
					// map_to starts map_from: revto is the ancestor
					// (or both maps are the same)
					parent = sn_to;
				}else{
					// Two different lines: try every common prefix of the
					// maps, from the longest to the shortest.
					// NOTE(review): there is no break once a prefix matches.
					// Each shorter prefix matches as well and replaces parent,
					// so the loop ends with the result for the prefix of
					// length 1, which may even be NULL. Presumably the
					// closest ancestor was meant; confirm.
					len_to--;
					len_from--;
					int len = len_from;
					if (len_to < len_from) len = len_to;
					while (len > 0){
						if (strncmp(map_from.c_str(),map_to.c_str(),len)==0){
							SSTRING tmp;
							tmp.setfrom (map_from.c_str(),len);
							parent = snapfile_find_from_map(tmp.c_str());
						}
						len--;
					}
				}
				if (parent == NULL){
					fprintf (fout,"*** No common parent found for revision %d and %d\n"
						,revfrom,revto);
				}else{
					fprintf (fout,"#CHUNK:%d\n",CHUNK_SIZE);
					fprintf (fout,"#FROM: %d -> %s\n",revfrom,sn_from->getpath());
					fprintf (fout,"#TO: %d -> %s\n",revto,sn_to->getpath());
					fprintf (fout,"#PARENT: %s\n",parent->getpath());
					// The bitmap size is in bytes, mapsize is in words
					// of 4 bytes, rounded up
					unsigned bitmapsize = sn_from->getbitmapsize();
					unsigned mapsize = bitmapsize/4;
					if (bitmapsize & 0x3) mapsize++;
					fprintf (fout,"#MAPSIZE: %u\n",mapsize);
					unsigned res[mapsize];
					memset (res,0,sizeof(res));
					// Collect the chunks modified on both lines, from each
					// revision up to the common parent
					sn_from->bitmap_or(parent,res);
					sn_to->bitmap_or(parent,res);
					// NOTE(review): text is missing in the next line, between
					// "i" and "readsb". The end of snapfile_diff() and the
					// start of another function are gone. Going by the call
					// made further down in this file, that function is
					// snapfile_find_from_sb(utime,events). What is left of it
					// returns the first snapfile whose superblock has the
					// given utime and events, or ret if none matches.
for (unsigned i=0; ireadsb(sb)!=-1 && sb.utime == utime && sb.events == events){
			ret = f;
			break;
		}
	}
	return ret;
}

/*
	Like above, but we must identify the source snapshot from the raid
	superblock.
The destination is the last snapshot of a branch.

	Both snapfiles are located with snapfile_find_from_sb(), using
	the utime and events values of their superblock.

	Return -1 without writing anything to tofd if one of them is not
	found. Otherwise return the result of snapfile_diff() called with
	the revisions of the two snapfiles.
*/
int snapfile_diff (
	unsigned long long from_utime,
	unsigned long long from_events,
	unsigned long long to_utime,
	unsigned long long to_events,
	int tofd)
{
	SNAPFILE *src = snapfile_find_from_sb (from_utime,from_events);
	SNAPFILE *dst = snapfile_find_from_sb (to_utime,to_events);
	if (src == NULL || dst == NULL){
		return -1;
	}
	return snapfile_diff (src->getrevision(),dst->getrevision(),tofd);
}